From b7c961c7e52eb13005a76a0b0089a2235418d6c5 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Fri, 22 May 2026 21:03:15 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: W-61/llama3-hh-helpful-qt045-b0p8-20260429-085449 Source: Original Platform --- .gitattributes | 36 + README.md | 62 + all_results.json | 9 + config.json | 29 + generation_config.json | 9 + margin_logs/margins.jsonl | 681 ++ margin_logs/step_0000001.npy | 3 + margin_logs/step_0000002.npy | 3 + margin_logs/step_0000003.npy | 3 + margin_logs/step_0000004.npy | 3 + margin_logs/step_0000005.npy | 3 + margin_logs/step_0000006.npy | 3 + margin_logs/step_0000007.npy | 3 + margin_logs/step_0000008.npy | 3 + margin_logs/step_0000009.npy | 3 + margin_logs/step_0000010.npy | 3 + margin_logs/step_0000011.npy | 3 + margin_logs/step_0000012.npy | 3 + margin_logs/step_0000013.npy | 3 + margin_logs/step_0000014.npy | 3 + margin_logs/step_0000015.npy | 3 + margin_logs/step_0000016.npy | 3 + margin_logs/step_0000017.npy | 3 + margin_logs/step_0000018.npy | 3 + margin_logs/step_0000019.npy | 3 + margin_logs/step_0000020.npy | 3 + margin_logs/step_0000021.npy | 3 + margin_logs/step_0000022.npy | 3 + margin_logs/step_0000023.npy | 3 + margin_logs/step_0000024.npy | 3 + margin_logs/step_0000025.npy | 3 + margin_logs/step_0000026.npy | 3 + margin_logs/step_0000027.npy | 3 + margin_logs/step_0000028.npy | 3 + margin_logs/step_0000029.npy | 3 + margin_logs/step_0000030.npy | 3 + margin_logs/step_0000031.npy | 3 + margin_logs/step_0000032.npy | 3 + margin_logs/step_0000033.npy | 3 + margin_logs/step_0000034.npy | 3 + margin_logs/step_0000035.npy | 3 + margin_logs/step_0000036.npy | 3 + margin_logs/step_0000037.npy | 3 + margin_logs/step_0000038.npy | 3 + margin_logs/step_0000039.npy | 3 + margin_logs/step_0000040.npy | 3 + margin_logs/step_0000041.npy | 3 + margin_logs/step_0000042.npy | 3 + margin_logs/step_0000043.npy | 3 + margin_logs/step_0000044.npy | 3 + margin_logs/step_0000045.npy | 3 + margin_logs/step_0000046.npy | 3 + margin_logs/step_0000047.npy | 3 + margin_logs/step_0000048.npy | 3 + margin_logs/step_0000049.npy | 3 + margin_logs/step_0000050.npy | 3 + margin_logs/step_0000051.npy | 3 + margin_logs/step_0000052.npy | 3 + margin_logs/step_0000053.npy | 3 + margin_logs/step_0000054.npy | 3 + margin_logs/step_0000055.npy | 3 + margin_logs/step_0000056.npy | 3 + margin_logs/step_0000057.npy | 3 + margin_logs/step_0000058.npy | 3 + margin_logs/step_0000059.npy | 3 + margin_logs/step_0000060.npy | 3 + margin_logs/step_0000061.npy | 3 + margin_logs/step_0000062.npy | 3 + margin_logs/step_0000063.npy | 3 + margin_logs/step_0000064.npy | 3 + margin_logs/step_0000065.npy | 3 + margin_logs/step_0000066.npy | 3 + margin_logs/step_0000067.npy | 3 + margin_logs/step_0000068.npy | 3 + margin_logs/step_0000069.npy | 3 + margin_logs/step_0000070.npy | 3 + margin_logs/step_0000071.npy | 3 + margin_logs/step_0000072.npy | 3 + margin_logs/step_0000073.npy | 3 + margin_logs/step_0000074.npy | 3 + margin_logs/step_0000075.npy | 3 + margin_logs/step_0000076.npy | 3 + margin_logs/step_0000077.npy | 3 + margin_logs/step_0000078.npy | 3 + margin_logs/step_0000079.npy | 3 + margin_logs/step_0000080.npy | 3 + margin_logs/step_0000081.npy | 3 + margin_logs/step_0000082.npy | 3 + margin_logs/step_0000083.npy | 3 + margin_logs/step_0000084.npy | 3 + margin_logs/step_0000085.npy | 3 + margin_logs/step_0000086.npy | 3 + margin_logs/step_0000087.npy | 3 + margin_logs/step_0000088.npy | 3 + margin_logs/step_0000089.npy | 3 + margin_logs/step_0000090.npy | 3 + margin_logs/step_0000091.npy | 3 + margin_logs/step_0000092.npy | 3 + margin_logs/step_0000093.npy | 3 + margin_logs/step_0000094.npy | 3 + margin_logs/step_0000095.npy | 3 + margin_logs/step_0000096.npy | 3 + margin_logs/step_0000097.npy | 3 + margin_logs/step_0000098.npy | 3 + margin_logs/step_0000099.npy | 3 + margin_logs/step_0000100.npy | 3 + margin_logs/step_0000101.npy | 3 + margin_logs/step_0000102.npy | 3 + margin_logs/step_0000103.npy | 3 + margin_logs/step_0000104.npy | 3 + margin_logs/step_0000105.npy | 3 + margin_logs/step_0000106.npy | 3 + margin_logs/step_0000107.npy | 3 + margin_logs/step_0000108.npy | 3 + margin_logs/step_0000109.npy | 3 + margin_logs/step_0000110.npy | 3 + margin_logs/step_0000111.npy | 3 + margin_logs/step_0000112.npy | 3 + margin_logs/step_0000113.npy | 3 + margin_logs/step_0000114.npy | 3 + margin_logs/step_0000115.npy | 3 + margin_logs/step_0000116.npy | 3 + margin_logs/step_0000117.npy | 3 + margin_logs/step_0000118.npy | 3 + margin_logs/step_0000119.npy | 3 + margin_logs/step_0000120.npy | 3 + margin_logs/step_0000121.npy | 3 + margin_logs/step_0000122.npy | 3 + margin_logs/step_0000123.npy | 3 + margin_logs/step_0000124.npy | 3 + margin_logs/step_0000125.npy | 3 + margin_logs/step_0000126.npy | 3 + margin_logs/step_0000127.npy | 3 + margin_logs/step_0000128.npy | 3 + margin_logs/step_0000129.npy | 3 + margin_logs/step_0000130.npy | 3 + margin_logs/step_0000131.npy | 3 + margin_logs/step_0000132.npy | 3 + margin_logs/step_0000133.npy | 3 + margin_logs/step_0000134.npy | 3 + margin_logs/step_0000135.npy | 3 + margin_logs/step_0000136.npy | 3 + margin_logs/step_0000137.npy | 3 + margin_logs/step_0000138.npy | 3 + margin_logs/step_0000139.npy | 3 + margin_logs/step_0000140.npy | 3 + margin_logs/step_0000141.npy | 3 + margin_logs/step_0000142.npy | 3 + margin_logs/step_0000143.npy | 3 + margin_logs/step_0000144.npy | 3 + margin_logs/step_0000145.npy | 3 + margin_logs/step_0000146.npy | 3 + margin_logs/step_0000147.npy | 3 + margin_logs/step_0000148.npy | 3 + margin_logs/step_0000149.npy | 3 + margin_logs/step_0000150.npy | 3 + margin_logs/step_0000151.npy | 3 + margin_logs/step_0000152.npy | 3 + margin_logs/step_0000153.npy | 3 + margin_logs/step_0000154.npy | 3 + margin_logs/step_0000155.npy | 3 + margin_logs/step_0000156.npy | 3 + margin_logs/step_0000157.npy | 3 + margin_logs/step_0000158.npy | 3 + margin_logs/step_0000159.npy | 3 + margin_logs/step_0000160.npy | 3 + margin_logs/step_0000161.npy | 3 + margin_logs/step_0000162.npy | 3 + margin_logs/step_0000163.npy | 3 + margin_logs/step_0000164.npy | 3 + margin_logs/step_0000165.npy | 3 + margin_logs/step_0000166.npy | 3 + margin_logs/step_0000167.npy | 3 + margin_logs/step_0000168.npy | 3 + margin_logs/step_0000169.npy | 3 + margin_logs/step_0000170.npy | 3 + margin_logs/step_0000171.npy | 3 + margin_logs/step_0000172.npy | 3 + margin_logs/step_0000173.npy | 3 + margin_logs/step_0000174.npy | 3 + margin_logs/step_0000175.npy | 3 + margin_logs/step_0000176.npy | 3 + margin_logs/step_0000177.npy | 3 + margin_logs/step_0000178.npy | 3 + margin_logs/step_0000179.npy | 3 + margin_logs/step_0000180.npy | 3 + margin_logs/step_0000181.npy | 3 + margin_logs/step_0000182.npy | 3 + margin_logs/step_0000183.npy | 3 + margin_logs/step_0000184.npy | 3 + margin_logs/step_0000185.npy | 3 + margin_logs/step_0000186.npy | 3 + margin_logs/step_0000187.npy | 3 + margin_logs/step_0000188.npy | 3 + margin_logs/step_0000189.npy | 3 + margin_logs/step_0000190.npy | 3 + margin_logs/step_0000191.npy | 3 + margin_logs/step_0000192.npy | 3 + margin_logs/step_0000193.npy | 3 + margin_logs/step_0000194.npy | 3 + margin_logs/step_0000195.npy | 3 + margin_logs/step_0000196.npy | 3 + margin_logs/step_0000197.npy | 3 + margin_logs/step_0000198.npy | 3 + margin_logs/step_0000199.npy | 3 + margin_logs/step_0000200.npy | 3 + margin_logs/step_0000201.npy | 3 + margin_logs/step_0000202.npy | 3 + margin_logs/step_0000203.npy | 3 + margin_logs/step_0000204.npy | 3 + margin_logs/step_0000205.npy | 3 + margin_logs/step_0000206.npy | 3 + margin_logs/step_0000207.npy | 3 + margin_logs/step_0000208.npy | 3 + margin_logs/step_0000209.npy | 3 + margin_logs/step_0000210.npy | 3 + margin_logs/step_0000211.npy | 3 + margin_logs/step_0000212.npy | 3 + margin_logs/step_0000213.npy | 3 + margin_logs/step_0000214.npy | 3 + margin_logs/step_0000215.npy | 3 + margin_logs/step_0000216.npy | 3 + margin_logs/step_0000217.npy | 3 + margin_logs/step_0000218.npy | 3 + margin_logs/step_0000219.npy | 3 + margin_logs/step_0000220.npy | 3 + margin_logs/step_0000221.npy | 3 + margin_logs/step_0000222.npy | 3 + margin_logs/step_0000223.npy | 3 + margin_logs/step_0000224.npy | 3 + margin_logs/step_0000225.npy | 3 + margin_logs/step_0000226.npy | 3 + margin_logs/step_0000227.npy | 3 + margin_logs/step_0000228.npy | 3 + margin_logs/step_0000229.npy | 3 + margin_logs/step_0000230.npy | 3 + margin_logs/step_0000231.npy | 3 + margin_logs/step_0000232.npy | 3 + margin_logs/step_0000233.npy | 3 + margin_logs/step_0000234.npy | 3 + margin_logs/step_0000235.npy | 3 + margin_logs/step_0000236.npy | 3 + margin_logs/step_0000237.npy | 3 + margin_logs/step_0000238.npy | 3 + margin_logs/step_0000239.npy | 3 + margin_logs/step_0000240.npy | 3 + margin_logs/step_0000241.npy | 3 + margin_logs/step_0000242.npy | 3 + margin_logs/step_0000243.npy | 3 + margin_logs/step_0000244.npy | 3 + margin_logs/step_0000245.npy | 3 + margin_logs/step_0000246.npy | 3 + margin_logs/step_0000247.npy | 3 + margin_logs/step_0000248.npy | 3 + margin_logs/step_0000249.npy | 3 + margin_logs/step_0000250.npy | 3 + margin_logs/step_0000251.npy | 3 + margin_logs/step_0000252.npy | 3 + margin_logs/step_0000253.npy | 3 + margin_logs/step_0000254.npy | 3 + margin_logs/step_0000255.npy | 3 + margin_logs/step_0000256.npy | 3 + margin_logs/step_0000257.npy | 3 + margin_logs/step_0000258.npy | 3 + margin_logs/step_0000259.npy | 3 + margin_logs/step_0000260.npy | 3 + margin_logs/step_0000261.npy | 3 + margin_logs/step_0000262.npy | 3 + margin_logs/step_0000263.npy | 3 + margin_logs/step_0000264.npy | 3 + margin_logs/step_0000265.npy | 3 + margin_logs/step_0000266.npy | 3 + margin_logs/step_0000267.npy | 3 + margin_logs/step_0000268.npy | 3 + margin_logs/step_0000269.npy | 3 + margin_logs/step_0000270.npy | 3 + margin_logs/step_0000271.npy | 3 + margin_logs/step_0000272.npy | 3 + margin_logs/step_0000273.npy | 3 + margin_logs/step_0000274.npy | 3 + margin_logs/step_0000275.npy | 3 + margin_logs/step_0000276.npy | 3 + margin_logs/step_0000277.npy | 3 + margin_logs/step_0000278.npy | 3 + margin_logs/step_0000279.npy | 3 + margin_logs/step_0000280.npy | 3 + margin_logs/step_0000281.npy | 3 + margin_logs/step_0000282.npy | 3 + margin_logs/step_0000283.npy | 3 + margin_logs/step_0000284.npy | 3 + margin_logs/step_0000285.npy | 3 + margin_logs/step_0000286.npy | 3 + margin_logs/step_0000287.npy | 3 + margin_logs/step_0000288.npy | 3 + margin_logs/step_0000289.npy | 3 + margin_logs/step_0000290.npy | 3 + margin_logs/step_0000291.npy | 3 + margin_logs/step_0000292.npy | 3 + margin_logs/step_0000293.npy | 3 + margin_logs/step_0000294.npy | 3 + margin_logs/step_0000295.npy | 3 + margin_logs/step_0000296.npy | 3 + margin_logs/step_0000297.npy | 3 + margin_logs/step_0000298.npy | 3 + margin_logs/step_0000299.npy | 3 + margin_logs/step_0000300.npy | 3 + margin_logs/step_0000301.npy | 3 + margin_logs/step_0000302.npy | 3 + margin_logs/step_0000303.npy | 3 + margin_logs/step_0000304.npy | 3 + margin_logs/step_0000305.npy | 3 + margin_logs/step_0000306.npy | 3 + margin_logs/step_0000307.npy | 3 + margin_logs/step_0000308.npy | 3 + margin_logs/step_0000309.npy | 3 + margin_logs/step_0000310.npy | 3 + margin_logs/step_0000311.npy | 3 + margin_logs/step_0000312.npy | 3 + margin_logs/step_0000313.npy | 3 + margin_logs/step_0000314.npy | 3 + margin_logs/step_0000315.npy | 3 + margin_logs/step_0000316.npy | 3 + margin_logs/step_0000317.npy | 3 + margin_logs/step_0000318.npy | 3 + margin_logs/step_0000319.npy | 3 + margin_logs/step_0000320.npy | 3 + margin_logs/step_0000321.npy | 3 + margin_logs/step_0000322.npy | 3 + margin_logs/step_0000323.npy | 3 + margin_logs/step_0000324.npy | 3 + margin_logs/step_0000325.npy | 3 + margin_logs/step_0000326.npy | 3 + margin_logs/step_0000327.npy | 3 + margin_logs/step_0000328.npy | 3 + margin_logs/step_0000329.npy | 3 + margin_logs/step_0000330.npy | 3 + margin_logs/step_0000331.npy | 3 + margin_logs/step_0000332.npy | 3 + margin_logs/step_0000333.npy | 3 + margin_logs/step_0000334.npy | 3 + margin_logs/step_0000335.npy | 3 + margin_logs/step_0000336.npy | 3 + margin_logs/step_0000337.npy | 3 + margin_logs/step_0000338.npy | 3 + margin_logs/step_0000339.npy | 3 + margin_logs/step_0000340.npy | 3 + margin_logs/step_0000341.npy | 3 + margin_logs/step_0000342.npy | 3 + margin_logs/step_0000343.npy | 3 + margin_logs/step_0000344.npy | 3 + margin_logs/step_0000345.npy | 3 + margin_logs/step_0000346.npy | 3 + margin_logs/step_0000347.npy | 3 + margin_logs/step_0000348.npy | 3 + margin_logs/step_0000349.npy | 3 + margin_logs/step_0000350.npy | 3 + margin_logs/step_0000351.npy | 3 + margin_logs/step_0000352.npy | 3 + margin_logs/step_0000353.npy | 3 + margin_logs/step_0000354.npy | 3 + margin_logs/step_0000355.npy | 3 + margin_logs/step_0000356.npy | 3 + margin_logs/step_0000357.npy | 3 + margin_logs/step_0000358.npy | 3 + margin_logs/step_0000359.npy | 3 + margin_logs/step_0000360.npy | 3 + margin_logs/step_0000361.npy | 3 + margin_logs/step_0000362.npy | 3 + margin_logs/step_0000363.npy | 3 + margin_logs/step_0000364.npy | 3 + margin_logs/step_0000365.npy | 3 + margin_logs/step_0000366.npy | 3 + margin_logs/step_0000367.npy | 3 + margin_logs/step_0000368.npy | 3 + margin_logs/step_0000369.npy | 3 + margin_logs/step_0000370.npy | 3 + margin_logs/step_0000371.npy | 3 + margin_logs/step_0000372.npy | 3 + margin_logs/step_0000373.npy | 3 + margin_logs/step_0000374.npy | 3 + margin_logs/step_0000375.npy | 3 + margin_logs/step_0000376.npy | 3 + margin_logs/step_0000377.npy | 3 + margin_logs/step_0000378.npy | 3 + margin_logs/step_0000379.npy | 3 + margin_logs/step_0000380.npy | 3 + margin_logs/step_0000381.npy | 3 + margin_logs/step_0000382.npy | 3 + margin_logs/step_0000383.npy | 3 + margin_logs/step_0000384.npy | 3 + margin_logs/step_0000385.npy | 3 + margin_logs/step_0000386.npy | 3 + margin_logs/step_0000387.npy | 3 + margin_logs/step_0000388.npy | 3 + margin_logs/step_0000389.npy | 3 + margin_logs/step_0000390.npy | 3 + margin_logs/step_0000391.npy | 3 + margin_logs/step_0000392.npy | 3 + margin_logs/step_0000393.npy | 3 + margin_logs/step_0000394.npy | 3 + margin_logs/step_0000395.npy | 3 + margin_logs/step_0000396.npy | 3 + margin_logs/step_0000397.npy | 3 + margin_logs/step_0000398.npy | 3 + margin_logs/step_0000399.npy | 3 + margin_logs/step_0000400.npy | 3 + margin_logs/step_0000401.npy | 3 + margin_logs/step_0000402.npy | 3 + margin_logs/step_0000403.npy | 3 + margin_logs/step_0000404.npy | 3 + margin_logs/step_0000405.npy | 3 + margin_logs/step_0000406.npy | 3 + margin_logs/step_0000407.npy | 3 + margin_logs/step_0000408.npy | 3 + margin_logs/step_0000409.npy | 3 + margin_logs/step_0000410.npy | 3 + margin_logs/step_0000411.npy | 3 + margin_logs/step_0000412.npy | 3 + margin_logs/step_0000413.npy | 3 + margin_logs/step_0000414.npy | 3 + margin_logs/step_0000415.npy | 3 + margin_logs/step_0000416.npy | 3 + margin_logs/step_0000417.npy | 3 + margin_logs/step_0000418.npy | 3 + margin_logs/step_0000419.npy | 3 + margin_logs/step_0000420.npy | 3 + margin_logs/step_0000421.npy | 3 + margin_logs/step_0000422.npy | 3 + margin_logs/step_0000423.npy | 3 + margin_logs/step_0000424.npy | 3 + margin_logs/step_0000425.npy | 3 + margin_logs/step_0000426.npy | 3 + margin_logs/step_0000427.npy | 3 + margin_logs/step_0000428.npy | 3 + margin_logs/step_0000429.npy | 3 + margin_logs/step_0000430.npy | 3 + margin_logs/step_0000431.npy | 3 + margin_logs/step_0000432.npy | 3 + margin_logs/step_0000433.npy | 3 + margin_logs/step_0000434.npy | 3 + margin_logs/step_0000435.npy | 3 + margin_logs/step_0000436.npy | 3 + margin_logs/step_0000437.npy | 3 + margin_logs/step_0000438.npy | 3 + margin_logs/step_0000439.npy | 3 + margin_logs/step_0000440.npy | 3 + margin_logs/step_0000441.npy | 3 + margin_logs/step_0000442.npy | 3 + margin_logs/step_0000443.npy | 3 + margin_logs/step_0000444.npy | 3 + margin_logs/step_0000445.npy | 3 + margin_logs/step_0000446.npy | 3 + margin_logs/step_0000447.npy | 3 + margin_logs/step_0000448.npy | 3 + margin_logs/step_0000449.npy | 3 + margin_logs/step_0000450.npy | 3 + margin_logs/step_0000451.npy | 3 + margin_logs/step_0000452.npy | 3 + margin_logs/step_0000453.npy | 3 + margin_logs/step_0000454.npy | 3 + margin_logs/step_0000455.npy | 3 + margin_logs/step_0000456.npy | 3 + margin_logs/step_0000457.npy | 3 + margin_logs/step_0000458.npy | 3 + margin_logs/step_0000459.npy | 3 + margin_logs/step_0000460.npy | 3 + margin_logs/step_0000461.npy | 3 + margin_logs/step_0000462.npy | 3 + margin_logs/step_0000463.npy | 3 + margin_logs/step_0000464.npy | 3 + margin_logs/step_0000465.npy | 3 + margin_logs/step_0000466.npy | 3 + margin_logs/step_0000467.npy | 3 + margin_logs/step_0000468.npy | 3 + margin_logs/step_0000469.npy | 3 + margin_logs/step_0000470.npy | 3 + margin_logs/step_0000471.npy | 3 + margin_logs/step_0000472.npy | 3 + margin_logs/step_0000473.npy | 3 + margin_logs/step_0000474.npy | 3 + margin_logs/step_0000475.npy | 3 + margin_logs/step_0000476.npy | 3 + margin_logs/step_0000477.npy | 3 + margin_logs/step_0000478.npy | 3 + margin_logs/step_0000479.npy | 3 + margin_logs/step_0000480.npy | 3 + margin_logs/step_0000481.npy | 3 + margin_logs/step_0000482.npy | 3 + margin_logs/step_0000483.npy | 3 + margin_logs/step_0000484.npy | 3 + margin_logs/step_0000485.npy | 3 + margin_logs/step_0000486.npy | 3 + margin_logs/step_0000487.npy | 3 + margin_logs/step_0000488.npy | 3 + margin_logs/step_0000489.npy | 3 + margin_logs/step_0000490.npy | 3 + margin_logs/step_0000491.npy | 3 + margin_logs/step_0000492.npy | 3 + margin_logs/step_0000493.npy | 3 + margin_logs/step_0000494.npy | 3 + margin_logs/step_0000495.npy | 3 + margin_logs/step_0000496.npy | 3 + margin_logs/step_0000497.npy | 3 + margin_logs/step_0000498.npy | 3 + margin_logs/step_0000499.npy | 3 + margin_logs/step_0000500.npy | 3 + margin_logs/step_0000501.npy | 3 + margin_logs/step_0000502.npy | 3 + margin_logs/step_0000503.npy | 3 + margin_logs/step_0000504.npy | 3 + margin_logs/step_0000505.npy | 3 + margin_logs/step_0000506.npy | 3 + margin_logs/step_0000507.npy | 3 + margin_logs/step_0000508.npy | 3 + margin_logs/step_0000509.npy | 3 + margin_logs/step_0000510.npy | 3 + margin_logs/step_0000511.npy | 3 + margin_logs/step_0000512.npy | 3 + margin_logs/step_0000513.npy | 3 + margin_logs/step_0000514.npy | 3 + margin_logs/step_0000515.npy | 3 + margin_logs/step_0000516.npy | 3 + margin_logs/step_0000517.npy | 3 + margin_logs/step_0000518.npy | 3 + margin_logs/step_0000519.npy | 3 + margin_logs/step_0000520.npy | 3 + margin_logs/step_0000521.npy | 3 + margin_logs/step_0000522.npy | 3 + margin_logs/step_0000523.npy | 3 + margin_logs/step_0000524.npy | 3 + margin_logs/step_0000525.npy | 3 + margin_logs/step_0000526.npy | 3 + margin_logs/step_0000527.npy | 3 + margin_logs/step_0000528.npy | 3 + margin_logs/step_0000529.npy | 3 + margin_logs/step_0000530.npy | 3 + margin_logs/step_0000531.npy | 3 + margin_logs/step_0000532.npy | 3 + margin_logs/step_0000533.npy | 3 + margin_logs/step_0000534.npy | 3 + margin_logs/step_0000535.npy | 3 + margin_logs/step_0000536.npy | 3 + margin_logs/step_0000537.npy | 3 + margin_logs/step_0000538.npy | 3 + margin_logs/step_0000539.npy | 3 + margin_logs/step_0000540.npy | 3 + margin_logs/step_0000541.npy | 3 + margin_logs/step_0000542.npy | 3 + margin_logs/step_0000543.npy | 3 + margin_logs/step_0000544.npy | 3 + margin_logs/step_0000545.npy | 3 + margin_logs/step_0000546.npy | 3 + margin_logs/step_0000547.npy | 3 + margin_logs/step_0000548.npy | 3 + margin_logs/step_0000549.npy | 3 + margin_logs/step_0000550.npy | 3 + margin_logs/step_0000551.npy | 3 + margin_logs/step_0000552.npy | 3 + margin_logs/step_0000553.npy | 3 + margin_logs/step_0000554.npy | 3 + margin_logs/step_0000555.npy | 3 + margin_logs/step_0000556.npy | 3 + margin_logs/step_0000557.npy | 3 + margin_logs/step_0000558.npy | 3 + margin_logs/step_0000559.npy | 3 + margin_logs/step_0000560.npy | 3 + margin_logs/step_0000561.npy | 3 + margin_logs/step_0000562.npy | 3 + margin_logs/step_0000563.npy | 3 + margin_logs/step_0000564.npy | 3 + margin_logs/step_0000565.npy | 3 + margin_logs/step_0000566.npy | 3 + margin_logs/step_0000567.npy | 3 + margin_logs/step_0000568.npy | 3 + margin_logs/step_0000569.npy | 3 + margin_logs/step_0000570.npy | 3 + margin_logs/step_0000571.npy | 3 + margin_logs/step_0000572.npy | 3 + margin_logs/step_0000573.npy | 3 + margin_logs/step_0000574.npy | 3 + margin_logs/step_0000575.npy | 3 + margin_logs/step_0000576.npy | 3 + margin_logs/step_0000577.npy | 3 + margin_logs/step_0000578.npy | 3 + margin_logs/step_0000579.npy | 3 + margin_logs/step_0000580.npy | 3 + margin_logs/step_0000581.npy | 3 + margin_logs/step_0000582.npy | 3 + margin_logs/step_0000583.npy | 3 + margin_logs/step_0000584.npy | 3 + margin_logs/step_0000585.npy | 3 + margin_logs/step_0000586.npy | 3 + margin_logs/step_0000587.npy | 3 + margin_logs/step_0000588.npy | 3 + margin_logs/step_0000589.npy | 3 + margin_logs/step_0000590.npy | 3 + margin_logs/step_0000591.npy | 3 + margin_logs/step_0000592.npy | 3 + margin_logs/step_0000593.npy | 3 + margin_logs/step_0000594.npy | 3 + margin_logs/step_0000595.npy | 3 + margin_logs/step_0000596.npy | 3 + margin_logs/step_0000597.npy | 3 + margin_logs/step_0000598.npy | 3 + margin_logs/step_0000599.npy | 3 + margin_logs/step_0000600.npy | 3 + margin_logs/step_0000601.npy | 3 + margin_logs/step_0000602.npy | 3 + margin_logs/step_0000603.npy | 3 + margin_logs/step_0000604.npy | 3 + margin_logs/step_0000605.npy | 3 + margin_logs/step_0000606.npy | 3 + margin_logs/step_0000607.npy | 3 + margin_logs/step_0000608.npy | 3 + margin_logs/step_0000609.npy | 3 + margin_logs/step_0000610.npy | 3 + margin_logs/step_0000611.npy | 3 + margin_logs/step_0000612.npy | 3 + margin_logs/step_0000613.npy | 3 + margin_logs/step_0000614.npy | 3 + margin_logs/step_0000615.npy | 3 + margin_logs/step_0000616.npy | 3 + margin_logs/step_0000617.npy | 3 + margin_logs/step_0000618.npy | 3 + margin_logs/step_0000619.npy | 3 + margin_logs/step_0000620.npy | 3 + margin_logs/step_0000621.npy | 3 + margin_logs/step_0000622.npy | 3 + margin_logs/step_0000623.npy | 3 + margin_logs/step_0000624.npy | 3 + margin_logs/step_0000625.npy | 3 + margin_logs/step_0000626.npy | 3 + margin_logs/step_0000627.npy | 3 + margin_logs/step_0000628.npy | 3 + margin_logs/step_0000629.npy | 3 + margin_logs/step_0000630.npy | 3 + margin_logs/step_0000631.npy | 3 + margin_logs/step_0000632.npy | 3 + margin_logs/step_0000633.npy | 3 + margin_logs/step_0000634.npy | 3 + margin_logs/step_0000635.npy | 3 + margin_logs/step_0000636.npy | 3 + margin_logs/step_0000637.npy | 3 + margin_logs/step_0000638.npy | 3 + margin_logs/step_0000639.npy | 3 + margin_logs/step_0000640.npy | 3 + margin_logs/step_0000641.npy | 3 + margin_logs/step_0000642.npy | 3 + margin_logs/step_0000643.npy | 3 + margin_logs/step_0000644.npy | 3 + margin_logs/step_0000645.npy | 3 + margin_logs/step_0000646.npy | 3 + margin_logs/step_0000647.npy | 3 + margin_logs/step_0000648.npy | 3 + margin_logs/step_0000649.npy | 3 + margin_logs/step_0000650.npy | 3 + margin_logs/step_0000651.npy | 3 + margin_logs/step_0000652.npy | 3 + margin_logs/step_0000653.npy | 3 + margin_logs/step_0000654.npy | 3 + margin_logs/step_0000655.npy | 3 + margin_logs/step_0000656.npy | 3 + margin_logs/step_0000657.npy | 3 + margin_logs/step_0000658.npy | 3 + margin_logs/step_0000659.npy | 3 + margin_logs/step_0000660.npy | 3 + margin_logs/step_0000661.npy | 3 + margin_logs/step_0000662.npy | 3 + margin_logs/step_0000663.npy | 3 + margin_logs/step_0000664.npy | 3 + margin_logs/step_0000665.npy | 3 + margin_logs/step_0000666.npy | 3 + margin_logs/step_0000667.npy | 3 + margin_logs/step_0000668.npy | 3 + margin_logs/step_0000669.npy | 3 + margin_logs/step_0000670.npy | 3 + margin_logs/step_0000671.npy | 3 + margin_logs/step_0000672.npy | 3 + margin_logs/step_0000673.npy | 3 + margin_logs/step_0000674.npy | 3 + margin_logs/step_0000675.npy | 3 + margin_logs/step_0000676.npy | 3 + margin_logs/step_0000677.npy | 3 + margin_logs/step_0000678.npy | 3 + margin_logs/step_0000679.npy | 3 + margin_logs/step_0000680.npy | 3 + margin_logs/step_0000681.npy | 3 + model-00001-of-00007.safetensors | 3 + model-00002-of-00007.safetensors | 3 + model-00003-of-00007.safetensors | 3 + model-00004-of-00007.safetensors | 3 + model-00005-of-00007.safetensors | 3 + model-00006-of-00007.safetensors | 3 + model-00007-of-00007.safetensors | 3 + model.safetensors.index.json | 298 + special_tokens_map.json | 23 + tokenizer.json | 3 + tokenizer_config.json | 2064 ++++ train.log | 1160 +++ train_results.json | 9 + trainer_state.json | 15706 +++++++++++++++++++++++++++++ 701 files changed, 22153 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 margin_logs/margins.jsonl create mode 100644 margin_logs/step_0000001.npy create mode 100644 margin_logs/step_0000002.npy create mode 100644 margin_logs/step_0000003.npy create mode 100644 margin_logs/step_0000004.npy create mode 100644 margin_logs/step_0000005.npy create mode 100644 margin_logs/step_0000006.npy create mode 100644 margin_logs/step_0000007.npy create mode 100644 margin_logs/step_0000008.npy create mode 100644 margin_logs/step_0000009.npy create mode 100644 margin_logs/step_0000010.npy create mode 100644 margin_logs/step_0000011.npy create mode 100644 margin_logs/step_0000012.npy create mode 100644 margin_logs/step_0000013.npy create mode 100644 margin_logs/step_0000014.npy create mode 100644 margin_logs/step_0000015.npy create mode 100644 margin_logs/step_0000016.npy create mode 100644 margin_logs/step_0000017.npy create mode 100644 margin_logs/step_0000018.npy create mode 100644 margin_logs/step_0000019.npy create mode 100644 margin_logs/step_0000020.npy create mode 100644 margin_logs/step_0000021.npy create mode 100644 margin_logs/step_0000022.npy create mode 100644 margin_logs/step_0000023.npy create mode 100644 margin_logs/step_0000024.npy create mode 100644 margin_logs/step_0000025.npy create mode 100644 margin_logs/step_0000026.npy create mode 100644 margin_logs/step_0000027.npy create mode 100644 margin_logs/step_0000028.npy create mode 100644 margin_logs/step_0000029.npy create mode 100644 margin_logs/step_0000030.npy create mode 100644 margin_logs/step_0000031.npy create mode 100644 margin_logs/step_0000032.npy create mode 100644 margin_logs/step_0000033.npy create mode 100644 margin_logs/step_0000034.npy create mode 100644 margin_logs/step_0000035.npy create mode 100644 margin_logs/step_0000036.npy create mode 100644 margin_logs/step_0000037.npy create mode 100644 margin_logs/step_0000038.npy create mode 100644 margin_logs/step_0000039.npy create mode 100644 margin_logs/step_0000040.npy create mode 100644 margin_logs/step_0000041.npy create mode 100644 margin_logs/step_0000042.npy create mode 100644 margin_logs/step_0000043.npy create mode 100644 margin_logs/step_0000044.npy create mode 100644 margin_logs/step_0000045.npy create mode 100644 margin_logs/step_0000046.npy create mode 100644 margin_logs/step_0000047.npy create mode 100644 margin_logs/step_0000048.npy create mode 100644 margin_logs/step_0000049.npy create mode 100644 margin_logs/step_0000050.npy create mode 100644 margin_logs/step_0000051.npy create mode 100644 margin_logs/step_0000052.npy create mode 100644 margin_logs/step_0000053.npy create mode 100644 margin_logs/step_0000054.npy create mode 100644 margin_logs/step_0000055.npy create mode 100644 margin_logs/step_0000056.npy create mode 100644 margin_logs/step_0000057.npy create mode 100644 margin_logs/step_0000058.npy create mode 100644 margin_logs/step_0000059.npy create mode 100644 margin_logs/step_0000060.npy create mode 100644 margin_logs/step_0000061.npy create mode 100644 margin_logs/step_0000062.npy create mode 100644 margin_logs/step_0000063.npy create mode 100644 margin_logs/step_0000064.npy create mode 100644 margin_logs/step_0000065.npy create mode 100644 margin_logs/step_0000066.npy create mode 100644 margin_logs/step_0000067.npy create mode 100644 margin_logs/step_0000068.npy create mode 100644 margin_logs/step_0000069.npy create mode 100644 margin_logs/step_0000070.npy create mode 100644 margin_logs/step_0000071.npy create mode 100644 margin_logs/step_0000072.npy create mode 100644 margin_logs/step_0000073.npy create mode 100644 margin_logs/step_0000074.npy create mode 100644 margin_logs/step_0000075.npy create mode 100644 margin_logs/step_0000076.npy create mode 100644 margin_logs/step_0000077.npy create mode 100644 margin_logs/step_0000078.npy create mode 100644 margin_logs/step_0000079.npy create mode 100644 margin_logs/step_0000080.npy create mode 100644 margin_logs/step_0000081.npy create mode 100644 margin_logs/step_0000082.npy create mode 100644 margin_logs/step_0000083.npy create mode 100644 margin_logs/step_0000084.npy create mode 100644 margin_logs/step_0000085.npy create mode 100644 margin_logs/step_0000086.npy create mode 100644 margin_logs/step_0000087.npy create mode 100644 margin_logs/step_0000088.npy create mode 100644 margin_logs/step_0000089.npy create mode 100644 margin_logs/step_0000090.npy create mode 100644 margin_logs/step_0000091.npy create mode 100644 margin_logs/step_0000092.npy create mode 100644 margin_logs/step_0000093.npy create mode 100644 margin_logs/step_0000094.npy create mode 100644 margin_logs/step_0000095.npy create mode 100644 margin_logs/step_0000096.npy create mode 100644 margin_logs/step_0000097.npy create mode 100644 margin_logs/step_0000098.npy create mode 100644 margin_logs/step_0000099.npy create mode 100644 margin_logs/step_0000100.npy create mode 100644 margin_logs/step_0000101.npy create mode 100644 margin_logs/step_0000102.npy create mode 100644 margin_logs/step_0000103.npy create mode 100644 margin_logs/step_0000104.npy create mode 100644 margin_logs/step_0000105.npy create mode 100644 margin_logs/step_0000106.npy create mode 100644 margin_logs/step_0000107.npy create mode 100644 margin_logs/step_0000108.npy create mode 100644 margin_logs/step_0000109.npy create mode 100644 margin_logs/step_0000110.npy create mode 100644 margin_logs/step_0000111.npy create mode 100644 margin_logs/step_0000112.npy create mode 100644 margin_logs/step_0000113.npy create mode 100644 margin_logs/step_0000114.npy create mode 100644 margin_logs/step_0000115.npy create mode 100644 margin_logs/step_0000116.npy create mode 100644 margin_logs/step_0000117.npy create mode 100644 margin_logs/step_0000118.npy create mode 100644 margin_logs/step_0000119.npy create mode 100644 margin_logs/step_0000120.npy create mode 100644 margin_logs/step_0000121.npy create mode 100644 margin_logs/step_0000122.npy create mode 100644 margin_logs/step_0000123.npy create mode 100644 margin_logs/step_0000124.npy create mode 100644 margin_logs/step_0000125.npy create mode 100644 margin_logs/step_0000126.npy create mode 100644 margin_logs/step_0000127.npy create mode 100644 margin_logs/step_0000128.npy create mode 100644 margin_logs/step_0000129.npy create mode 100644 margin_logs/step_0000130.npy create mode 100644 margin_logs/step_0000131.npy create mode 100644 margin_logs/step_0000132.npy create mode 100644 margin_logs/step_0000133.npy create mode 100644 margin_logs/step_0000134.npy create mode 100644 margin_logs/step_0000135.npy create mode 100644 margin_logs/step_0000136.npy create mode 100644 margin_logs/step_0000137.npy create mode 100644 margin_logs/step_0000138.npy create mode 100644 margin_logs/step_0000139.npy create mode 100644 margin_logs/step_0000140.npy create mode 100644 margin_logs/step_0000141.npy create mode 100644 margin_logs/step_0000142.npy create mode 100644 margin_logs/step_0000143.npy create mode 100644 margin_logs/step_0000144.npy create mode 100644 margin_logs/step_0000145.npy create mode 100644 margin_logs/step_0000146.npy create mode 100644 margin_logs/step_0000147.npy create mode 100644 margin_logs/step_0000148.npy create mode 100644 margin_logs/step_0000149.npy create mode 100644 margin_logs/step_0000150.npy create mode 100644 margin_logs/step_0000151.npy create mode 100644 margin_logs/step_0000152.npy create mode 100644 margin_logs/step_0000153.npy create mode 100644 margin_logs/step_0000154.npy create mode 100644 margin_logs/step_0000155.npy create mode 100644 margin_logs/step_0000156.npy create mode 100644 margin_logs/step_0000157.npy create mode 100644 margin_logs/step_0000158.npy create mode 100644 margin_logs/step_0000159.npy create mode 100644 margin_logs/step_0000160.npy create mode 100644 margin_logs/step_0000161.npy create mode 100644 margin_logs/step_0000162.npy create mode 100644 margin_logs/step_0000163.npy create mode 100644 margin_logs/step_0000164.npy create mode 100644 margin_logs/step_0000165.npy create mode 100644 margin_logs/step_0000166.npy create mode 100644 margin_logs/step_0000167.npy create mode 100644 margin_logs/step_0000168.npy create mode 100644 margin_logs/step_0000169.npy create mode 100644 margin_logs/step_0000170.npy create mode 100644 margin_logs/step_0000171.npy create mode 100644 margin_logs/step_0000172.npy create mode 100644 margin_logs/step_0000173.npy create mode 100644 margin_logs/step_0000174.npy create mode 100644 margin_logs/step_0000175.npy create mode 100644 margin_logs/step_0000176.npy create mode 100644 margin_logs/step_0000177.npy create mode 100644 margin_logs/step_0000178.npy create mode 100644 margin_logs/step_0000179.npy create mode 100644 margin_logs/step_0000180.npy create mode 100644 margin_logs/step_0000181.npy create mode 100644 margin_logs/step_0000182.npy create mode 100644 margin_logs/step_0000183.npy create mode 100644 margin_logs/step_0000184.npy create mode 100644 margin_logs/step_0000185.npy create mode 100644 margin_logs/step_0000186.npy create mode 100644 margin_logs/step_0000187.npy create mode 100644 margin_logs/step_0000188.npy create mode 100644 margin_logs/step_0000189.npy create mode 100644 margin_logs/step_0000190.npy create mode 100644 margin_logs/step_0000191.npy create mode 100644 margin_logs/step_0000192.npy create mode 100644 margin_logs/step_0000193.npy create mode 100644 margin_logs/step_0000194.npy create mode 100644 margin_logs/step_0000195.npy create mode 100644 margin_logs/step_0000196.npy create mode 100644 margin_logs/step_0000197.npy create mode 100644 margin_logs/step_0000198.npy create mode 100644 margin_logs/step_0000199.npy create mode 100644 margin_logs/step_0000200.npy create mode 100644 margin_logs/step_0000201.npy create mode 100644 margin_logs/step_0000202.npy create mode 100644 margin_logs/step_0000203.npy create mode 100644 margin_logs/step_0000204.npy create mode 100644 margin_logs/step_0000205.npy create mode 100644 margin_logs/step_0000206.npy create mode 100644 margin_logs/step_0000207.npy create mode 100644 margin_logs/step_0000208.npy create mode 100644 margin_logs/step_0000209.npy create mode 100644 margin_logs/step_0000210.npy create mode 100644 margin_logs/step_0000211.npy create mode 100644 margin_logs/step_0000212.npy create mode 100644 margin_logs/step_0000213.npy create mode 100644 margin_logs/step_0000214.npy create mode 100644 margin_logs/step_0000215.npy create mode 100644 margin_logs/step_0000216.npy create mode 100644 margin_logs/step_0000217.npy create mode 100644 margin_logs/step_0000218.npy create mode 100644 margin_logs/step_0000219.npy create mode 100644 margin_logs/step_0000220.npy create mode 100644 margin_logs/step_0000221.npy create mode 100644 margin_logs/step_0000222.npy create mode 100644 margin_logs/step_0000223.npy create mode 100644 margin_logs/step_0000224.npy create mode 100644 margin_logs/step_0000225.npy create mode 100644 margin_logs/step_0000226.npy create mode 100644 margin_logs/step_0000227.npy create mode 100644 margin_logs/step_0000228.npy create mode 100644 margin_logs/step_0000229.npy create mode 100644 margin_logs/step_0000230.npy create mode 100644 margin_logs/step_0000231.npy create mode 100644 margin_logs/step_0000232.npy create mode 100644 margin_logs/step_0000233.npy create mode 100644 margin_logs/step_0000234.npy create mode 100644 margin_logs/step_0000235.npy create mode 100644 margin_logs/step_0000236.npy create mode 100644 margin_logs/step_0000237.npy create mode 100644 margin_logs/step_0000238.npy create mode 100644 margin_logs/step_0000239.npy create mode 100644 margin_logs/step_0000240.npy create mode 100644 margin_logs/step_0000241.npy create mode 100644 margin_logs/step_0000242.npy create mode 100644 margin_logs/step_0000243.npy create mode 100644 margin_logs/step_0000244.npy create mode 100644 margin_logs/step_0000245.npy create mode 100644 margin_logs/step_0000246.npy create mode 100644 margin_logs/step_0000247.npy create mode 100644 margin_logs/step_0000248.npy create mode 100644 margin_logs/step_0000249.npy create mode 100644 margin_logs/step_0000250.npy create mode 100644 margin_logs/step_0000251.npy create mode 100644 margin_logs/step_0000252.npy create mode 100644 margin_logs/step_0000253.npy create mode 100644 margin_logs/step_0000254.npy create mode 100644 margin_logs/step_0000255.npy create mode 100644 margin_logs/step_0000256.npy create mode 100644 margin_logs/step_0000257.npy create mode 100644 margin_logs/step_0000258.npy create mode 100644 margin_logs/step_0000259.npy create mode 100644 margin_logs/step_0000260.npy create mode 100644 margin_logs/step_0000261.npy create mode 100644 margin_logs/step_0000262.npy create mode 100644 margin_logs/step_0000263.npy create mode 100644 margin_logs/step_0000264.npy create mode 100644 margin_logs/step_0000265.npy create mode 100644 margin_logs/step_0000266.npy create mode 100644 margin_logs/step_0000267.npy create mode 100644 margin_logs/step_0000268.npy create mode 100644 margin_logs/step_0000269.npy create mode 100644 margin_logs/step_0000270.npy create mode 100644 margin_logs/step_0000271.npy create mode 100644 margin_logs/step_0000272.npy create mode 100644 margin_logs/step_0000273.npy create mode 100644 margin_logs/step_0000274.npy create mode 100644 margin_logs/step_0000275.npy create mode 100644 margin_logs/step_0000276.npy create mode 100644 margin_logs/step_0000277.npy create mode 100644 margin_logs/step_0000278.npy create mode 100644 margin_logs/step_0000279.npy create mode 100644 margin_logs/step_0000280.npy create mode 100644 margin_logs/step_0000281.npy create mode 100644 margin_logs/step_0000282.npy create mode 100644 margin_logs/step_0000283.npy create mode 100644 margin_logs/step_0000284.npy create mode 100644 margin_logs/step_0000285.npy create mode 100644 margin_logs/step_0000286.npy create mode 100644 margin_logs/step_0000287.npy create mode 100644 margin_logs/step_0000288.npy create mode 100644 margin_logs/step_0000289.npy create mode 100644 margin_logs/step_0000290.npy create mode 100644 margin_logs/step_0000291.npy create mode 100644 margin_logs/step_0000292.npy create mode 100644 margin_logs/step_0000293.npy create mode 100644 margin_logs/step_0000294.npy create mode 100644 margin_logs/step_0000295.npy create mode 100644 margin_logs/step_0000296.npy create mode 100644 margin_logs/step_0000297.npy create mode 100644 margin_logs/step_0000298.npy create mode 100644 margin_logs/step_0000299.npy create mode 100644 margin_logs/step_0000300.npy create mode 100644 margin_logs/step_0000301.npy create mode 100644 margin_logs/step_0000302.npy create mode 100644 margin_logs/step_0000303.npy create mode 100644 margin_logs/step_0000304.npy create mode 100644 margin_logs/step_0000305.npy create mode 100644 margin_logs/step_0000306.npy create mode 100644 margin_logs/step_0000307.npy create mode 100644 margin_logs/step_0000308.npy create mode 100644 margin_logs/step_0000309.npy create mode 100644 margin_logs/step_0000310.npy create mode 100644 margin_logs/step_0000311.npy create mode 100644 margin_logs/step_0000312.npy create mode 100644 margin_logs/step_0000313.npy create mode 100644 margin_logs/step_0000314.npy create mode 100644 margin_logs/step_0000315.npy create mode 100644 margin_logs/step_0000316.npy create mode 100644 margin_logs/step_0000317.npy create mode 100644 margin_logs/step_0000318.npy create mode 100644 margin_logs/step_0000319.npy create mode 100644 margin_logs/step_0000320.npy create mode 100644 margin_logs/step_0000321.npy create mode 100644 margin_logs/step_0000322.npy create mode 100644 margin_logs/step_0000323.npy create mode 100644 margin_logs/step_0000324.npy create mode 100644 margin_logs/step_0000325.npy create mode 100644 margin_logs/step_0000326.npy create mode 100644 margin_logs/step_0000327.npy create mode 100644 margin_logs/step_0000328.npy create mode 100644 margin_logs/step_0000329.npy create mode 100644 margin_logs/step_0000330.npy create mode 100644 margin_logs/step_0000331.npy create mode 100644 margin_logs/step_0000332.npy create mode 100644 margin_logs/step_0000333.npy create mode 100644 margin_logs/step_0000334.npy create mode 100644 margin_logs/step_0000335.npy create mode 100644 margin_logs/step_0000336.npy create mode 100644 margin_logs/step_0000337.npy create mode 100644 margin_logs/step_0000338.npy create mode 100644 margin_logs/step_0000339.npy create mode 100644 margin_logs/step_0000340.npy create mode 100644 margin_logs/step_0000341.npy create mode 100644 margin_logs/step_0000342.npy create mode 100644 margin_logs/step_0000343.npy create mode 100644 margin_logs/step_0000344.npy create mode 100644 margin_logs/step_0000345.npy create mode 100644 margin_logs/step_0000346.npy create mode 100644 margin_logs/step_0000347.npy create mode 100644 margin_logs/step_0000348.npy create mode 100644 margin_logs/step_0000349.npy create mode 100644 margin_logs/step_0000350.npy create mode 100644 margin_logs/step_0000351.npy create mode 100644 margin_logs/step_0000352.npy create mode 100644 margin_logs/step_0000353.npy create mode 100644 margin_logs/step_0000354.npy create mode 100644 margin_logs/step_0000355.npy create mode 100644 margin_logs/step_0000356.npy create mode 100644 margin_logs/step_0000357.npy create mode 100644 margin_logs/step_0000358.npy create mode 100644 margin_logs/step_0000359.npy create mode 100644 margin_logs/step_0000360.npy create mode 100644 margin_logs/step_0000361.npy create mode 100644 margin_logs/step_0000362.npy create mode 100644 margin_logs/step_0000363.npy create mode 100644 margin_logs/step_0000364.npy create mode 100644 margin_logs/step_0000365.npy create mode 100644 margin_logs/step_0000366.npy create mode 100644 margin_logs/step_0000367.npy create mode 100644 margin_logs/step_0000368.npy create mode 100644 margin_logs/step_0000369.npy create mode 100644 margin_logs/step_0000370.npy create mode 100644 margin_logs/step_0000371.npy create mode 100644 margin_logs/step_0000372.npy create mode 100644 margin_logs/step_0000373.npy create mode 100644 margin_logs/step_0000374.npy create mode 100644 margin_logs/step_0000375.npy create mode 100644 margin_logs/step_0000376.npy create mode 100644 margin_logs/step_0000377.npy create mode 100644 margin_logs/step_0000378.npy create mode 100644 margin_logs/step_0000379.npy create mode 100644 margin_logs/step_0000380.npy create mode 100644 margin_logs/step_0000381.npy create mode 100644 margin_logs/step_0000382.npy create mode 100644 margin_logs/step_0000383.npy create mode 100644 margin_logs/step_0000384.npy create mode 100644 margin_logs/step_0000385.npy create mode 100644 margin_logs/step_0000386.npy create mode 100644 margin_logs/step_0000387.npy create mode 100644 margin_logs/step_0000388.npy create mode 100644 margin_logs/step_0000389.npy create mode 100644 margin_logs/step_0000390.npy create mode 100644 margin_logs/step_0000391.npy create mode 100644 margin_logs/step_0000392.npy create mode 100644 margin_logs/step_0000393.npy create mode 100644 margin_logs/step_0000394.npy create mode 100644 margin_logs/step_0000395.npy create mode 100644 margin_logs/step_0000396.npy create mode 100644 margin_logs/step_0000397.npy create mode 100644 margin_logs/step_0000398.npy create mode 100644 margin_logs/step_0000399.npy create mode 100644 margin_logs/step_0000400.npy create mode 100644 margin_logs/step_0000401.npy create mode 100644 margin_logs/step_0000402.npy create mode 100644 margin_logs/step_0000403.npy create mode 100644 margin_logs/step_0000404.npy create mode 100644 margin_logs/step_0000405.npy create mode 100644 margin_logs/step_0000406.npy create mode 100644 margin_logs/step_0000407.npy create mode 100644 margin_logs/step_0000408.npy create mode 100644 margin_logs/step_0000409.npy create mode 100644 margin_logs/step_0000410.npy create mode 100644 margin_logs/step_0000411.npy create mode 100644 margin_logs/step_0000412.npy create mode 100644 margin_logs/step_0000413.npy create mode 100644 margin_logs/step_0000414.npy create mode 100644 margin_logs/step_0000415.npy create mode 100644 margin_logs/step_0000416.npy create mode 100644 margin_logs/step_0000417.npy create mode 100644 margin_logs/step_0000418.npy create mode 100644 margin_logs/step_0000419.npy create mode 100644 margin_logs/step_0000420.npy create mode 100644 margin_logs/step_0000421.npy create mode 100644 margin_logs/step_0000422.npy create mode 100644 margin_logs/step_0000423.npy create mode 100644 margin_logs/step_0000424.npy create mode 100644 margin_logs/step_0000425.npy create mode 100644 margin_logs/step_0000426.npy create mode 100644 margin_logs/step_0000427.npy create mode 100644 margin_logs/step_0000428.npy create mode 100644 margin_logs/step_0000429.npy create mode 100644 margin_logs/step_0000430.npy create mode 100644 margin_logs/step_0000431.npy create mode 100644 margin_logs/step_0000432.npy create mode 100644 margin_logs/step_0000433.npy create mode 100644 margin_logs/step_0000434.npy create mode 100644 margin_logs/step_0000435.npy create mode 100644 margin_logs/step_0000436.npy create mode 100644 margin_logs/step_0000437.npy create mode 100644 margin_logs/step_0000438.npy create mode 100644 margin_logs/step_0000439.npy create mode 100644 margin_logs/step_0000440.npy create mode 100644 margin_logs/step_0000441.npy create mode 100644 margin_logs/step_0000442.npy create mode 100644 margin_logs/step_0000443.npy create mode 100644 margin_logs/step_0000444.npy create mode 100644 margin_logs/step_0000445.npy create mode 100644 margin_logs/step_0000446.npy create mode 100644 margin_logs/step_0000447.npy create mode 100644 margin_logs/step_0000448.npy create mode 100644 margin_logs/step_0000449.npy create mode 100644 margin_logs/step_0000450.npy create mode 100644 margin_logs/step_0000451.npy create mode 100644 margin_logs/step_0000452.npy create mode 100644 margin_logs/step_0000453.npy create mode 100644 margin_logs/step_0000454.npy create mode 100644 margin_logs/step_0000455.npy create mode 100644 margin_logs/step_0000456.npy create mode 100644 margin_logs/step_0000457.npy create mode 100644 margin_logs/step_0000458.npy create mode 100644 margin_logs/step_0000459.npy create mode 100644 margin_logs/step_0000460.npy create mode 100644 margin_logs/step_0000461.npy create mode 100644 margin_logs/step_0000462.npy create mode 100644 margin_logs/step_0000463.npy create mode 100644 margin_logs/step_0000464.npy create mode 100644 margin_logs/step_0000465.npy create mode 100644 margin_logs/step_0000466.npy create mode 100644 margin_logs/step_0000467.npy create mode 100644 margin_logs/step_0000468.npy create mode 100644 margin_logs/step_0000469.npy create mode 100644 margin_logs/step_0000470.npy create mode 100644 margin_logs/step_0000471.npy create mode 100644 margin_logs/step_0000472.npy create mode 100644 margin_logs/step_0000473.npy create mode 100644 margin_logs/step_0000474.npy create mode 100644 margin_logs/step_0000475.npy create mode 100644 margin_logs/step_0000476.npy create mode 100644 margin_logs/step_0000477.npy create mode 100644 margin_logs/step_0000478.npy create mode 100644 margin_logs/step_0000479.npy create mode 100644 margin_logs/step_0000480.npy create mode 100644 margin_logs/step_0000481.npy create mode 100644 margin_logs/step_0000482.npy create mode 100644 margin_logs/step_0000483.npy create mode 100644 margin_logs/step_0000484.npy create mode 100644 margin_logs/step_0000485.npy create mode 100644 margin_logs/step_0000486.npy create mode 100644 margin_logs/step_0000487.npy create mode 100644 margin_logs/step_0000488.npy create mode 100644 margin_logs/step_0000489.npy create mode 100644 margin_logs/step_0000490.npy create mode 100644 margin_logs/step_0000491.npy create mode 100644 margin_logs/step_0000492.npy create mode 100644 margin_logs/step_0000493.npy create mode 100644 margin_logs/step_0000494.npy create mode 100644 margin_logs/step_0000495.npy create mode 100644 margin_logs/step_0000496.npy create mode 100644 margin_logs/step_0000497.npy create mode 100644 margin_logs/step_0000498.npy create mode 100644 margin_logs/step_0000499.npy create mode 100644 margin_logs/step_0000500.npy create mode 100644 margin_logs/step_0000501.npy create mode 100644 margin_logs/step_0000502.npy create mode 100644 margin_logs/step_0000503.npy create mode 100644 margin_logs/step_0000504.npy create mode 100644 margin_logs/step_0000505.npy create mode 100644 margin_logs/step_0000506.npy create mode 100644 margin_logs/step_0000507.npy create mode 100644 margin_logs/step_0000508.npy create mode 100644 margin_logs/step_0000509.npy create mode 100644 margin_logs/step_0000510.npy create mode 100644 margin_logs/step_0000511.npy create mode 100644 margin_logs/step_0000512.npy create mode 100644 margin_logs/step_0000513.npy create mode 100644 margin_logs/step_0000514.npy create mode 100644 margin_logs/step_0000515.npy create mode 100644 margin_logs/step_0000516.npy create mode 100644 margin_logs/step_0000517.npy create mode 100644 margin_logs/step_0000518.npy create mode 100644 margin_logs/step_0000519.npy create mode 100644 margin_logs/step_0000520.npy create mode 100644 margin_logs/step_0000521.npy create mode 100644 margin_logs/step_0000522.npy create mode 100644 margin_logs/step_0000523.npy create mode 100644 margin_logs/step_0000524.npy create mode 100644 margin_logs/step_0000525.npy create mode 100644 margin_logs/step_0000526.npy create mode 100644 margin_logs/step_0000527.npy create mode 100644 margin_logs/step_0000528.npy create mode 100644 margin_logs/step_0000529.npy create mode 100644 margin_logs/step_0000530.npy create mode 100644 margin_logs/step_0000531.npy create mode 100644 margin_logs/step_0000532.npy create mode 100644 margin_logs/step_0000533.npy create mode 100644 margin_logs/step_0000534.npy create mode 100644 margin_logs/step_0000535.npy create mode 100644 margin_logs/step_0000536.npy create mode 100644 margin_logs/step_0000537.npy create mode 100644 margin_logs/step_0000538.npy create mode 100644 margin_logs/step_0000539.npy create mode 100644 margin_logs/step_0000540.npy create mode 100644 margin_logs/step_0000541.npy create mode 100644 margin_logs/step_0000542.npy create mode 100644 margin_logs/step_0000543.npy create mode 100644 margin_logs/step_0000544.npy create mode 100644 margin_logs/step_0000545.npy create mode 100644 margin_logs/step_0000546.npy create mode 100644 margin_logs/step_0000547.npy create mode 100644 margin_logs/step_0000548.npy create mode 100644 margin_logs/step_0000549.npy create mode 100644 margin_logs/step_0000550.npy create mode 100644 margin_logs/step_0000551.npy create mode 100644 margin_logs/step_0000552.npy create mode 100644 margin_logs/step_0000553.npy create mode 100644 margin_logs/step_0000554.npy create mode 100644 margin_logs/step_0000555.npy create mode 100644 margin_logs/step_0000556.npy create mode 100644 margin_logs/step_0000557.npy create mode 100644 margin_logs/step_0000558.npy create mode 100644 margin_logs/step_0000559.npy create mode 100644 margin_logs/step_0000560.npy create mode 100644 margin_logs/step_0000561.npy create mode 100644 margin_logs/step_0000562.npy create mode 100644 margin_logs/step_0000563.npy create mode 100644 margin_logs/step_0000564.npy create mode 100644 margin_logs/step_0000565.npy create mode 100644 margin_logs/step_0000566.npy create mode 100644 margin_logs/step_0000567.npy create mode 100644 margin_logs/step_0000568.npy create mode 100644 margin_logs/step_0000569.npy create mode 100644 margin_logs/step_0000570.npy create mode 100644 margin_logs/step_0000571.npy create mode 100644 margin_logs/step_0000572.npy create mode 100644 margin_logs/step_0000573.npy create mode 100644 margin_logs/step_0000574.npy create mode 100644 margin_logs/step_0000575.npy create mode 100644 margin_logs/step_0000576.npy create mode 100644 margin_logs/step_0000577.npy create mode 100644 margin_logs/step_0000578.npy create mode 100644 margin_logs/step_0000579.npy create mode 100644 margin_logs/step_0000580.npy create mode 100644 margin_logs/step_0000581.npy create mode 100644 margin_logs/step_0000582.npy create mode 100644 margin_logs/step_0000583.npy create mode 100644 margin_logs/step_0000584.npy create mode 100644 margin_logs/step_0000585.npy create mode 100644 margin_logs/step_0000586.npy create mode 100644 margin_logs/step_0000587.npy create mode 100644 margin_logs/step_0000588.npy create mode 100644 margin_logs/step_0000589.npy create mode 100644 margin_logs/step_0000590.npy create mode 100644 margin_logs/step_0000591.npy create mode 100644 margin_logs/step_0000592.npy create mode 100644 margin_logs/step_0000593.npy create mode 100644 margin_logs/step_0000594.npy create mode 100644 margin_logs/step_0000595.npy create mode 100644 margin_logs/step_0000596.npy create mode 100644 margin_logs/step_0000597.npy create mode 100644 margin_logs/step_0000598.npy create mode 100644 margin_logs/step_0000599.npy create mode 100644 margin_logs/step_0000600.npy create mode 100644 margin_logs/step_0000601.npy create mode 100644 margin_logs/step_0000602.npy create mode 100644 margin_logs/step_0000603.npy create mode 100644 margin_logs/step_0000604.npy create mode 100644 margin_logs/step_0000605.npy create mode 100644 margin_logs/step_0000606.npy create mode 100644 margin_logs/step_0000607.npy create mode 100644 margin_logs/step_0000608.npy create mode 100644 margin_logs/step_0000609.npy create mode 100644 margin_logs/step_0000610.npy create mode 100644 margin_logs/step_0000611.npy create mode 100644 margin_logs/step_0000612.npy create mode 100644 margin_logs/step_0000613.npy create mode 100644 margin_logs/step_0000614.npy create mode 100644 margin_logs/step_0000615.npy create mode 100644 margin_logs/step_0000616.npy create mode 100644 margin_logs/step_0000617.npy create mode 100644 margin_logs/step_0000618.npy create mode 100644 margin_logs/step_0000619.npy create mode 100644 margin_logs/step_0000620.npy create mode 100644 margin_logs/step_0000621.npy create mode 100644 margin_logs/step_0000622.npy create mode 100644 margin_logs/step_0000623.npy create mode 100644 margin_logs/step_0000624.npy create mode 100644 margin_logs/step_0000625.npy create mode 100644 margin_logs/step_0000626.npy create mode 100644 margin_logs/step_0000627.npy create mode 100644 margin_logs/step_0000628.npy create mode 100644 margin_logs/step_0000629.npy create mode 100644 margin_logs/step_0000630.npy create mode 100644 margin_logs/step_0000631.npy create mode 100644 margin_logs/step_0000632.npy create mode 100644 margin_logs/step_0000633.npy create mode 100644 margin_logs/step_0000634.npy create mode 100644 margin_logs/step_0000635.npy create mode 100644 margin_logs/step_0000636.npy create mode 100644 margin_logs/step_0000637.npy create mode 100644 margin_logs/step_0000638.npy create mode 100644 margin_logs/step_0000639.npy create mode 100644 margin_logs/step_0000640.npy create mode 100644 margin_logs/step_0000641.npy create mode 100644 margin_logs/step_0000642.npy create mode 100644 margin_logs/step_0000643.npy create mode 100644 margin_logs/step_0000644.npy create mode 100644 margin_logs/step_0000645.npy create mode 100644 margin_logs/step_0000646.npy create mode 100644 margin_logs/step_0000647.npy create mode 100644 margin_logs/step_0000648.npy create mode 100644 margin_logs/step_0000649.npy create mode 100644 margin_logs/step_0000650.npy create mode 100644 margin_logs/step_0000651.npy create mode 100644 margin_logs/step_0000652.npy create mode 100644 margin_logs/step_0000653.npy create mode 100644 margin_logs/step_0000654.npy create mode 100644 margin_logs/step_0000655.npy create mode 100644 margin_logs/step_0000656.npy create mode 100644 margin_logs/step_0000657.npy create mode 100644 margin_logs/step_0000658.npy create mode 100644 margin_logs/step_0000659.npy create mode 100644 margin_logs/step_0000660.npy create mode 100644 margin_logs/step_0000661.npy create mode 100644 margin_logs/step_0000662.npy create mode 100644 margin_logs/step_0000663.npy create mode 100644 margin_logs/step_0000664.npy create mode 100644 margin_logs/step_0000665.npy create mode 100644 margin_logs/step_0000666.npy create mode 100644 margin_logs/step_0000667.npy create mode 100644 margin_logs/step_0000668.npy create mode 100644 margin_logs/step_0000669.npy create mode 100644 margin_logs/step_0000670.npy create mode 100644 margin_logs/step_0000671.npy create mode 100644 margin_logs/step_0000672.npy create mode 100644 margin_logs/step_0000673.npy create mode 100644 margin_logs/step_0000674.npy create mode 100644 margin_logs/step_0000675.npy create mode 100644 margin_logs/step_0000676.npy create mode 100644 margin_logs/step_0000677.npy create mode 100644 margin_logs/step_0000678.npy create mode 100644 margin_logs/step_0000679.npy create mode 100644 margin_logs/step_0000680.npy create mode 100644 margin_logs/step_0000681.npy create mode 100644 model-00001-of-00007.safetensors create mode 100644 model-00002-of-00007.safetensors create mode 100644 model-00003-of-00007.safetensors create mode 100644 model-00004-of-00007.safetensors create mode 100644 model-00005-of-00007.safetensors create mode 100644 model-00006-of-00007.safetensors create mode 100644 model-00007-of-00007.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train.log create mode 100644 train_results.json create mode 100644 trainer_state.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..13685d2 --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +--- +library_name: transformers +base_model: W-61/llama-3-8b-base-sft-hh-helpful-4xh200 +tags: +- alignment-handbook +- new-dpo +- generated_from_trainer +datasets: +- Anthropic/hh-rlhf +model-index: +- name: llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449 + results: [] +--- + + + +# llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449 + +This model is a fine-tuned version of [W-61/llama-3-8b-base-sft-hh-helpful-4xh200](https://huggingface.co/W-61/llama-3-8b-base-sft-hh-helpful-4xh200) on the Anthropic/hh-rlhf dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-07 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 64 +- total_eval_batch_size: 32 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1 + +### Training results + + + +### Framework versions + +- Transformers 4.51.0 +- Pytorch 2.3.1+cu121 +- Datasets 2.21.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..b5ae8dd --- /dev/null +++ b/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 0.0, + "train_loss": 1.0911195537242244, + "train_runtime": 1739.0324, + "train_samples": 43598, + "train_samples_per_second": 25.07, + "train_steps_per_second": 0.392 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5092b09 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.51.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..76247c9 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.0" +} diff --git a/margin_logs/margins.jsonl b/margin_logs/margins.jsonl new file mode 100644 index 0000000..6d38404 --- /dev/null +++ b/margin_logs/margins.jsonl @@ -0,0 +1,681 @@ +{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.02287048101425171, "std": 0.42023447155952454, "min": -1.4034271240234375, "p10": -0.46674575805664065, "median": 0.04234886169433594, "p90": 0.4323463439941407, "max": 0.89263916015625, "pos_frac": 0.53125, "sample": [-0.06523895263671875, 0.436798095703125, 0.27811431884765625, -0.9194221496582031, 0.018890380859375, 0.20587158203125, 0.18878173828125, -0.3968696594238281, 0.26206207275390625, 0.2470550537109375, -0.040912628173828125, 0.4394989013671875, -0.44133758544921875, -0.39148712158203125, 0.2764854431152344, 0.89263916015625, -0.42584991455078125, -0.46125030517578125, -0.8638992309570312, -0.3508758544921875, 0.371368408203125, 0.887847900390625, -0.382904052734375, 0.36145782470703125, -0.4890003204345703, 0.052455902099609375, -0.036136627197265625, 0.23079299926757812, 0.2469482421875, 0.1643218994140625, -0.07129669189453125, 0.2790794372558594, 0.3637123107910156, -0.8916168212890625, 0.03298759460449219, -0.2790107727050781, -0.17860984802246094, 0.23892593383789062, 0.05171012878417969, -0.2564239501953125, -0.14655303955078125, 0.27777862548828125, 0.0810394287109375, -1.4034271240234375, -0.28739166259765625, -0.1489429473876953, 0.44918060302734375, 0.1693286895751953, 0.10933303833007812, -0.14766693115234375, -0.40944671630859375, -0.18532562255859375, 0.6261310577392578, -0.20856857299804688, 0.602569580078125, 0.05538177490234375, 0.1505279541015625, 0.1313800811767578, -0.006317138671875, 0.42195892333984375, -0.29936981201171875, -0.4691009521484375, 0.16705322265625, -0.5789260864257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000001.npy"} +{"epoch": 0.0014684287812041115, "step": 2, "batch_size": 64, "mean": -0.06572240591049194, "std": 0.3523969054222107, "min": -0.9291305541992188, "p10": -0.46334152221679686, "median": -0.05502510070800781, "p90": 0.3672500610351563, "max": 1.0444793701171875, "pos_frac": 0.4375, "sample": [-0.2829437255859375, 0.3027191162109375, -0.19867706298828125, -0.3062286376953125, 0.10318756103515625, 0.20131683349609375, -0.34906005859375, 0.2802886962890625, 0.1914520263671875, -0.31072998046875, 0.08922195434570312, 0.10284614562988281, -0.03655242919921875, -0.0604095458984375, -0.06208038330078125, 0.32562255859375, -0.37982177734375, 0.2746162414550781, -0.049640655517578125, 0.3752174377441406, -0.103973388671875, 0.0699462890625, 0.36417388916015625, -0.033428192138671875, 0.37265777587890625, -0.3787078857421875, -0.6610565185546875, 0.4720420837402344, 0.47701263427734375, -0.27928924560546875, -0.44719696044921875, -0.0965118408203125, -0.7628555297851562, 0.046764373779296875, 0.06670379638671875, -0.9291305541992188, -0.7122802734375, -0.16554832458496094, 0.1485595703125, -0.07539939880371094, 0.2588920593261719, 0.039890289306640625, 0.201690673828125, 0.0623016357421875, 1.0444793701171875, -0.37696075439453125, -0.02794647216796875, -0.223297119140625, -0.35730743408203125, -0.1309051513671875, -0.3106689453125, -0.11409187316894531, -0.1669769287109375, 0.131317138671875, -0.2361297607421875, 0.4093780517578125, -0.6485977172851562, 0.36856842041015625, -0.1951904296875, -0.4702606201171875, -0.7624168395996094, 0.008928298950195312, -0.31630706787109375, 0.022550582885742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000002.npy"} +{"epoch": 0.002936857562408223, "step": 3, "batch_size": 64, "mean": -0.0063214898109436035, "std": 0.29263725876808167, "min": -0.7884864807128906, "p10": -0.4050006866455078, "median": 0.01871967315673828, "p90": 0.3696033477783204, "max": 0.6647491455078125, "pos_frac": 0.515625, "sample": [-0.25975799560546875, -0.48389434814453125, -0.42066192626953125, -0.08747100830078125, 0.07093429565429688, 0.16068649291992188, 0.3574028015136719, 0.1372814178466797, 0.05029296875, 0.19922637939453125, -0.2255096435546875, -0.20222854614257812, -0.4464111328125, -0.3383941650390625, 0.025384902954101562, 0.3994903564453125, 0.22357177734375, -0.7884864807128906, 0.2975006103515625, -0.23572158813476562, -0.33176422119140625, -0.053375244140625, 0.3748321533203125, -0.5855560302734375, -0.4741363525390625, 0.04038047790527344, -0.0794219970703125, 0.030185699462890625, 0.2866497039794922, -0.06502532958984375, 0.5228195190429688, 0.2412261962890625, 0.04646492004394531, 0.012054443359375, -0.018838882446289062, 0.48188018798828125, -0.12237548828125, -0.3912086486816406, -0.15631103515625, 0.168914794921875, -0.07980728149414062, -0.10637664794921875, 0.45513153076171875, 0.086883544921875, -0.11824798583984375, -0.052280426025390625, 0.0696258544921875, 0.08156967163085938, -0.3565559387207031, -0.41091156005859375, 0.26354026794433594, -0.09646415710449219, 0.14566612243652344, 0.6647491455078125, 0.04693031311035156, 0.6484718322753906, 0.15070343017578125, -0.025989532470703125, 0.1833038330078125, 0.1400146484375, 0.031558990478515625, -0.2490692138671875, -0.07940673828125, -0.15824508666992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000003.npy"} +{"epoch": 0.004405286343612335, "step": 4, "batch_size": 64, "mean": -0.0759580135345459, "std": 0.36372819542884827, "min": -0.869476318359375, "p10": -0.5696212768554687, "median": -0.10717391967773438, "p90": 0.45080108642578126, "max": 0.6917762756347656, "pos_frac": 0.390625, "sample": [-0.12559127807617188, -0.15245819091796875, -0.1810169219970703, -0.156707763671875, -0.7438430786132812, 0.06967544555664062, 0.3477630615234375, -0.686676025390625, -0.04104804992675781, -0.4476470947265625, 0.05824089050292969, 0.6917762756347656, 0.471405029296875, -0.18747711181640625, -0.570220947265625, 0.011486053466796875, -0.7214088439941406, 0.14379119873046875, -0.39173126220703125, 0.14043617248535156, -0.5819931030273438, -0.117462158203125, 0.1939849853515625, -0.4255218505859375, -0.01291656494140625, -0.212677001953125, 0.21015167236328125, 0.0788421630859375, -0.2456378936767578, 0.052978515625, -0.083343505859375, -0.085662841796875, -0.32323455810546875, -0.14973831176757812, -0.600067138671875, 0.5031337738037109, -0.09992218017578125, 0.2648468017578125, -0.3113365173339844, -0.0982208251953125, 0.15727996826171875, -0.12126922607421875, -0.447357177734375, 0.4513092041015625, 0.21038055419921875, 0.3154296875, -0.4470672607421875, 0.4837646484375, 0.2553272247314453, -0.47265625, 0.3073463439941406, 0.5628700256347656, 0.24791526794433594, -0.5682220458984375, -0.36545372009277344, 0.449615478515625, 0.6627197265625, -0.3406829833984375, -0.0524444580078125, -0.1144256591796875, -0.1698150634765625, -0.35148048400878906, -0.869476318359375, -0.12987136840820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000004.npy"} +{"epoch": 0.005873715124816446, "step": 5, "batch_size": 64, "mean": -0.02732786536216736, "std": 0.39857810735702515, "min": -1.950653076171875, "p10": -0.4790130615234375, "median": 0.03408527374267578, "p90": 0.4086189270019531, "max": 0.7193527221679688, "pos_frac": 0.546875, "sample": [-0.4909515380859375, 0.41156005859375, 0.7193527221679688, 0.3675537109375, 0.1884765625, -1.950653076171875, 0.2838287353515625, 0.0603485107421875, 0.03170585632324219, -0.23768234252929688, -0.122528076171875, 0.435455322265625, 0.007946014404296875, -0.14524459838867188, -0.4288597106933594, 0.10703277587890625, 0.1904296875, 0.1372241973876953, 0.0235595703125, -0.464080810546875, 0.11975288391113281, 0.40175628662109375, -0.3351097106933594, -0.154632568359375, 0.11123847961425781, -0.15714263916015625, -0.03324127197265625, -0.22548675537109375, -0.48541259765625, -0.03655242919921875, 0.1568756103515625, 0.2053508758544922, 0.427642822265625, 0.25836181640625, 0.036464691162109375, -0.0419158935546875, 0.4827728271484375, -0.30080413818359375, 0.41355133056640625, 0.1424560546875, 0.141754150390625, 0.0824432373046875, 0.11666107177734375, -0.8726043701171875, 0.294036865234375, 0.5059661865234375, 0.07204437255859375, -0.3251800537109375, -0.1342620849609375, -0.39054107666015625, 0.222564697265625, -0.20138168334960938, 0.20781707763671875, -0.5222053527832031, -0.00186920166015625, -0.173583984375, -0.202056884765625, -0.49900054931640625, -0.26360321044921875, 0.3003120422363281, -0.5909881591796875, 0.3358039855957031, -0.3206939697265625, 0.35918426513671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000005.npy"} +{"epoch": 0.007342143906020558, "step": 6, "batch_size": 64, "mean": 0.027667373418807983, "std": 0.36957481503486633, "min": -0.9196224212646484, "p10": -0.496502685546875, "median": 0.07563304901123047, "p90": 0.4421979904174806, "max": 1.063995361328125, "pos_frac": 0.609375, "sample": [-0.03833770751953125, 0.12307167053222656, 0.000675201416015625, 0.0297393798828125, 0.14764785766601562, 0.5104522705078125, -0.9196224212646484, -0.5377044677734375, 0.5940093994140625, -0.2735443115234375, 0.08971786499023438, 0.06788063049316406, -0.46527099609375, 0.4574146270751953, -0.2684364318847656, -0.040313720703125, -0.0630035400390625, -0.2759532928466797, 0.2109527587890625, 0.02155303955078125, 0.004657745361328125, -0.24341201782226562, -0.416473388671875, -0.0488739013671875, 0.11568450927734375, 0.05733489990234375, 0.39391326904296875, 0.19002532958984375, 0.18288421630859375, -0.16960525512695312, -0.5098876953125, 0.6573657989501953, 0.20983123779296875, -0.07938003540039062, 0.08576202392578125, 0.601531982421875, -0.75054931640625, -0.5572166442871094, 0.2816429138183594, 0.02831268310546875, 0.36224365234375, 0.2419281005859375, 0.1853790283203125, 0.2714347839355469, 0.8223762512207031, 0.15557289123535156, 0.3389396667480469, 0.11944580078125, -0.4395904541015625, -0.2173004150390625, 0.11421966552734375, 0.26842498779296875, 0.08338546752929688, -0.05895042419433594, 0.4066925048828125, -0.1961841583251953, 0.173187255859375, -0.7134857177734375, 0.132659912109375, 0.095703125, -0.182373046875, -0.10394287109375, 1.063995361328125, -0.557525634765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000006.npy"} +{"epoch": 0.00881057268722467, "step": 7, "batch_size": 64, "mean": 0.030851304531097412, "std": 0.38409531116485596, "min": -0.7110519409179688, "p10": -0.4211700439453125, "median": -0.004039764404296875, "p90": 0.5575794219970706, "max": 0.95855712890625, "pos_frac": 0.5, "sample": [-0.06478118896484375, -0.667083740234375, -0.04272651672363281, -0.17118453979492188, -0.32442474365234375, 0.3579254150390625, -0.5526237487792969, 0.461090087890625, -0.6569671630859375, 0.18210601806640625, -0.1610107421875, 0.30944061279296875, -0.01221466064453125, 0.58258056640625, -0.569976806640625, 0.6551513671875, 0.20571136474609375, -0.09366416931152344, 0.17059898376464844, 0.02761077880859375, 0.042667388916015625, 0.4704132080078125, 0.8304481506347656, -0.19561004638671875, -0.1800537109375, 0.05692291259765625, -0.12700462341308594, 0.06492424011230469, -0.3686943054199219, 0.34452056884765625, 0.045169830322265625, 0.47263336181640625, -0.00868988037109375, 0.1340789794921875, 0.34930419921875, 0.371826171875, 0.0006103515625, -0.10022163391113281, -0.38641357421875, -0.7110519409179688, -0.234619140625, -0.2928466796875, 0.09600830078125, -0.04242706298828125, 0.13776779174804688, 0.0321502685546875, 0.219329833984375, -0.3668670654296875, 0.504913330078125, -0.15348052978515625, 0.95855712890625, -0.311737060546875, -0.436065673828125, 0.9234542846679688, -0.10088348388671875, 0.29662322998046875, -0.1958160400390625, -0.0848236083984375, 0.65985107421875, 0.5801506042480469, 0.17012786865234375, -0.306121826171875, -0.32904052734375, -0.491058349609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000007.npy"} +{"epoch": 0.010279001468428781, "step": 8, "batch_size": 64, "mean": -0.010592788457870483, "std": 0.39657652378082275, "min": -1.546478271484375, "p10": -0.45051116943359376, "median": 0.023336410522460938, "p90": 0.48334197998046874, "max": 0.76129150390625, "pos_frac": 0.53125, "sample": [0.3569488525390625, 0.48175048828125, 0.27198028564453125, -0.10066604614257812, -0.43068695068359375, 0.021310806274414062, 0.5755271911621094, -0.009613037109375, 0.356292724609375, 0.23093414306640625, -0.5371780395507812, 0.05802154541015625, 0.17451095581054688, 0.2557868957519531, 0.1176910400390625, -0.134185791015625, 0.10250473022460938, 0.1966400146484375, 0.23147201538085938, 0.20780181884765625, -0.19420623779296875, -1.546478271484375, -0.1273345947265625, 0.76129150390625, -0.10384368896484375, -0.8636016845703125, 0.0075836181640625, 0.4981575012207031, 0.524017333984375, -0.374114990234375, -0.05486297607421875, -0.8019256591796875, -0.20975112915039062, 0.3148651123046875, -0.042568206787109375, -0.087799072265625, -0.151702880859375, 0.030748367309570312, 0.05193138122558594, 0.5313720703125, 0.18773651123046875, -0.25888824462890625, 0.07537841796875, -0.320343017578125, 0.10816001892089844, -0.929901123046875, -0.3537139892578125, -0.1009674072265625, 0.3343315124511719, -0.0886688232421875, -0.4572792053222656, 0.4840240478515625, -0.4723052978515625, -0.4347190856933594, 0.408294677734375, 0.027057647705078125, -0.10887908935546875, -0.22794342041015625, -0.0347442626953125, 0.20868682861328125, 0.025362014770507812, 0.160186767578125, -0.055072784423828125, 0.557647705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000008.npy"} +{"epoch": 0.011747430249632892, "step": 9, "batch_size": 64, "mean": 0.028307169675827026, "std": 0.4377601444721222, "min": -1.091064453125, "p10": -0.6275459289550781, "median": 0.03544807434082031, "p90": 0.5717700958251957, "max": 1.0733718872070312, "pos_frac": 0.546875, "sample": [0.09737396240234375, -0.18262481689453125, 0.4240226745605469, 0.4305877685546875, -0.02410888671875, -0.09162139892578125, 0.18534088134765625, 0.19037628173828125, 0.7161865234375, 0.4748039245605469, -0.4491462707519531, -0.300079345703125, -0.6085281372070312, -0.79913330078125, -0.662139892578125, -1.091064453125, 0.25028228759765625, -0.24482345581054688, -0.6356964111328125, 0.05207061767578125, -0.4473762512207031, 0.11467552185058594, -0.2551422119140625, -0.08099365234375, 0.8000259399414062, 0.38748931884765625, 0.43141937255859375, -0.1795654296875, -0.09756851196289062, 1.0733718872070312, -0.1474456787109375, -0.16823196411132812, -0.09447479248046875, 0.22222900390625, -0.1457061767578125, 0.0401611328125, -0.11308479309082031, 0.011226654052734375, 0.6133270263671875, 0.7460556030273438, 0.06109619140625, -0.0189056396484375, -0.813385009765625, 0.23270416259765625, 0.07711601257324219, 0.25595855712890625, 0.1221466064453125, -0.0600433349609375, -0.06421661376953125, -0.13931655883789062, 0.1914825439453125, 0.032375335693359375, 0.37282562255859375, 0.42954444885253906, 0.15203475952148438, 0.3812408447265625, 0.9319305419921875, 0.6358184814453125, 0.3592205047607422, 0.011444091796875, -0.6510086059570312, 0.03852081298828125, -0.9200286865234375, -0.24936676025390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000009.npy"} +{"epoch": 0.013215859030837005, "step": 10, "batch_size": 64, "mean": 0.04434826970100403, "std": 0.39100170135498047, "min": -1.0137176513671875, "p10": -0.413632583618164, "median": 0.013975143432617188, "p90": 0.5165412902832032, "max": 1.19439697265625, "pos_frac": 0.515625, "sample": [0.2727813720703125, -0.2717437744140625, -0.16520309448242188, -0.049175262451171875, -0.30748748779296875, 0.2420806884765625, 0.020654678344726562, 0.09513092041015625, 0.524169921875, 0.01922607421875, 0.03290557861328125, 0.49874114990234375, -0.4726905822753906, 0.867889404296875, -1.0137176513671875, 0.5782928466796875, -0.3519248962402344, 0.008724212646484375, -0.1551036834716797, 0.32830810546875, 0.28823089599609375, -0.093170166015625, 0.304718017578125, 0.2261066436767578, -0.155853271484375, -0.23550796508789062, -0.44672393798828125, -0.0212860107421875, 0.1473369598388672, 0.2773551940917969, -0.2162017822265625, -0.4400787353515625, -0.013957977294921875, 0.04819488525390625, -0.1666107177734375, -0.2921943664550781, -0.738861083984375, 0.5316352844238281, 0.2778453826904297, -0.06663703918457031, -0.10917854309082031, -0.3029022216796875, -0.06506919860839844, 0.39794921875, -0.256103515625, 1.0105819702148438, 0.15808868408203125, 0.36019134521484375, -0.0944671630859375, 0.22785186767578125, -0.46984100341796875, -0.2203216552734375, 0.16357421875, 0.3595008850097656, -0.10784912109375, -0.6580581665039062, 0.39745330810546875, 0.16133880615234375, 0.2517547607421875, 1.19439697265625, 0.6267929077148438, -0.1801776885986328, 0.15876007080078125, -0.08217620849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000010.npy"} +{"epoch": 0.014684287812041116, "step": 11, "batch_size": 64, "mean": 0.016193389892578125, "std": 0.40519535541534424, "min": -1.169677734375, "p10": -0.38324813842773436, "median": 0.000370025634765625, "p90": 0.5429534912109376, "max": 1.0765762329101562, "pos_frac": 0.5, "sample": [0.19642257690429688, 0.2459259033203125, 0.16986083984375, 0.19841384887695312, 0.04689788818359375, -0.0189666748046875, 0.01970672607421875, 0.10541725158691406, -0.3009452819824219, -0.3902130126953125, -0.028299331665039062, -0.22309112548828125, 0.312408447265625, 0.13663482666015625, -0.1368408203125, -0.078216552734375, 0.24445343017578125, 0.12168502807617188, 0.707611083984375, 0.7709579467773438, 0.553436279296875, 0.421051025390625, -0.4535808563232422, 0.1489734649658203, -0.24309539794921875, 0.20868682861328125, -0.43511199951171875, -0.29213714599609375, 0.053314208984375, 0.31656646728515625, -0.2634429931640625, -0.25970458984375, 0.7466259002685547, 0.7404632568359375, -0.303131103515625, 0.5159149169921875, 0.08365631103515625, 1.0765762329101562, -0.8674774169921875, -0.11023330688476562, -0.18966293334960938, -0.3022346496582031, 0.24412918090820312, -0.7927398681640625, 0.09487152099609375, -0.13856124877929688, 0.245849609375, -0.2082233428955078, -1.169677734375, -0.0240631103515625, -0.047504425048828125, -0.2043304443359375, 0.51849365234375, -0.5001373291015625, 0.3654975891113281, -0.17545318603515625, -0.3635406494140625, -0.09309768676757812, -0.25205230712890625, 0.68017578125, 0.2457122802734375, 0.05950736999511719, -0.32675743103027344, -0.36699676513671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000011.npy"} +{"epoch": 0.016152716593245228, "step": 12, "batch_size": 64, "mean": -0.011764273047447205, "std": 0.39213281869888306, "min": -1.247467041015625, "p10": -0.4428184509277343, "median": -0.0026121139526367188, "p90": 0.3972772598266603, "max": 1.1016387939453125, "pos_frac": 0.5, "sample": [-0.054931640625, 0.101226806640625, 0.5074996948242188, -1.247467041015625, -0.0815582275390625, -0.09731292724609375, 0.153961181640625, 0.23241424560546875, 0.24811553955078125, 0.48799896240234375, 0.5938186645507812, -0.8863677978515625, -0.46079254150390625, 0.2689361572265625, -0.0977325439453125, -0.07403564453125, -0.08943939208984375, 0.016754150390625, 0.21479034423828125, -0.23778915405273438, -0.2808990478515625, -0.28430938720703125, -0.021392822265625, -0.8749771118164062, -0.6138229370117188, 0.18097686767578125, -0.40087890625, 0.8131561279296875, -0.286895751953125, -0.2027740478515625, -0.74774169921875, 1.1016387939453125, -0.307830810546875, 0.21657180786132812, -0.0384368896484375, 0.14559364318847656, -0.13222885131835938, -0.8183364868164062, -0.37078857421875, 0.07109451293945312, 0.08592605590820312, 0.20221710205078125, 0.09021759033203125, 0.4091224670410156, -0.16135406494140625, 0.316131591796875, -0.164642333984375, -0.0489654541015625, 0.19068241119384766, 0.16908645629882812, 0.0033588409423828125, 0.20508384704589844, 0.048583984375, 0.36963844299316406, 0.2143096923828125, -0.0367431640625, -0.1523284912109375, -0.07487869262695312, -0.020517349243164062, 0.04470634460449219, 0.4426994323730469, 0.14061355590820312, 0.3369140625, -0.00858306884765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000012.npy"} +{"epoch": 0.01762114537444934, "step": 13, "batch_size": 64, "mean": -0.047012150287628174, "std": 0.364255428314209, "min": -1.33050537109375, "p10": -0.4458404541015625, "median": -0.05298614501953125, "p90": 0.30151405334472664, "max": 1.255096435546875, "pos_frac": 0.4375, "sample": [0.1667938232421875, -0.30043601989746094, 0.14919281005859375, -0.0451812744140625, 0.1566925048828125, 0.1255340576171875, -0.02519989013671875, 0.028364181518554688, 0.32077789306640625, -0.4885749816894531, 0.2403106689453125, 0.5418930053710938, -0.30573272705078125, -0.34033966064453125, -0.3102455139160156, 1.255096435546875, -0.2371063232421875, -0.54498291015625, -0.09037017822265625, 0.331512451171875, -0.14129638671875, 0.0882720947265625, -0.22870635986328125, -0.1725788116455078, 0.2115802764892578, -0.5938262939453125, -0.07824325561523438, -0.3224163055419922, -0.11048126220703125, 0.259674072265625, -0.3221893310546875, 0.539459228515625, 0.2555999755859375, 0.2791557312011719, -1.33050537109375, -0.02823638916015625, -0.071014404296875, -0.36716270446777344, 0.2231769561767578, 0.439300537109375, -0.09713363647460938, 0.1936054229736328, -0.060791015625, -0.11131668090820312, 0.1425628662109375, 0.11996650695800781, -0.10684776306152344, 0.19369125366210938, -0.23321533203125, -0.16770172119140625, -0.25930023193359375, 0.24034881591796875, -0.10912322998046875, 0.12041854858398438, -0.7473373413085938, 0.0178375244140625, -0.01570892333984375, 0.14537811279296875, 0.31109619140625, -0.686614990234375, -0.459564208984375, -0.413818359375, 0.1171417236328125, -0.2999114990234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000013.npy"} +{"epoch": 0.01908957415565345, "step": 14, "batch_size": 64, "mean": -0.009398102760314941, "std": 0.41669103503227234, "min": -1.182403564453125, "p10": -0.45866889953613277, "median": -0.011407852172851562, "p90": 0.44987030029296876, "max": 1.34564208984375, "pos_frac": 0.484375, "sample": [-0.39453887939453125, 0.22813987731933594, 0.13343429565429688, 0.5767173767089844, 0.6347808837890625, 0.29390716552734375, -0.402374267578125, -0.026702880859375, 0.44788360595703125, 0.01555633544921875, -0.46744537353515625, -0.2677497863769531, 0.9580230712890625, 0.17157745361328125, -0.007724761962890625, -0.1069793701171875, 0.004535675048828125, 0.06220245361328125, 0.07183456420898438, -0.13935089111328125, -0.035369873046875, -0.180511474609375, 0.2013092041015625, 0.365570068359375, 0.907073974609375, -0.32419586181640625, -1.182403564453125, 0.0277862548828125, 0.0834808349609375, 0.11945343017578125, -0.198577880859375, -0.15015792846679688, 0.35778045654296875, 0.4784812927246094, -0.056026458740234375, 0.3504180908203125, 0.045848846435546875, -0.25067138671875, -0.5846843719482422, 0.2467041015625, -0.36676025390625, -0.0150909423828125, -0.8471832275390625, 0.45072174072265625, -0.28383636474609375, -0.11985015869140625, 0.2253704071044922, -0.20718765258789062, -0.02410888671875, -0.4381904602050781, -0.799224853515625, 0.0096893310546875, -0.4999542236328125, -0.6620826721191406, 0.153778076171875, 1.34564208984375, 0.16765594482421875, -0.1238861083984375, -0.4366455078125, 0.04666900634765625, -0.10549354553222656, 0.05745697021484375, -0.10584259033203125, -0.03015899658203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000014.npy"} +{"epoch": 0.020558002936857563, "step": 15, "batch_size": 64, "mean": 0.03897008299827576, "std": 0.4234123229980469, "min": -1.4273605346679688, "p10": -0.41009674072265623, "median": 0.06708431243896484, "p90": 0.5243247985839845, "max": 0.800506591796875, "pos_frac": 0.546875, "sample": [0.07308006286621094, 0.800506591796875, 0.62176513671875, -0.014194488525390625, 0.5373001098632812, 0.1269359588623047, -0.05018043518066406, 0.04088592529296875, -0.14908599853515625, 0.168487548828125, -0.368865966796875, -0.07359123229980469, 0.3956298828125, -0.6740341186523438, 0.36643218994140625, -0.3999176025390625, 0.23547744750976562, -0.0148162841796875, 0.7113800048828125, 0.21675872802734375, 0.4296150207519531, -1.4273605346679688, -0.115234375, -0.124786376953125, -0.6150608062744141, -0.0408935546875, 0.308685302734375, 0.5419692993164062, -0.3723907470703125, 0.27825927734375, -0.414459228515625, -0.9023284912109375, -0.30138397216796875, -0.09383201599121094, 0.31866455078125, 0.755828857421875, -0.6724014282226562, 0.44913482666015625, 0.47637939453125, -0.29077720642089844, -0.059520721435546875, 0.2473773956298828, 0.08864212036132812, 0.11743736267089844, 0.11130523681640625, -0.11505126953125, 0.05231666564941406, -0.1323223114013672, 0.1671600341796875, 0.11122512817382812, 0.06108856201171875, 0.4639434814453125, -0.267120361328125, -0.11083221435546875, 0.4344635009765625, 0.7389678955078125, 0.494049072265625, -0.39763641357421875, 0.3258171081542969, -0.2268524169921875, 0.11156082153320312, -0.7858428955078125, 0.3981895446777344, -0.07186126708984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000015.npy"} +{"epoch": 0.022026431718061675, "step": 16, "batch_size": 64, "mean": 0.09726375341415405, "std": 0.37172242999076843, "min": -0.879669189453125, "p10": -0.37824935913085933, "median": 0.10445404052734375, "p90": 0.5905632019042969, "max": 0.858795166015625, "pos_frac": 0.625, "sample": [0.19019317626953125, 0.4799957275390625, -0.31462860107421875, 0.224609375, -0.7449722290039062, -0.062267303466796875, 0.0680084228515625, -0.007617950439453125, 0.08191680908203125, 0.858795166015625, 0.1602783203125, 0.753021240234375, 0.173370361328125, 0.58026123046875, 0.047153472900390625, -0.40435791015625, 0.0972137451171875, -0.6290740966796875, 0.05621337890625, 0.34716796875, 0.6839218139648438, 0.2839241027832031, 0.3307685852050781, -0.1306610107421875, 0.2739830017089844, -0.31497955322265625, -0.0305328369140625, -0.054042816162109375, 0.10043907165527344, 0.10846900939941406, 0.24984359741210938, 0.13521194458007812, -0.176849365234375, -0.13284683227539062, -0.060821533203125, 0.2630615234375, 0.5949783325195312, -0.879669189453125, -0.4062347412109375, -0.035037994384765625, 0.23688507080078125, 0.08458709716796875, -0.031452178955078125, 0.4200439453125, 0.31804656982421875, 0.6143951416015625, -0.08119583129882812, -0.08306884765625, -0.46338653564453125, -0.285736083984375, 0.1978740692138672, 0.34958648681640625, 0.254150390625, 0.3503608703613281, -0.0137481689453125, -0.31732940673828125, 0.12434959411621094, 0.568328857421875, 0.6738739013671875, 0.6480712890625, 0.0886688232421875, 0.43294525146484375, 0.24739837646484375, -0.866973876953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000016.npy"} +{"epoch": 0.023494860499265784, "step": 17, "batch_size": 64, "mean": 0.015069544315338135, "std": 0.41791844367980957, "min": -1.1902618408203125, "p10": -0.4908855438232422, "median": 0.07535552978515625, "p90": 0.3731548309326172, "max": 1.2110748291015625, "pos_frac": 0.578125, "sample": [0.2750205993652344, 0.04689979553222656, 0.19811630249023438, 0.08203125, 0.1265411376953125, -0.23158836364746094, 0.35723876953125, -0.6027374267578125, 0.0686798095703125, -0.199554443359375, 0.5059814453125, 0.3990936279296875, -0.0925750732421875, -0.210205078125, -0.4695930480957031, 0.273712158203125, -0.4990348815917969, 0.22057533264160156, 0.29668426513671875, -0.44705963134765625, 0.103912353515625, -0.016469955444335938, 0.23983383178710938, 0.36907196044921875, -0.4172821044921875, 0.05684661865234375, 0.3749046325683594, -0.4821510314941406, 0.1087646484375, 0.1982421875, -0.49462890625, 0.3656768798828125, 0.3521881103515625, 0.03878021240234375, 0.20204925537109375, -0.0150604248046875, -0.695953369140625, 0.143341064453125, -0.1585216522216797, 0.1490478515625, 0.971710205078125, 0.10746383666992188, -0.3535785675048828, 0.19746780395507812, -1.1902618408203125, -0.47918701171875, 0.0535125732421875, -0.4983253479003906, -0.01889801025390625, -0.10223388671875, -0.07890701293945312, -0.16227340698242188, -0.1486358642578125, -0.23046112060546875, 0.114501953125, -1.146453857421875, 0.7750244140625, 0.6732254028320312, 1.2110748291015625, -0.0968017578125, 0.3616828918457031, 0.2346954345703125, 0.10565185546875, 0.14363861083984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000017.npy"} +{"epoch": 0.024963289280469897, "step": 18, "batch_size": 64, "mean": 0.06848806142807007, "std": 0.3612028658390045, "min": -1.17144775390625, "p10": -0.29725894927978513, "median": 0.016361236572265625, "p90": 0.47162742614746106, "max": 0.9110870361328125, "pos_frac": 0.53125, "sample": [0.3938484191894531, -1.17144775390625, 0.4869384765625, 0.2597198486328125, -0.23044204711914062, -0.10054779052734375, -0.2858428955078125, 0.0824432373046875, 0.020782470703125, 0.34593772888183594, -0.3329315185546875, -0.10016822814941406, -0.2928009033203125, 0.4359016418457031, -0.2514190673828125, 0.9110870361328125, 0.4050312042236328, -0.15895843505859375, 0.24576568603515625, -0.10347747802734375, -0.13997459411621094, -0.13085174560546875, 0.271575927734375, 0.0020465850830078125, 0.3640937805175781, -0.04308128356933594, 0.4889984130859375, -0.06427001953125, -0.29916954040527344, 0.10344123840332031, 0.16184234619140625, 0.3882274627685547, -0.012256622314453125, -0.20107269287109375, 0.404144287109375, -0.00164794921875, 0.41259765625, 0.2035655975341797, -0.32576751708984375, -0.2250213623046875, 0.4194526672363281, 0.01194000244140625, -0.0544586181640625, 0.63812255859375, 0.4927825927734375, 0.24649810791015625, 0.2667999267578125, -0.4084739685058594, 0.2121734619140625, 0.6689109802246094, 0.3640785217285156, -0.12675094604492188, -0.19586944580078125, -0.3070030212402344, 0.33636474609375, 0.250762939453125, 0.503326416015625, -0.16545677185058594, -0.9583740234375, 0.3740520477294922, -0.15598678588867188, -0.2587699890136719, 0.38155364990234375, -0.06927871704101562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000018.npy"} +{"epoch": 0.02643171806167401, "step": 19, "batch_size": 64, "mean": 0.08037763833999634, "std": 0.358684241771698, "min": -0.6400909423828125, "p10": -0.3041576385498047, "median": 0.09328556060791016, "p90": 0.4556358337402344, "max": 1.34185791015625, "pos_frac": 0.640625, "sample": [0.3461875915527344, 0.11254310607910156, -0.3047218322753906, -0.062103271484375, 0.45740509033203125, -0.07598876953125, 0.22044754028320312, -0.09346389770507812, 0.15402793884277344, 1.34185791015625, 0.451507568359375, -0.18352508544921875, 0.5311012268066406, -0.2907257080078125, 0.3843536376953125, -0.6400909423828125, -0.5195465087890625, 0.09301376342773438, -0.29290008544921875, -0.1717987060546875, 0.53558349609375, 0.13629150390625, 0.14522933959960938, -0.4321136474609375, 0.06937599182128906, 0.3126850128173828, 0.42608642578125, 0.10906982421875, -0.24672317504882812, -0.275146484375, -0.19612884521484375, 0.3927745819091797, 0.09355735778808594, 0.04447746276855469, 0.21964263916015625, -0.2599773406982422, -0.3028411865234375, -0.16298294067382812, 0.07907867431640625, -0.31793212890625, 0.1068267822265625, 0.1011199951171875, 0.03899383544921875, -0.6284637451171875, 0.23926162719726562, 0.8798294067382812, 0.15900421142578125, 0.7327804565429688, 0.0795440673828125, 0.40476036071777344, 0.04248809814453125, 0.16030120849609375, 0.12324333190917969, 0.0086517333984375, 0.44449615478515625, 0.11481094360351562, 0.01911163330078125, -0.019054412841796875, -0.294830322265625, -0.511505126953125, 0.691619873046875, -0.1744232177734375, 0.2601776123046875, 0.33783721923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000019.npy"} +{"epoch": 0.027900146842878122, "step": 20, "batch_size": 64, "mean": 0.046672046184539795, "std": 0.37399789690971375, "min": -1.1619949340820312, "p10": -0.3291997909545898, "median": 0.08116340637207031, "p90": 0.4895580291748047, "max": 0.8569259643554688, "pos_frac": 0.59375, "sample": [0.08279609680175781, 0.1089019775390625, -0.23363113403320312, 0.52764892578125, 0.4584197998046875, 0.4888343811035156, -0.15506744384765625, -0.3878631591796875, -0.086090087890625, 0.223968505859375, -1.1619949340820312, 0.1474456787109375, 0.0471038818359375, -0.11668014526367188, 0.15121841430664062, -0.0198822021484375, -0.15126419067382812, 0.6041450500488281, -0.059230804443359375, 0.1627044677734375, 0.25689697265625, -0.9395904541015625, 0.029552459716796875, -0.28544044494628906, 0.1142730712890625, 0.052730560302734375, -0.17404937744140625, 0.11529350280761719, 0.4167060852050781, -0.1386737823486328, -0.25121307373046875, -0.5757827758789062, -0.19298553466796875, 0.08136367797851562, 0.3071022033691406, -0.0180206298828125, -0.10272216796875, -1.0747909545898438, -0.08385086059570312, -0.34795379638671875, 0.544830322265625, 0.179534912109375, 0.25811767578125, 0.09050369262695312, 0.080963134765625, 0.1985931396484375, 0.485443115234375, 0.472503662109375, 0.8569259643554688, -0.0095062255859375, -0.35407257080078125, -0.26676177978515625, 0.19797515869140625, 0.1386871337890625, 0.6629867553710938, -0.06963348388671875, 0.516143798828125, 0.33006858825683594, 0.10330009460449219, 0.0238800048828125, 0.3405799865722656, -0.171661376953125, 0.4898681640625, 0.067413330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000020.npy"} +{"epoch": 0.02936857562408223, "step": 21, "batch_size": 64, "mean": 0.1474984586238861, "std": 0.3634709119796753, "min": -0.723419189453125, "p10": -0.2752777099609375, "median": 0.1314220428466797, "p90": 0.6264558792114261, "max": 1.2214508056640625, "pos_frac": 0.671875, "sample": [0.32635498046875, 0.021070480346679688, 0.8905563354492188, 0.5196685791015625, 0.2195281982421875, 0.1618671417236328, -0.08082389831542969, -0.2817821502685547, -0.2698516845703125, 0.002655029296875, 0.857086181640625, -0.16355133056640625, -0.11597442626953125, 0.15728378295898438, 0.31781005859375, 0.7434921264648438, 0.18152999877929688, 0.1420879364013672, -0.1696949005126953, 0.1897125244140625, 0.0167236328125, 0.677276611328125, -0.10704803466796875, -0.20196914672851562, 0.13262939453125, 1.116973876953125, 0.106109619140625, -0.3267974853515625, -0.2776031494140625, -0.32053375244140625, -0.723419189453125, 0.37908935546875, 0.0729217529296875, 0.30052757263183594, -0.368011474609375, -0.147247314453125, -0.038425445556640625, 0.5401401519775391, 0.341217041015625, -0.19858551025390625, 0.40039825439453125, 0.24130630493164062, 0.34505462646484375, 0.030239105224609375, 0.019683837890625, -0.31935882568359375, 0.6634483337402344, -0.12625885009765625, 0.3321094512939453, 0.1842193603515625, 0.20565223693847656, 0.2879524230957031, 0.25458526611328125, 0.3422393798828125, 0.13021469116210938, -0.17774581909179688, 0.5116729736328125, 0.0988616943359375, 1.2214508056640625, 0.39272117614746094, -0.16469573974609375, 0.1011505126953125, 0.10162353515625, -0.2596168518066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000021.npy"} +{"epoch": 0.030837004405286344, "step": 22, "batch_size": 64, "mean": 0.1367432177066803, "std": 0.3962913155555725, "min": -0.7413253784179688, "p10": -0.3664276123046875, "median": 0.12553119659423828, "p90": 0.628402328491211, "max": 1.0591278076171875, "pos_frac": 0.65625, "sample": [0.13176727294921875, 0.381195068359375, -0.2176666259765625, 0.04533958435058594, 0.33740234375, -0.1754302978515625, 0.1895294189453125, 0.3329181671142578, 0.11929512023925781, 0.49005126953125, 0.47566986083984375, 0.2647705078125, -0.2307281494140625, 0.8960037231445312, -0.2532825469970703, -0.110198974609375, 0.11385345458984375, 0.11917304992675781, 0.0473785400390625, -0.368194580078125, 0.09317398071289062, 0.5118560791015625, -0.33477020263671875, -0.6802978515625, -0.3896636962890625, 0.16933631896972656, 0.8435134887695312, 0.3906707763671875, -0.20754241943359375, 0.812957763671875, 0.13957595825195312, 0.3671875, 0.16526031494140625, -0.11339187622070312, 0.3016624450683594, 0.04998779296875, 0.38199615478515625, -0.7413253784179688, -0.25334930419921875, 0.47734832763671875, 0.6184234619140625, 0.3037147521972656, 0.21942138671875, -0.3890724182128906, -0.04895591735839844, 0.48772430419921875, -0.3623046875, -0.562530517578125, 1.0591278076171875, 0.21588897705078125, 0.03557586669921875, 0.1549072265625, -0.23744964599609375, -0.1738567352294922, -0.431121826171875, 0.17083740234375, 0.087921142578125, 0.07336807250976562, 0.6026763916015625, 0.866729736328125, -0.040740966796875, 0.6326789855957031, 0.916015625, -0.020444869995117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000022.npy"} +{"epoch": 0.032305433186490456, "step": 23, "batch_size": 64, "mean": 0.19482776522636414, "std": 0.44403576850891113, "min": -0.6177215576171875, "p10": -0.28785400390625, "median": 0.17000198364257812, "p90": 0.6769685745239262, "max": 1.7056884765625, "pos_frac": 0.625, "sample": [-0.07014656066894531, 0.05693817138671875, 0.547393798828125, 0.0810699462890625, 0.3550224304199219, 0.3755378723144531, 0.33023834228515625, -0.25028228759765625, -0.13404273986816406, -0.2781982421875, 1.7056884765625, 0.9258956909179688, 0.344085693359375, 0.16305160522460938, 0.4906730651855469, -0.076934814453125, -0.0062808990478515625, 0.07576370239257812, 0.3538360595703125, 0.22523880004882812, -0.0371551513671875, -0.2919921875, 1.3695220947265625, 0.17695236206054688, -0.05773735046386719, -0.3694610595703125, -0.4470367431640625, -0.6177215576171875, 0.22798919677734375, -0.18310546875, -0.433685302734375, 0.4469146728515625, -0.307769775390625, 0.49596595764160156, 0.14493751525878906, -0.11614990234375, -0.1357879638671875, 0.2147369384765625, 0.024732589721679688, 0.5162124633789062, 0.3448677062988281, 0.3967781066894531, 0.2149200439453125, 0.20173263549804688, 0.05536651611328125, 1.4366607666015625, 0.2082977294921875, -0.003753662109375, 0.37813568115234375, -0.224365234375, -0.14125442504882812, 0.3909912109375, 0.4475517272949219, -0.3441028594970703, -0.04894447326660156, 0.36934661865234375, 0.762603759765625, 1.0135345458984375, -0.25838661193847656, 0.27392578125, 0.11069488525390625, -0.24169921875, 0.5671195983886719, 0.7240467071533203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000023.npy"} +{"epoch": 0.033773861967694566, "step": 24, "batch_size": 64, "mean": 0.24681302905082703, "std": 0.3852858543395996, "min": -1.05438232421875, "p10": -0.15871315002441405, "median": 0.2105693817138672, "p90": 0.676677703857422, "max": 1.017578125, "pos_frac": 0.8125, "sample": [0.819091796875, 0.22774887084960938, -0.025026321411132812, 0.09455108642578125, 0.38970947265625, 0.3717937469482422, 0.5186386108398438, -0.3398399353027344, 0.837371826171875, -0.1030120849609375, 0.5660266876220703, -0.15118026733398438, 0.15813446044921875, 0.6553421020507812, 0.5914955139160156, 0.07912826538085938, 0.3358192443847656, 0.6490898132324219, 0.6303672790527344, 0.193389892578125, 0.5692558288574219, -0.6009292602539062, -0.02081298828125, 0.10593414306640625, 0.8098602294921875, 0.4531745910644531, 0.16662979125976562, -0.5632171630859375, 0.685821533203125, 0.24640655517578125, 1.017578125, 0.4347076416015625, 0.026439666748046875, -0.26555633544921875, 0.411346435546875, -1.05438232421875, 0.0072784423828125, 0.1649017333984375, -0.486785888671875, 0.8258590698242188, 0.520538330078125, 0.12945556640625, -0.10779953002929688, -0.1619415283203125, 0.5302352905273438, 0.7685699462890625, 0.4233856201171875, 0.034759521484375, 0.60699462890625, 0.06989097595214844, 0.46099853515625, 0.05549430847167969, 0.16562652587890625, 0.1550579071044922, 0.0512237548828125, 0.545684814453125, 0.4118328094482422, 0.05966949462890625, 0.4324302673339844, 0.10976409912109375, 0.1558990478515625, 0.0264739990234375, 0.283721923828125, 0.6359176635742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000024.npy"} +{"epoch": 0.03524229074889868, "step": 25, "batch_size": 64, "mean": 0.18760085105895996, "std": 0.43097376823425293, "min": -0.7965469360351562, "p10": -0.26451873779296875, "median": 0.16068649291992188, "p90": 0.7383563995361329, "max": 1.2985687255859375, "pos_frac": 0.671875, "sample": [0.12804794311523438, 0.23276138305664062, 0.812713623046875, 0.28350067138671875, -0.15913772583007812, -0.01641082763671875, -0.7965469360351562, 0.7387237548828125, -0.234710693359375, 0.09479713439941406, -0.062652587890625, 1.13702392578125, 0.019969940185546875, 0.7374992370605469, 1.2985687255859375, -0.20354270935058594, -0.48266029357910156, 0.44549560546875, -0.15159988403320312, 0.16452789306640625, -0.756683349609375, 0.1047821044921875, 0.3458576202392578, 0.41656494140625, 0.0524749755859375, 0.5882492065429688, 0.1568450927734375, 0.70672607421875, -0.5544586181640625, -0.11836051940917969, 0.20113372802734375, 0.22026824951171875, -0.41257476806640625, 0.34706878662109375, 0.2742156982421875, -0.1481781005859375, -0.11646842956542969, 1.28704833984375, -0.151397705078125, 0.33989715576171875, -0.3827705383300781, 0.5273056030273438, 0.9083175659179688, 0.12749099731445312, 0.033061981201171875, -0.23712158203125, 0.16898345947265625, -0.082855224609375, 0.7791748046875, 0.4086265563964844, 0.154876708984375, 0.08495330810546875, 0.20885086059570312, 0.1884918212890625, 0.6402816772460938, -0.01061248779296875, 0.0061187744140625, 0.6921272277832031, 0.3874053955078125, -0.004108428955078125, 0.2913932800292969, 0.39542388916015625, -0.2762603759765625, 0.2279205322265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000025.npy"} +{"epoch": 0.03671071953010279, "step": 26, "batch_size": 64, "mean": 0.31715625524520874, "std": 0.5526702404022217, "min": -1.0053253173828125, "p10": -0.32676544189453116, "median": 0.3045196533203125, "p90": 1.0309844970703128, "max": 2.0223388671875, "pos_frac": 0.71875, "sample": [0.113128662109375, 0.014377593994140625, 0.460693359375, -0.12035369873046875, 0.77655029296875, 0.47946929931640625, 0.0391845703125, 0.46823883056640625, 0.16300201416015625, -0.10274887084960938, 0.555694580078125, 0.04736328125, 2.0223388671875, 0.46804046630859375, 0.0314483642578125, 0.842254638671875, 0.640228271484375, -0.22142791748046875, 0.609588623046875, -0.37191009521484375, 0.43927764892578125, -0.12311935424804688, 0.2072906494140625, 0.071136474609375, 0.04207611083984375, 1.3531112670898438, 0.8749923706054688, -0.553558349609375, 0.33710479736328125, 1.282806396484375, -0.48699951171875, -0.41898345947265625, 0.5139675140380859, 0.19086647033691406, -0.12320899963378906, -0.004852294921875, 1.2952117919921875, -0.1016845703125, 0.1168212890625, 1.7333297729492188, 0.305633544921875, 0.01271820068359375, 1.2339706420898438, 0.323577880859375, 0.37346649169921875, 0.6611099243164062, -1.0053253173828125, 0.7004241943359375, -0.12446403503417969, -0.11997032165527344, -0.1392364501953125, 0.19121551513671875, -0.4105339050292969, 0.464691162109375, 0.4080047607421875, 0.62451171875, 0.547760009765625, 1.0691757202148438, 0.9418716430664062, -0.4925880432128906, 0.30340576171875, -0.12099075317382812, 0.5562324523925781, 0.43259239196777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000026.npy"} +{"epoch": 0.0381791483113069, "step": 27, "batch_size": 64, "mean": 0.47805216908454895, "std": 0.5398308634757996, "min": -0.45752525329589844, "p10": -0.10123405456542968, "median": 0.39064788818359375, "p90": 1.0535186767578126, "max": 2.9734344482421875, "pos_frac": 0.859375, "sample": [0.3744697570800781, 0.6298332214355469, 0.6958694458007812, 0.027984619140625, 0.6280479431152344, 1.257659912109375, 0.142181396484375, 0.312225341796875, 1.226318359375, -0.45752525329589844, 0.7053928375244141, 0.3829193115234375, 0.40216827392578125, 0.23453521728515625, 0.91754150390625, 0.49668121337890625, 2.9734344482421875, 0.5941238403320312, 0.42498016357421875, 0.8998298645019531, 0.17241287231445312, 0.0204620361328125, 0.5248031616210938, -0.3114738464355469, 0.37218475341796875, 0.3603248596191406, 0.3461723327636719, 0.1545581817626953, 0.6294212341308594, 1.8933181762695312, 0.07790374755859375, 0.08979988098144531, -0.14921188354492188, 0.975311279296875, 0.26476287841796875, 0.0355224609375, 0.6416473388671875, 0.5415191650390625, -0.08802413940429688, 0.4095802307128906, -0.3013343811035156, 0.4989166259765625, -0.150421142578125, 0.8932571411132812, 0.07563400268554688, 1.264068603515625, -0.16461944580078125, 0.2936515808105469, 1.1271209716796875, 0.7782974243164062, 1.0570526123046875, 0.1056365966796875, 0.6535415649414062, 0.91778564453125, 0.7979583740234375, 0.41764068603515625, 1.0452728271484375, -0.01180267333984375, 0.34255027770996094, -0.10689544677734375, 0.39837646484375, 0.22625732421875, 0.3167266845703125, 0.2910003662109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000027.npy"} +{"epoch": 0.039647577092511016, "step": 28, "batch_size": 64, "mean": 0.3269844055175781, "std": 0.5473117828369141, "min": -1.1264495849609375, "p10": -0.23368549346923828, "median": 0.3130359649658203, "p90": 0.929747200012207, "max": 1.8195648193359375, "pos_frac": 0.6875, "sample": [0.5857925415039062, -0.001708984375, 0.16159820556640625, 0.206787109375, 0.9269313812255859, 0.2210693359375, 0.24153709411621094, 0.1654052734375, -0.2325439453125, -0.0023403167724609375, -0.17584228515625, 1.02142333984375, 1.8195648193359375, 0.6037750244140625, 0.23374176025390625, -0.1685047149658203, 0.1132659912109375, 1.6129913330078125, -0.047901153564453125, 0.02283477783203125, 0.48603057861328125, -0.37549591064453125, -0.08786773681640625, 0.5328369140625, 0.01104736328125, 0.7650146484375, 0.5304794311523438, 0.548492431640625, 1.4155616760253906, 0.4675025939941406, 0.3031654357910156, 0.72845458984375, 0.09131240844726562, -0.15814208984375, 0.3289756774902344, 0.0015659332275390625, 0.7048721313476562, 0.64306640625, 0.686676025390625, -0.23967361450195312, 0.41849517822265625, 0.5850391387939453, -0.08180046081542969, 0.9309539794921875, 1.2599639892578125, 0.4232635498046875, 0.5107955932617188, -0.2341747283935547, 0.9159584045410156, 0.39160919189453125, 1.1056060791015625, -1.0530242919921875, -0.23793792724609375, 0.322906494140625, -0.17375946044921875, 0.4518775939941406, -0.08287811279296875, 0.8892822265625, 0.920684814453125, -0.14557266235351562, 0.7374496459960938, -1.1264495849609375, -0.4029998779296875, -0.09003829956054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000028.npy"} +{"epoch": 0.041116005873715125, "step": 29, "batch_size": 64, "mean": 0.5435110330581665, "std": 0.47907721996307373, "min": -0.2703857421875, "p10": -0.05721931457519531, "median": 0.4859962463378906, "p90": 1.1236518859863283, "max": 1.654754638671875, "pos_frac": 0.859375, "sample": [1.0502395629882812, 0.03613853454589844, 1.4118499755859375, 0.40850830078125, 0.40686798095703125, 0.4542236328125, 0.196441650390625, 0.44173431396484375, -0.059017181396484375, 0.1060028076171875, -0.0530242919921875, 0.05158042907714844, 1.1742630004882812, 0.6664810180664062, -0.2703857421875, 0.8603057861328125, 0.8216552734375, 0.09227752685546875, 0.13301849365234375, 1.0702896118164062, -0.236419677734375, 0.9146804809570312, 0.9024620056152344, 0.2775096893310547, 1.3866195678710938, 0.8463363647460938, 0.1355133056640625, 0.69732666015625, 0.707916259765625, 0.620819091796875, 0.05397796630859375, 1.0380783081054688, 0.9883041381835938, 0.1738739013671875, 0.40185546875, 0.2219867706298828, -0.16268157958984375, 0.13527679443359375, 1.0994644165039062, 0.90570068359375, 0.871856689453125, 0.9801559448242188, 1.1340179443359375, 0.2855072021484375, 0.7523593902587891, -0.039459228515625, 1.0289154052734375, -0.0693206787109375, 0.8375968933105469, -0.13879966735839844, 0.023403167724609375, 0.028638839721679688, 1.4545440673828125, 0.4683074951171875, 0.6846961975097656, 1.654754638671875, 0.3289031982421875, 0.3986663818359375, 1.0158653259277344, 0.5656089782714844, 1.1815338134765625, 0.5036849975585938, -0.14714813232421875, 0.8723640441894531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000029.npy"} +{"epoch": 0.042584434654919234, "step": 30, "batch_size": 64, "mean": 0.7230584621429443, "std": 0.7328934073448181, "min": -0.6118450164794922, "p10": -0.0256057739257812, "median": 0.5879249572753906, "p90": 1.5614387512207033, "max": 3.876251220703125, "pos_frac": 0.890625, "sample": [0.3845176696777344, 0.2635536193847656, 0.07767677307128906, 0.601348876953125, 1.2273712158203125, 0.04061126708984375, 0.061100006103515625, -0.3961181640625, -0.0469970703125, 0.8810272216796875, 0.5745010375976562, 0.969329833984375, 1.5156326293945312, 1.2464866638183594, 1.14593505859375, 1.506805419921875, 0.8122482299804688, 1.8477020263671875, 0.4006805419921875, 1.792510986328125, 0.3214569091796875, 1.036468505859375, 3.876251220703125, 0.5130386352539062, 0.8560333251953125, 0.3034934997558594, 0.8386993408203125, 0.298614501953125, -0.3193073272705078, 0.26077842712402344, 0.4870452880859375, 0.21959304809570312, 0.75274658203125, 0.21504783630371094, 0.23157882690429688, 0.498077392578125, 2.1826934814453125, 1.76251220703125, 1.492767333984375, 0.9364032745361328, 0.0243072509765625, 1.314422607421875, 0.8279399871826172, 0.6727371215820312, -0.4527435302734375, 0.915283203125, 1.5810699462890625, 0.15038108825683594, 0.9537887573242188, 1.1121673583984375, 1.483978271484375, 1.1379776000976562, 0.3411540985107422, -0.6118450164794922, 0.08387184143066406, 0.8296279907226562, 0.4936981201171875, -0.053279876708984375, 0.91973876953125, -0.07549858093261719, 0.2212066650390625, 1.842529296875, 0.5493698120117188, 0.34394264221191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000030.npy"} +{"epoch": 0.04405286343612335, "step": 31, "batch_size": 64, "mean": 0.5276015996932983, "std": 0.7071222066879272, "min": -1.0256805419921875, "p10": -0.1848886489868164, "median": 0.4151649475097656, "p90": 1.5833168029785158, "max": 2.940093994140625, "pos_frac": 0.78125, "sample": [0.3182411193847656, 1.427703857421875, -0.18590545654296875, 0.5142173767089844, 2.040283203125, 0.6020965576171875, 0.00701904296875, 0.09102630615234375, 0.08607864379882812, -0.3568878173828125, 2.940093994140625, 0.034912109375, -0.0241851806640625, 0.30795860290527344, -0.09017181396484375, 0.1771411895751953, 0.90386962890625, 0.4360542297363281, 1.3033294677734375, 0.7772712707519531, -0.17089080810546875, 0.06509780883789062, 0.13835906982421875, 0.35826873779296875, -0.19287872314453125, 0.3592414855957031, 0.591156005859375, 0.4707603454589844, 0.5934200286865234, 0.1453857421875, 1.625762939453125, 0.7523574829101562, 2.1017303466796875, 1.2422027587890625, -0.0054302215576171875, -1.0256805419921875, 0.139007568359375, 0.6290664672851562, -0.17429351806640625, 1.211517333984375, 0.93072509765625, 0.6501388549804688, -0.2117767333984375, 0.2517280578613281, -0.0552978515625, -0.57427978515625, 0.42090606689453125, 0.5308456420898438, 0.1659088134765625, -0.18251609802246094, 0.2623176574707031, 1.6089401245117188, 1.7441558837890625, 1.0493927001953125, -0.22910308837890625, 0.5398483276367188, 0.7239532470703125, 0.8177642822265625, 0.9296073913574219, 0.09682846069335938, 1.523529052734375, 1.7419548034667969, 0.409423828125, 0.4571990966796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000031.npy"} +{"epoch": 0.04552129221732746, "step": 32, "batch_size": 64, "mean": 0.7771281003952026, "std": 0.9538834095001221, "min": -0.47658538818359375, "p10": -0.05105133056640621, "median": 0.5420560836791992, "p90": 2.123696899414063, "max": 4.82568359375, "pos_frac": 0.859375, "sample": [0.9959259033203125, 0.3302040100097656, 1.0192413330078125, 1.0769882202148438, 0.8199424743652344, 0.7337799072265625, -0.27393341064453125, 0.980682373046875, 0.5524959564208984, 0.6914596557617188, 1.4624710083007812, 0.7009925842285156, 0.15567779541015625, 0.09993362426757812, 0.02197265625, 1.3996505737304688, -0.000972747802734375, 0.95635986328125, 0.08842849731445312, 2.7681427001953125, 0.23863983154296875, 1.45135498046875, 2.1896514892578125, 0.088226318359375, 0.9182052612304688, 0.5241775512695312, 0.061248779296875, 0.24793243408203125, -0.17617416381835938, 1.0587539672851562, 0.21556854248046875, 0.05178642272949219, 0.2982921600341797, 3.305450439453125, 1.4681320190429688, 0.6487407684326172, 1.11468505859375, 0.3124237060546875, -0.06660842895507812, 0.04695892333984375, 2.3912124633789062, 1.011220932006836, -0.20952606201171875, -0.014751434326171875, 4.82568359375, 0.5316162109375, 0.03543853759765625, 2.1880035400390625, 2.5827178955078125, -0.47658538818359375, -0.26285362243652344, 0.132110595703125, 1.9736480712890625, 0.4666900634765625, -0.4429664611816406, 0.048488616943359375, 1.2413253784179688, 1.360382080078125, 0.32300567626953125, 0.804931640625, 0.24015235900878906, 1.0026321411132812, 1.162750244140625, 0.24398040771484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000032.npy"} +{"epoch": 0.04698972099853157, "step": 33, "batch_size": 64, "mean": 0.6502406597137451, "std": 0.6424391865730286, "min": -0.5984420776367188, "p10": -0.060972595214843564, "median": 0.6674442291259766, "p90": 1.379373931884766, "max": 2.779571533203125, "pos_frac": 0.890625, "sample": [-0.24915313720703125, 0.4920635223388672, 0.8399677276611328, 0.753387451171875, 2.037750244140625, -0.13872146606445312, 0.44989013671875, 0.235809326171875, 0.1460590362548828, 0.81451416015625, 1.1784286499023438, 1.2390518188476562, 1.2015380859375, 0.7401580810546875, 1.6524505615234375, 0.5310211181640625, 0.844512939453125, -0.5593643188476562, 0.6754913330078125, -0.4715118408203125, 0.1312103271484375, 1.4083480834960938, 0.9441413879394531, 1.1463851928710938, 0.8568038940429688, 0.198486328125, 1.5921173095703125, 0.14862823486328125, 0.1996307373046875, 2.779571533203125, 0.17977142333984375, 0.1916961669921875, 1.921051025390625, 0.1531829833984375, 0.3746986389160156, 0.6593971252441406, 0.20462799072265625, 0.7693290710449219, 0.8638839721679688, 0.7999191284179688, 0.4795722961425781, 1.2891311645507812, 0.14617919921875, 0.12044143676757812, 0.6944046020507812, 1.0026779174804688, 0.2164764404296875, 1.80426025390625, 0.5185222625732422, -0.5984420776367188, 0.72991943359375, -0.3558540344238281, 0.6945343017578125, 0.38907623291015625, 1.311767578125, 0.800994873046875, 1.0286788940429688, -0.576690673828125, 0.6872024536132812, 0.6527023315429688, 0.40756988525390625, 0.441436767578125, 0.5739517211914062, 1.2206649780273438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000033.npy"} +{"epoch": 0.048458149779735685, "step": 34, "batch_size": 64, "mean": 0.746705949306488, "std": 0.8102946877479553, "min": -0.6964111328125, "p10": -0.1118879318237304, "median": 0.5267505645751953, "p90": 1.9621887207031254, "max": 2.7895660400390625, "pos_frac": 0.84375, "sample": [2.2600555419921875, -0.382476806640625, 0.0338134765625, 0.17436981201171875, 2.7895660400390625, 0.22679519653320312, 1.836151123046875, 0.1152191162109375, 0.2958984375, 0.21121978759765625, 0.5993423461914062, -0.04025077819824219, 0.7791061401367188, 0.04611968994140625, 1.2389678955078125, 2.6451568603515625, 0.40437889099121094, -0.0302581787109375, 1.8288421630859375, 0.352386474609375, 0.9916000366210938, -0.34340667724609375, 1.1016788482666016, 1.3715934753417969, 0.767303466796875, 1.2220535278320312, 1.5850830078125, -0.3909149169921875, 0.4646759033203125, 1.05535888671875, 0.3616790771484375, 0.2365264892578125, 1.4599800109863281, 1.2305450439453125, 2.499114990234375, 1.1837387084960938, 0.7829151153564453, 2.016204833984375, 0.11853408813476562, -0.14258956909179688, 0.5471763610839844, 0.4322090148925781, 0.8033866882324219, 1.3935165405273438, 0.5063247680664062, 0.7366256713867188, 0.04695701599121094, 0.2702674865722656, 1.2300567626953125, -0.17953109741210938, 0.4559974670410156, -0.6964111328125, 0.9442825317382812, 0.5979461669921875, 0.260711669921875, -0.14369964599609375, 1.6713638305664062, 0.21090316772460938, 0.50103759765625, 0.6701431274414062, 2.1377105712890625, 2.41534423828125, 0.05348014831542969, -0.032695770263671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000034.npy"} +{"epoch": 0.049926578560939794, "step": 35, "batch_size": 64, "mean": 1.1969325542449951, "std": 1.0398361682891846, "min": -0.39159393310546875, "p10": 0.24703006744384767, "median": 0.8615331649780273, "p90": 2.5184783935546875, "max": 5.392669677734375, "pos_frac": 0.9375, "sample": [0.9136810302734375, 0.5919227600097656, 2.534637451171875, 1.7511711120605469, 0.27216339111328125, 0.7329120635986328, 0.44903564453125, -0.39159393310546875, -0.35486602783203125, 1.2593841552734375, 2.0475616455078125, 0.2452259063720703, 0.8866043090820312, 0.8664302825927734, 0.8566360473632812, 0.45723724365234375, 1.8353538513183594, 3.3847885131835938, 0.585723876953125, 0.5452785491943359, 0.7119560241699219, 2.1163330078125, 3.7198486328125, 0.5039901733398438, 0.6290512084960938, 1.3246383666992188, 0.5266895294189453, 1.4467315673828125, 0.42350006103515625, 0.8121795654296875, 5.392669677734375, 1.4181709289550781, 1.2644805908203125, 1.5477828979492188, 0.4375591278076172, 3.1197357177734375, 0.2512397766113281, 1.59246826171875, 2.1557769775390625, 1.7052459716796875, 0.6746559143066406, 2.48077392578125, 0.7834396362304688, 1.868408203125, 0.5668754577636719, 0.0865936279296875, 1.522705078125, 2.7360992431640625, 0.8537063598632812, 0.4889984130859375, 1.0793037414550781, 0.1898193359375, 0.7211170196533203, 1.8010711669921875, 0.9594573974609375, 0.8452854156494141, 1.6186065673828125, -0.20038223266601562, 1.327850341796875, -0.3899078369140625, 0.7706146240234375, 3.0047836303710938, 1.6714935302734375, 0.5729751586914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000035.npy"} +{"epoch": 0.0513950073421439, "step": 36, "batch_size": 64, "mean": 1.0765833854675293, "std": 1.2660794258117676, "min": -2.039825439453125, "p10": -0.1757331848144531, "median": 0.8190517425537109, "p90": 2.945435333251954, "max": 4.1096649169921875, "pos_frac": 0.796875, "sample": [1.2470016479492188, -0.2464580535888672, 0.8471717834472656, 0.534332275390625, 1.1215667724609375, 0.6744594573974609, 0.4143524169921875, 0.07220458984375, -0.00981903076171875, 2.6346588134765625, 1.9031906127929688, -0.12758445739746094, -0.18674468994140625, 0.42926597595214844, 0.12749481201171875, 1.1361618041992188, -0.32384681701660156, 1.402313232421875, 1.6745147705078125, 0.245758056640625, -0.0168609619140625, -0.3453083038330078, -0.1500396728515625, 0.3146839141845703, 3.7512664794921875, 3.0732765197753906, 1.3279953002929688, -0.6160964965820312, 3.0111083984375, 0.0812530517578125, 0.7633266448974609, 1.266448974609375, -0.2580604553222656, 3.4254150390625, -0.06280136108398438, 0.08790969848632812, 3.73492431640625, 0.131500244140625, 2.7111053466796875, 0.22112274169921875, 2.7921981811523438, 1.0425224304199219, 2.683258056640625, 1.1128349304199219, 2.566558837890625, 0.8632850646972656, -0.016178131103515625, 4.1096649169921875, 0.7909317016601562, -2.039825439453125, 1.924530029296875, 1.0083847045898438, 2.3319625854492188, 0.32218170166015625, 2.1575164794921875, 1.612142562866211, 0.306549072265625, 0.6573562622070312, 2.62005615234375, 0.31793212890625, 0.9768753051757812, 3.379486083984375, 1.0265579223632812, 0.33238983154296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000036.npy"} +{"epoch": 0.05286343612334802, "step": 37, "batch_size": 64, "mean": 0.9973729848861694, "std": 1.3546226024627686, "min": -1.63958740234375, "p10": -0.1776153564453124, "median": 0.567108154296875, "p90": 2.824113464355469, "max": 6.561920166015625, "pos_frac": 0.828125, "sample": [1.771148681640625, 0.09513282775878906, 1.3722076416015625, 1.5762176513671875, -0.21652984619140625, 0.14624786376953125, -0.37265777587890625, 0.8405418395996094, 1.2815780639648438, 0.08441162109375, 0.151397705078125, 2.82421875, 0.5826950073242188, -0.39111328125, -0.5906829833984375, 0.32269287109375, 0.8065814971923828, 0.23133277893066406, 1.9109249114990234, 6.561920166015625, 0.7534866333007812, 4.8064727783203125, 1.2612838745117188, 1.0320663452148438, 1.2748031616210938, 1.4491729736328125, 0.5515213012695312, 0.2530555725097656, 1.2029342651367188, 0.5291728973388672, 0.3574485778808594, -0.0721435546875, 0.42620849609375, -1.0369644165039062, 0.3907470703125, 0.289337158203125, 0.4066276550292969, 1.9856414794921875, 2.1691818237304688, 3.168243408203125, -0.016994476318359375, 2.8781509399414062, 0.4447021484375, 1.2469329833984375, 0.14479827880859375, 0.5400276184082031, -0.08681488037109375, -0.639190673828125, 0.5925827026367188, 1.5365447998046875, 1.8402786254882812, 0.01959991455078125, 2.1740798950195312, 3.4232254028320312, 0.33983612060546875, 2.8238677978515625, 1.59307861328125, -1.63958740234375, -0.06830024719238281, 0.9882888793945312, 1.97747802734375, 3.2366180419921875, 0.08591842651367188, 0.21018409729003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000037.npy"} +{"epoch": 0.05433186490455213, "step": 38, "batch_size": 64, "mean": 1.3659417629241943, "std": 1.6389647722244263, "min": -1.14990234375, "p10": 0.014782333374023543, "median": 0.7885608673095703, "p90": 3.2000808715820335, "max": 8.080780029296875, "pos_frac": 0.890625, "sample": [0.6635169982910156, 8.080780029296875, 4.6068115234375, 1.9689979553222656, 0.22835540771484375, 0.12179946899414062, 0.9399642944335938, 0.3187713623046875, 1.2687454223632812, 0.77935791015625, 1.5674476623535156, 0.6243095397949219, 0.3441925048828125, 2.6112823486328125, 1.2030258178710938, -0.24592018127441406, 0.36954498291015625, 0.37017822265625, -0.3471660614013672, 1.756927490234375, 0.8768539428710938, 1.3774871826171875, 2.5783767700195312, 0.15166854858398438, 3.4215545654296875, 0.2665576934814453, 2.2142257690429688, 2.4918441772460938, 1.6430549621582031, 0.12809371948242188, 0.6522617340087891, 2.6998291015625, -1.0324554443359375, 0.7930679321289062, 0.6639461517333984, 0.3870086669921875, 0.5740966796875, 5.8039398193359375, 0.7773017883300781, 0.7197189331054688, 3.4144744873046875, 5.61309814453125, 1.9157867431640625, 0.7840538024902344, 0.6161899566650391, 0.157928466796875, -0.0310821533203125, 0.334716796875, 0.7182693481445312, 1.4850997924804688, -1.14990234375, 0.6238975524902344, -0.0604400634765625, 1.4867286682128906, 1.3578205108642578, 1.828704833984375, 1.5310897827148438, 5.1415863037109375, 1.8444976806640625, 0.35773468017578125, 1.6680145263671875, -0.346893310546875, 1.3699493408203125, 2.339588165283203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000038.npy"} +{"epoch": 0.055800293685756244, "step": 39, "batch_size": 64, "mean": 1.5928758382797241, "std": 1.4390220642089844, "min": -3.077972412109375, "p10": 0.159661865234375, "median": 1.3081684112548828, "p90": 3.6072647094726564, "max": 5.1002655029296875, "pos_frac": 0.9375, "sample": [2.3711471557617188, 2.3833465576171875, 5.1002655029296875, 2.007232666015625, 1.4590301513671875, -0.7244720458984375, 1.9925918579101562, 2.503589630126953, 1.058206558227539, 2.176607131958008, 0.4338226318359375, 3.1937484741210938, 1.7240447998046875, 0.99615478515625, 3.61083984375, 0.943359375, 0.5213966369628906, 0.023822784423828125, 0.152099609375, 0.7901382446289062, 0.8360366821289062, 1.5935821533203125, 2.3125457763671875, 1.1271076202392578, 0.7700176239013672, 2.09686279296875, -0.221435546875, 1.5769405364990234, 4.54693603515625, 0.17730712890625, 0.84735107421875, 0.4602947235107422, 3.2364578247070312, 2.0514373779296875, -3.077972412109375, 1.9178581237792969, 0.48328590393066406, 2.2504348754882812, 3.8771018981933594, 4.4530487060546875, 0.9497394561767578, 1.8200225830078125, 3.9138870239257812, 0.84368896484375, 1.2179908752441406, 0.49642181396484375, -0.38555908203125, 0.7885818481445312, 1.398345947265625, 1.0406856536865234, 0.84185791015625, 3.505084991455078, 3.4199066162109375, 2.62664794921875, 0.6049289703369141, 4.143898010253906, 3.5989227294921875, 0.9130954742431641, 1.8572578430175781, 0.5944919586181641, 0.5456352233886719, 0.07883453369140625, 0.8395538330078125, 2.2579593658447266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000039.npy"} +{"epoch": 0.05726872246696035, "step": 40, "batch_size": 64, "mean": 1.4550960063934326, "std": 1.6168969869613647, "min": -0.796173095703125, "p10": -0.15202865600585933, "median": 1.1399431228637695, "p90": 3.8512977600097664, "max": 7.0899810791015625, "pos_frac": 0.796875, "sample": [-0.0041961669921875, -0.171600341796875, 0.9000473022460938, 1.1478424072265625, 1.7231674194335938, 0.40460205078125, 0.453948974609375, 0.3822784423828125, 0.24963951110839844, 2.3627243041992188, 5.5780792236328125, 3.9507293701171875, 0.4897727966308594, 3.5570335388183594, 1.6430511474609375, -0.3286476135253906, 1.7451705932617188, 2.260345458984375, 0.29067039489746094, -0.796173095703125, 2.7915077209472656, -0.1794910430908203, 0.867767333984375, -0.0429229736328125, 2.341644287109375, 1.4640045166015625, 0.2652740478515625, 1.1320438385009766, 0.2735137939453125, 0.020477294921875, 3.943878173828125, -0.063934326171875, 3.2657241821289062, -0.4401092529296875, 0.3569774627685547, 4.519287109375, -0.10636138916015625, 3.6352767944335938, 0.6914749145507812, 1.159799575805664, 4.614284515380859, 0.7538967132568359, 0.7725715637207031, 7.0899810791015625, -0.25807952880859375, 1.7203750610351562, -0.29681396484375, 1.664703369140625, 2.6889190673828125, 2.0080032348632812, 1.157257080078125, 2.345592498779297, 3.1494216918945312, 1.5474910736083984, 0.8373031616210938, 1.4275131225585938, 4.5256500244140625, -0.1024322509765625, 1.5690231323242188, 0.3752899169921875, -0.00429534912109375, 1.6073341369628906, 1.7033615112304688, 0.4954719543457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000040.npy"} +{"epoch": 0.05873715124816446, "step": 41, "batch_size": 64, "mean": 1.8601820468902588, "std": 1.9838576316833496, "min": -3.1455230712890625, "p10": 0.17854251861572268, "median": 1.3451805114746094, "p90": 4.399060058593751, "max": 8.156150817871094, "pos_frac": 0.921875, "sample": [0.7600860595703125, 0.6204605102539062, 3.444580078125, 1.1609878540039062, 1.1115875244140625, 2.1210098266601562, 3.1347198486328125, 2.0017242431640625, -0.7457733154296875, 3.6581192016601562, 5.7264404296875, 0.8606796264648438, 2.614288330078125, 8.156150817871094, 0.20485687255859375, 0.4119415283203125, 1.08642578125, 1.4584579467773438, 0.40570831298828125, -0.792755126953125, 1.4449882507324219, 3.3947792053222656, 4.5734710693359375, 1.0725078582763672, 6.8910064697265625, 1.4473381042480469, 4.251617431640625, 4.462249755859375, 0.7181377410888672, 0.2870979309082031, 2.2901153564453125, 0.14923095703125, 6.57989501953125, 0.5816879272460938, 0.9088363647460938, 3.3261795043945312, 1.5010757446289062, 2.63946533203125, 0.64715576171875, 2.6401748657226562, 3.9508056640625, 1.5170211791992188, -3.1455230712890625, 3.851348876953125, 0.6175155639648438, 0.5323486328125, 2.1852035522460938, 1.7238578796386719, 2.365224838256836, 0.5721855163574219, 1.346649169921875, 1.3437118530273438, 0.9392547607421875, 0.844879150390625, 0.5970306396484375, 1.2918968200683594, 0.32247161865234375, 3.25054931640625, 0.40691375732421875, 0.1672649383544922, -0.3264427185058594, -0.26717185974121094, 1.8956222534179688, 5.8623199462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000041.npy"} +{"epoch": 0.06020558002936858, "step": 42, "batch_size": 64, "mean": 2.4012622833251953, "std": 2.264364719390869, "min": -1.7234725952148438, "p10": 0.14524459838867199, "median": 1.9027366638183594, "p90": 5.38767547607422, "max": 10.277374267578125, "pos_frac": 0.90625, "sample": [2.1001663208007812, -0.31462860107421875, 1.7999210357666016, 9.008941650390625, 10.277374267578125, 2.5292587280273438, 0.2439117431640625, 6.35498046875, 2.099254608154297, 3.0672149658203125, 1.5278472900390625, 5.944740295410156, 2.6706008911132812, 7.149749755859375, 0.8226165771484375, 2.5368804931640625, 3.007537841796875, 0.10295867919921875, 2.6710433959960938, 3.1967697143554688, 0.9283828735351562, 1.1222801208496094, 3.48785400390625, 0.7785110473632812, 0.95635986328125, 2.3324356079101562, -0.3743438720703125, 2.38702392578125, 1.8348274230957031, -0.2699432373046875, 1.9706459045410156, 5.4720458984375, 0.35210418701171875, 5.555328369140625, 2.67803955078125, 1.4669036865234375, 0.7678871154785156, 1.5419559478759766, 1.604705810546875, -0.35558319091796875, 4.1780242919921875, -0.102386474609375, 1.592620849609375, 5.1908111572265625, 1.3199348449707031, 4.362541198730469, 0.7113990783691406, 0.2873802185058594, 1.3081245422363281, 1.3293914794921875, 0.8432807922363281, 5.063720703125, -1.7234725952148438, 0.5830059051513672, 2.1158981323242188, 3.26177978515625, 0.4300041198730469, 2.128082275390625, 4.3185272216796875, 1.1564483642578125, 3.9297637939453125, 5.125633239746094, 0.9566249847412109, 4.279083251953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000042.npy"} +{"epoch": 0.06167400881057269, "step": 43, "batch_size": 64, "mean": 2.2915761470794678, "std": 1.8442620038986206, "min": -1.3382720947265625, "p10": 0.20471076965332036, "median": 2.2409238815307617, "p90": 4.2679183959960945, "max": 9.910430908203125, "pos_frac": 0.953125, "sample": [0.17993545532226562, 0.2793388366699219, 0.04212379455566406, 3.0245819091796875, 3.8369293212890625, 1.1729583740234375, 3.1352767944335938, 2.0756683349609375, 2.4247207641601562, 6.244384765625, 0.84954833984375, 2.090494155883789, 0.6218185424804688, 1.8914566040039062, 2.8049583435058594, 0.6611747741699219, 3.2745513916015625, 4.906486511230469, 1.9125442504882812, 0.1100616455078125, 5.8448486328125, 2.9197235107421875, 4.715629577636719, 0.9420890808105469, 2.5420150756835938, 2.2569656372070312, 0.4594459533691406, 2.238210678100586, -0.0892181396484375, 4.3333740234375, 1.0724258422851562, 2.414926528930664, 4.1151885986328125, 2.717041015625, 3.1447601318359375, 3.3616905212402344, 0.7147903442382812, 2.2436370849609375, 1.5338554382324219, 0.26251983642578125, 1.2592926025390625, 1.8533973693847656, 5.473670959472656, 3.3400344848632812, 0.9420394897460938, -1.3382720947265625, 0.8331813812255859, 2.9290313720703125, 2.4869842529296875, 3.13623046875, 3.0084190368652344, 3.4650039672851562, 2.0988006591796875, 0.1757984161376953, 2.0932159423828125, 9.910430908203125, 3.722412109375, 1.3335742950439453, -0.13690757751464844, 0.5988121032714844, 3.1652069091796875, 4.072071075439453, 2.6092910766601562, 0.3462200164794922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000043.npy"} +{"epoch": 0.0631424375917768, "step": 44, "batch_size": 64, "mean": 2.59177565574646, "std": 2.016450881958008, "min": -0.8762359619140625, "p10": 0.38601455688476566, "median": 2.179595947265625, "p90": 5.080787658691406, "max": 9.62158203125, "pos_frac": 0.953125, "sample": [3.7275161743164062, 2.5944671630859375, 1.6561145782470703, 0.5362453460693359, 6.453006744384766, 2.4590072631835938, 2.3112716674804688, 1.53155517578125, 1.7818260192871094, 4.9842071533203125, 5.938667297363281, 0.6170539855957031, 4.184028625488281, 5.0717620849609375, 4.694427490234375, 3.115447998046875, 0.8129081726074219, 1.96044921875, 1.9695167541503906, 5.0089569091796875, 0.2933483123779297, 2.6399993896484375, 2.5745697021484375, 6.73895263671875, 2.1693572998046875, 2.9775390625, 0.42157745361328125, 1.4966354370117188, 5.08465576171875, 1.7102489471435547, 2.4789352416992188, -0.2145843505859375, 1.6813850402832031, 1.3010025024414062, 2.0637550354003906, 2.9074859619140625, 1.1730575561523438, 1.3952102661132812, 4.4058380126953125, 1.4979248046875, 1.263580322265625, 2.5147552490234375, 3.7124176025390625, 3.3727874755859375, 1.2836036682128906, 3.2575531005859375, 3.16265869140625, 2.155609130859375, 0.3707733154296875, 0.2785682678222656, 5.4033660888671875, 3.7871017456054688, 2.1898345947265625, 3.5688400268554688, 1.32330322265625, 9.62158203125, 0.6666374206542969, -0.5729217529296875, 3.3179397583007812, 1.1030006408691406, -0.8762359619140625, 0.2302532196044922, 1.0339508056640625, 7.499351501464844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000044.npy"} +{"epoch": 0.06461086637298091, "step": 45, "batch_size": 64, "mean": 2.0580525398254395, "std": 2.3884527683258057, "min": -1.5616302490234375, "p10": -0.14740734100341796, "median": 1.5436782836914062, "p90": 5.3272441864013675, "max": 11.2528076171875, "pos_frac": 0.84375, "sample": [3.1046524047851562, 1.011962890625, -0.0214996337890625, 2.1346473693847656, 0.18279647827148438, 1.4576549530029297, 2.715160369873047, 6.28839111328125, 3.9791259765625, 2.9083328247070312, 0.6134185791015625, 7.134590148925781, 3.1790390014648438, 0.36995697021484375, 0.00685882568359375, 1.7134475708007812, -0.1494598388671875, 1.683074951171875, 0.08579254150390625, 5.156497955322266, 0.06158447265625, 0.4638671875, 2.7192230224609375, 1.0085487365722656, 0.6035842895507812, 4.764617919921875, 6.135795593261719, 4.392608642578125, 0.20955657958984375, 0.5912628173828125, 4.492034912109375, 2.1043319702148438, 4.9505615234375, -0.293182373046875, -0.672698974609375, 1.5665130615234375, 6.4234771728515625, 3.3507003784179688, -0.042755126953125, 1.5697402954101562, 1.29693603515625, 0.2271728515625, -1.1969223022460938, -0.9599761962890625, 1.520843505859375, 11.2528076171875, 0.9487991333007812, 6.2930755615234375, 3.1636734008789062, -0.6758041381835938, 2.392333984375, 0.24912261962890625, 1.8039989471435547, 0.4456787109375, 1.3539161682128906, 5.400421142578125, 0.9219207763671875, 2.1238861083984375, 2.168153762817383, 2.006561279296875, -0.14261817932128906, 4.042087554931641, 0.6871185302734375, -1.5616302490234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000045.npy"} +{"epoch": 0.06607929515418502, "step": 46, "batch_size": 64, "mean": 2.683544158935547, "std": 2.872194766998291, "min": -2.4341888427734375, "p10": -0.03670501708984372, "median": 1.8838224411010742, "p90": 6.296036911010743, "max": 13.736114501953125, "pos_frac": 0.875, "sample": [0.6534500122070312, 0.16007232666015625, 5.4079742431640625, 6.82208251953125, 3.903533935546875, -2.4341888427734375, 1.4024887084960938, 1.6474590301513672, 6.412384033203125, 3.469125747680664, 6.378353118896484, 1.3703804016113281, 0.6748580932617188, 2.3860416412353516, 4.32635498046875, 0.3746070861816406, -0.6426239013671875, 0.7053070068359375, 3.7569198608398438, 10.354736328125, -0.6335697174072266, 0.168304443359375, 0.14716720581054688, 3.5892868041992188, 0.40076446533203125, 4.192359924316406, 3.7990570068359375, 1.0782546997070312, 4.0302734375, 2.3488292694091797, 3.5874252319335938, 0.2877655029296875, -1.014251708984375, 0.6239356994628906, 2.5388946533203125, 5.587726593017578, 5.059410095214844, 0.233673095703125, 7.01715087890625, -0.0489959716796875, 0.3671092987060547, 2.4862213134765625, 5.0970001220703125, -0.008026123046875, 13.736114501953125, 0.2815399169921875, 5.7935333251953125, 5.4744415283203125, 0.7355422973632812, 2.4919891357421875, 1.7243309020996094, 7.4520111083984375, 2.972412109375, -0.1126556396484375, 1.6361656188964844, 6.103965759277344, 1.1144790649414062, 2.043313980102539, 2.8827896118164062, 5.5449676513671875, 1.3697128295898438, 0.9201297760009766, 1.6157073974609375, -0.0987396240234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000046.npy"} +{"epoch": 0.06754772393538913, "step": 47, "batch_size": 64, "mean": 2.6440374851226807, "std": 2.436188220977783, "min": -2.0778274536132812, "p10": -0.3144378662109374, "median": 2.044175148010254, "p90": 5.9337810516357425, "max": 9.037277221679688, "pos_frac": 0.875, "sample": [2.0107269287109375, 2.8916969299316406, 0.7004318237304688, 1.4367828369140625, 4.626644134521484, 1.8734378814697266, 1.2434234619140625, 1.68170166015625, 6.170848846435547, 2.594707489013672, -0.4102325439453125, 5.838764190673828, 5.77117919921875, 1.1662483215332031, 3.4341812133789062, 2.172576904296875, -0.4951019287109375, 1.4191436767578125, 1.61907958984375, 4.828239440917969, 6.338218688964844, 1.3417816162109375, 0.6892929077148438, 2.817819595336914, 3.1964492797851562, 1.003072738647461, -0.8628311157226562, 5.9745025634765625, 7.43927001953125, 2.363433837890625, -0.36870574951171875, 2.9457454681396484, 5.625637054443359, 3.8297271728515625, -2.0778274536132812, 0.2804737091064453, 0.240264892578125, 1.2977638244628906, 0.7446441650390625, 8.74786376953125, 4.98248291015625, 1.9846458435058594, 0.9895172119140625, 2.55023193359375, 8.3883056640625, 4.994384765625, -0.18781280517578125, 3.459646224975586, 9.037277221679688, -1.2849197387695312, 1.3301162719726562, 1.89019775390625, -0.428253173828125, 1.3579158782958984, 2.4763641357421875, 1.233154296875, 5.222541809082031, 1.5460433959960938, 4.363006591796875, 1.9827499389648438, 3.451171875, 3.3071136474609375, 2.0776233673095703, 2.353811264038086], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000047.npy"} +{"epoch": 0.06901615271659324, "step": 48, "batch_size": 64, "mean": 2.618363618850708, "std": 2.697575092315674, "min": -6.274169921875, "p10": 0.04299640655517581, "median": 1.9798402786254883, "p90": 6.479444885253907, "max": 10.422393798828125, "pos_frac": 0.921875, "sample": [3.62164306640625, 0.06938552856445312, 6.5425567626953125, 7.346382141113281, 5.86767578125, 2.7173843383789062, -0.3670196533203125, 2.1086044311523438, 5.794342041015625, 3.8012466430664062, 1.244873046875, 4.390777587890625, 1.4231109619140625, 0.6008148193359375, -0.03228759765625, 1.221099853515625, 6.332183837890625, 0.03168678283691406, 1.4495925903320312, 0.029964447021484375, 0.2915077209472656, 8.024215698242188, -0.30505943298339844, 1.873779296875, 5.407218933105469, 1.5107192993164062, 2.4865875244140625, 3.365468978881836, 0.37664031982421875, 3.061185836791992, 1.8073158264160156, 1.5336074829101562, 3.660369873046875, 1.669790267944336, 7.530364990234375, 3.478504180908203, 4.298866271972656, 8.174140930175781, 2.93084716796875, 1.2791290283203125, 0.336181640625, 1.6781425476074219, 0.6991348266601562, 2.6858978271484375, 0.1956787109375, 3.3679447174072266, 2.845155715942383, 1.6022796630859375, -1.5492897033691406, 7.2345123291015625, 2.0859012603759766, 2.96038818359375, 1.7535552978515625, 10.422393798828125, 3.6663894653320312, -6.274169921875, 4.630802154541016, 1.4327621459960938, 0.776519775390625, 0.5687713623046875, 4.704559326171875, 1.6186065673828125, 2.086688995361328, 1.397247314453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000048.npy"} +{"epoch": 0.07048458149779736, "step": 49, "batch_size": 64, "mean": 3.106010675430298, "std": 3.1618149280548096, "min": -3.00469970703125, "p10": 0.08932342529296888, "median": 2.67901611328125, "p90": 7.0016098022460955, "max": 12.861572265625, "pos_frac": 0.90625, "sample": [2.6644821166992188, 0.6275634765625, 0.2225360870361328, 3.1815032958984375, 2.800924301147461, 4.661121368408203, 0.5832366943359375, 1.9867210388183594, -0.9292449951171875, 0.62567138671875, 0.24828147888183594, 1.5381546020507812, 1.8750457763671875, 7.582313537597656, 6.262992858886719, 2.7088470458984375, 3.5463294982910156, 1.6248207092285156, 2.5038223266601562, 1.1536102294921875, 2.179107666015625, -2.592184066772461, 3.05377197265625, 0.03223228454589844, 10.81707763671875, 8.347000122070312, 0.5868988037109375, 5.275138854980469, 3.89569091796875, 0.3891105651855469, 6.625205993652344, 12.861572265625, 3.9819564819335938, 0.9897613525390625, -1.2103500366210938, -3.00469970703125, 3.1585960388183594, 3.350025177001953, 3.8369903564453125, 4.689746856689453, 2.518625259399414, -0.12350845336914062, 0.261810302734375, 4.0593414306640625, 4.370506286621094, 11.59136962890625, 2.5185813903808594, 1.832122802734375, 7.162925720214844, 6.2316436767578125, 4.8246612548828125, 2.6935501098632812, 4.55242919921875, 4.381309509277344, 10.767333984375, 0.41550445556640625, -0.697906494140625, 2.5496749877929688, 1.4276599884033203, 4.007164001464844, 4.340972900390625, 1.7870712280273438, 3.983489990234375, 0.5969696044921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000049.npy"} +{"epoch": 0.07195301027900147, "step": 50, "batch_size": 64, "mean": 3.4672958850860596, "std": 3.172093629837036, "min": -2.098276138305664, "p10": -0.3283157348632812, "median": 2.6837759017944336, "p90": 7.828643035888673, "max": 11.830322265625, "pos_frac": 0.84375, "sample": [6.118438720703125, -0.42040252685546875, -0.28179931640625, 8.312702178955078, 1.0746994018554688, 2.22637939453125, 6.263160705566406, 1.5500984191894531, 3.704761505126953, 1.7718839645385742, -0.3482513427734375, 6.560089111328125, 2.719451904296875, 11.5499267578125, 0.8858184814453125, 2.445068359375, 6.0536956787109375, 2.648099899291992, 8.51153564453125, 3.0682373046875, 0.6090259552001953, 4.5501861572265625, 7.652900695800781, 2.2680511474609375, 2.094165802001953, 4.329006195068359, -0.4041175842285156, -0.44062042236328125, 2.9274673461914062, 7.903961181640625, 5.607086181640625, 4.7144317626953125, 8.57071304321289, 11.830322265625, -2.098276138305664, 0.5778484344482422, 1.15985107421875, 1.9690933227539062, 0.1652545928955078, 3.1579513549804688, 2.2338027954101562, 1.962982177734375, 5.832763671875, 2.9759140014648438, 6.489727020263672, 6.190704345703125, -0.17104339599609375, 1.7848548889160156, 4.6627349853515625, -0.10736083984375, 5.8458251953125, 2.5570526123046875, 4.277622222900391, 5.004127502441406, 2.376110076904297, 5.142967224121094, 6.46405029296875, 2.270294189453125, 10.071388244628906, -0.7724800109863281, 5.7305145263671875, 1.1872024536132812, -1.7078704833984375, 0.04716300964355469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000050.npy"} +{"epoch": 0.07342143906020558, "step": 51, "batch_size": 64, "mean": 3.4574432373046875, "std": 4.225892066955566, "min": -4.7260894775390625, "p10": -0.5537408828735351, "median": 2.3147716522216797, "p90": 8.899594879150392, "max": 16.56494140625, "pos_frac": 0.8125, "sample": [8.433036804199219, 4.033821105957031, 1.1139850616455078, -0.4462471008300781, 0.891754150390625, 0.384735107421875, 7.78387451171875, -0.8133144378662109, 1.63525390625, -0.0571441650390625, 9.311767578125, 6.775768280029297, 1.9082374572753906, 3.749786376953125, 2.029470443725586, 4.090484619140625, 3.1354446411132812, -0.5998096466064453, 8.663909912109375, 5.159908294677734, 3.7125282287597656, -1.034912109375, 0.031948089599609375, 4.016387939453125, 1.96160888671875, 6.813358306884766, 0.6442489624023438, 4.819217681884766, 0.8887519836425781, 4.520851135253906, 16.56494140625, 2.476245880126953, 0.8954505920410156, 4.770408630371094, 2.8694324493408203, 12.327987670898438, 1.2663192749023438, 13.145462036132812, 4.8486175537109375, 1.8034687042236328, 2.1532974243164062, 1.2331619262695312, 3.271881103515625, 9.000602722167969, 1.1904106140136719, 0.24634933471679688, 5.525871276855469, -3.3704833984375, -0.3130207061767578, 14.812179565429688, 12.748245239257812, 6.2922515869140625, 6.369556427001953, -0.6370468139648438, 1.1324481964111328, 5.662864685058594, 0.1133270263671875, -0.05405426025390625, 2.1391754150390625, -4.7260894775390625, 2.749664306640625, -1.23480224609375, 2.5347366333007812, -0.09121322631835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000051.npy"} +{"epoch": 0.07488986784140969, "step": 52, "batch_size": 64, "mean": 5.342073440551758, "std": 4.270158290863037, "min": -2.2415924072265625, "p10": 0.6730659484863284, "median": 4.45838737487793, "p90": 11.044255828857423, "max": 18.782638549804688, "pos_frac": 0.953125, "sample": [1.4699058532714844, 11.164192199707031, 4.462562561035156, 10.764404296875, 6.995216369628906, 8.786972045898438, 2.36419677734375, 6.32122802734375, 8.20611572265625, 12.08563232421875, 0.3324737548828125, 11.26324462890625, 5.171352386474609, 5.065666198730469, 7.324594497680664, 7.604581832885742, 2.3317337036132812, 4.253852844238281, 15.017105102539062, 8.391799926757812, 3.2185497283935547, 0.9737777709960938, 10.292648315429688, 2.4985408782958984, 0.05667686462402344, 6.353721618652344, 2.4054031372070312, 1.6717472076416016, 8.016836166381836, 18.782638549804688, 8.28912353515625, 2.0989856719970703, 9.221229553222656, 4.243061065673828, 3.917064666748047, 11.914886474609375, 4.161388397216797, 0.544189453125, 4.071434020996094, -2.0187606811523438, -2.2415924072265625, 1.9023208618164062, 6.625343322753906, 5.566764831542969, 1.7009963989257812, 8.038314819335938, 4.454212188720703, 10.05780029296875, 6.418743133544922, 1.604879379272461, 0.2956390380859375, 3.222503662109375, 2.8903541564941406, 4.484245300292969, 2.9068832397460938, 8.941539764404297, 4.972553253173828, 1.3052444458007812, 15.689437866210938, -0.7672576904296875, 5.479408264160156, 4.359920501708984, 2.735492706298828, 1.1590023040771484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000052.npy"} +{"epoch": 0.0763582966226138, "step": 53, "batch_size": 64, "mean": 5.282422065734863, "std": 5.726108074188232, "min": -3.1252670288085938, "p10": 0.2155189514160159, "median": 3.555267333984375, "p90": 13.283502960205078, "max": 24.554946899414062, "pos_frac": 0.90625, "sample": [-1.2458305358886719, 2.8748779296875, 3.0995864868164062, 1.509368896484375, 0.9586944580078125, 3.5723190307617188, 11.708984375, -0.32977294921875, 7.144538879394531, -1.622467041015625, 1.7731857299804688, 24.526611328125, -2.2942428588867188, 0.6493988037109375, 6.0348358154296875, 6.788951873779297, 2.155487060546875, 9.318801879882812, 2.2791671752929688, 11.124160766601562, 4.207523345947266, 4.5419158935546875, 11.242401123046875, 6.265071868896484, 13.789909362792969, 0.8279571533203125, 4.6675262451171875, 0.5057525634765625, 5.057735443115234, 1.632223129272461, 16.470489501953125, 7.439510345458984, 6.303127288818359, 8.099090576171875, 3.4083518981933594, 0.09113311767578125, 0.8756561279296875, 3.7122154235839844, 1.1455001831054688, 4.269746780395508, 3.1815032958984375, 6.782899856567383, 1.5660686492919922, 13.045585632324219, -3.1252670288085938, 16.067001342773438, 16.1153564453125, 3.5382156372070312, 3.908416748046875, 2.937490463256836, 1.985769271850586, 3.74774169921875, 2.4104232788085938, 0.87872314453125, 3.4529647827148438, 4.150058746337891, 2.8480052947998047, 24.554946899414062, 2.227588653564453, 12.23919677734375, 4.435878753662109, 3.2766494750976562, 13.385467529296875, -0.11518096923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000053.npy"} +{"epoch": 0.07782672540381791, "step": 54, "batch_size": 64, "mean": 4.68695068359375, "std": 4.391002178192139, "min": -0.9171085357666016, "p10": 0.4863510131835938, "median": 3.727235794067383, "p90": 10.527245330810548, "max": 19.796707153320312, "pos_frac": 0.9375, "sample": [6.206634521484375, 4.5679931640625, 7.9571533203125, 0.6313095092773438, 0.7806835174560547, 3.0215530395507812, 1.0548477172851562, 2.2219104766845703, 5.3105010986328125, 7.710605621337891, 1.0287284851074219, 4.593345642089844, 10.590103149414062, 1.959066390991211, 3.651355743408203, 10.380577087402344, -0.37940406799316406, 4.1916351318359375, 4.97089958190918, 5.300331115722656, -0.17218780517578125, 1.1333236694335938, 14.592720031738281, 0.7683258056640625, 6.831260681152344, 0.45357513427734375, 4.316310882568359, 4.5063629150390625, 5.384925842285156, 5.578765869140625, 1.4474124908447266, 0.0836944580078125, 7.066169738769531, -0.20058822631835938, 0.6688365936279297, 3.2072677612304688, 11.583045959472656, 3.2788543701171875, 4.751190185546875, 12.413749694824219, 1.3858203887939453, 2.0055007934570312, 19.796707153320312, 9.8756103515625, -0.9171085357666016, 2.5343170166015625, 0.14381790161132812, 11.564559936523438, 0.5628280639648438, 3.8031158447265625, 1.39404296875, 4.3717193603515625, 9.189506530761719, 1.3123397827148438, 2.54052734375, 15.841583251953125, 1.2464447021484375, 4.087833404541016, 3.0759658813476562, 10.123016357421875, 2.228565216064453, 9.84783935546875, 1.0555706024169922, 9.451881408691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000054.npy"} +{"epoch": 0.07929515418502203, "step": 55, "batch_size": 64, "mean": 5.594723701477051, "std": 6.283792495727539, "min": -9.2359619140625, "p10": -1.0808685302734373, "median": 5.042801856994629, "p90": 14.160139465332032, "max": 24.687164306640625, "pos_frac": 0.828125, "sample": [7.104835510253906, 8.290130615234375, 5.148384094238281, -3.5238800048828125, 20.868988037109375, -1.9232177734375, 6.758319854736328, 3.752086639404297, 14.349502563476562, 0.21209716796875, 5.733222961425781, 5.600101470947266, 1.5887680053710938, 9.83367919921875, 17.27435302734375, 8.500045776367188, 4.101615905761719, 8.517402648925781, 24.687164306640625, 3.6086082458496094, 10.484634399414062, -1.4562530517578125, 4.023979187011719, 7.157722473144531, 21.531448364257812, -0.12837982177734375, 13.8868408203125, 0.515350341796875, 0.36980628967285156, 7.016849517822266, 3.9897689819335938, -1.477569580078125, 5.133430480957031, 0.24317169189453125, 9.991363525390625, 4.123710632324219, -1.1470108032226562, 0.3438568115234375, 0.6917781829833984, -4.192924499511719, 8.661724090576172, 2.556612014770508, 1.85540771484375, 13.288505554199219, 6.4734954833984375, -0.9265365600585938, 1.167337417602539, 9.2353515625, 4.952173233032227, -9.2359619140625, 2.4984493255615234, 14.277267456054688, 6.092315673828125, 3.4688758850097656, 6.560405731201172, 5.142240524291992, -0.7726421356201172, 8.241485595703125, 6.753135681152344, 12.356880187988281, 4.7299346923828125, -0.21563720703125, 4.680107116699219, 14.637588500976562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000055.npy"} +{"epoch": 0.08076358296622614, "step": 56, "batch_size": 64, "mean": 5.401305675506592, "std": 6.240213394165039, "min": -8.274402618408203, "p10": -1.285307693481445, "median": 5.024249076843262, "p90": 13.52677154541016, "max": 23.052413940429688, "pos_frac": 0.796875, "sample": [-0.894775390625, 6.329704284667969, -3.18389892578125, 7.581382751464844, -0.3791389465332031, -8.274402618408203, 9.371391296386719, -1.4526786804199219, 3.2925643920898438, -3.2775115966796875, 3.1656875610351562, 2.055583953857422, 5.459739685058594, 13.954681396484375, 2.178396224975586, -0.1470489501953125, -0.38501739501953125, 9.882522583007812, 9.18198013305664, 7.390625, -2.1727218627929688, 9.716842651367188, 2.8251571655273438, 20.2235107421875, 7.9698638916015625, 13.83990478515625, 1.1342334747314453, 0.384613037109375, 8.44906234741211, 2.493558883666992, 6.6365509033203125, 6.986396789550781, 8.304832458496094, 1.5541763305664062, 9.795295715332031, 10.894779205322266, 0.4507865905761719, 17.075592041015625, 6.927577972412109, 6.040962219238281, 5.319330215454102, 4.421543121337891, 2.9780826568603516, 17.64276123046875, 5.973564147949219, 11.971328735351562, 5.729541778564453, 10.28369140625, 0.5908737182617188, 23.052413940429688, 1.9586029052734375, -0.15682601928710938, 3.8636741638183594, 12.796127319335938, -6.1269378662109375, 2.8462352752685547, -0.5472564697265625, 19.733078002929688, 4.729167938232422, -2.615631103515625, 3.7771224975585938, 1.9891853332519531, 5.7949371337890625, 8.298187255859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000056.npy"} +{"epoch": 0.08223201174743025, "step": 57, "batch_size": 64, "mean": 5.9908952713012695, "std": 5.517789363861084, "min": -8.17169189453125, "p10": 0.06918010711669925, "median": 5.874476432800293, "p90": 13.257407760620117, "max": 20.555191040039062, "pos_frac": 0.90625, "sample": [7.528026580810547, 3.3008785247802734, 0.10302734375, -1.3163299560546875, 9.198089599609375, -8.17169189453125, 10.337295532226562, 0.3446521759033203, 13.13714599609375, 6.216053009033203, 14.70745849609375, 3.361848831176758, 20.555191040039062, 2.9178924560546875, 6.494209289550781, 5.2714996337890625, -1.1713829040527344, 2.5698318481445312, 6.1651153564453125, 18.660964965820312, 10.5634765625, 14.227813720703125, 10.37451171875, 3.6066665649414062, 6.039863586425781, 1.3224201202392578, 13.308948516845703, 5.709089279174805, 3.7193775177001953, 2.1890411376953125, 0.8462066650390625, 4.109804153442383, 2.9964523315429688, 0.05467414855957031, 6.456634521484375, 6.56146240234375, -0.1719512939453125, 1.2735137939453125, 2.6796188354492188, 8.055381774902344, 10.190338134765625, 1.9456615447998047, 9.010322570800781, 4.964881896972656, 7.413177490234375, -2.26702880859375, 8.614349365234375, 6.353862762451172, 9.25494384765625, 3.169870376586914, 2.387605667114258, 15.732192993164062, 4.9757232666015625, 2.6106338500976562, 8.063217163085938, 4.2883758544921875, 0.9016780853271484, 11.307220458984375, 10.276786804199219, -5.2220458984375, 12.670578002929688, 17.602920532226562, 7.02302360534668, 8.016227722167969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000057.npy"} +{"epoch": 0.08370044052863436, "step": 58, "batch_size": 64, "mean": 6.841357231140137, "std": 7.0269622802734375, "min": -7.783966064453125, "p10": -0.14983768463134756, "median": 5.293179512023926, "p90": 16.892082977294933, "max": 25.17742919921875, "pos_frac": 0.875, "sample": [14.053573608398438, -1.3966808319091797, 0.6813087463378906, 4.406871795654297, 3.6741104125976562, 3.796537399291992, 18.758018493652344, 3.2460250854492188, 13.260627746582031, 2.8306732177734375, 5.684436798095703, 7.0430145263671875, 7.4449005126953125, 3.740978240966797, 2.1091537475585938, 12.841110229492188, 9.978927612304688, 8.900043487548828, 4.901922225952148, 2.789308547973633, 6.989967346191406, 13.911617279052734, 13.265754699707031, -1.2629852294921875, 11.119247436523438, 2.22735595703125, 8.472953796386719, 6.53892707824707, 21.054771423339844, 3.953001022338867, 25.17742919921875, 7.687244415283203, 6.080600738525391, -0.1931781768798828, 14.439353942871094, 1.0703754425048828, 13.290916442871094, 3.4192657470703125, 1.9995651245117188, -1.773681640625, 1.7152976989746094, 2.5392532348632812, 21.312240600585938, -1.9584274291992188, 9.860565185546875, -0.048709869384765625, 1.1014747619628906, 6.76556396484375, -4.61773681640625, 21.933929443359375, 1.9952468872070312, 5.98419189453125, 0.8616352081298828, 13.572589874267578, 10.66207504272461, -7.783966064453125, 21.959442138671875, 1.511627197265625, 2.805206298828125, 6.89837646484375, 13.960273742675781, 1.6928424835205078, 0.9672470092773438, 17.943252563476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000058.npy"} +{"epoch": 0.08516886930983847, "step": 59, "batch_size": 64, "mean": 7.2461676597595215, "std": 7.300617218017578, "min": -6.076690673828125, "p10": -0.433093643188476, "median": 6.3211517333984375, "p90": 18.495296478271488, "max": 30.740615844726562, "pos_frac": 0.890625, "sample": [8.125591278076172, 7.9040679931640625, 14.445068359375, 19.02667236328125, 15.469947814941406, 3.890869140625, 30.740615844726562, 19.559112548828125, 6.935672760009766, 15.028350830078125, 0.9504203796386719, 7.835361480712891, -3.2321128845214844, 2.2710704803466797, 20.290008544921875, -0.6845512390136719, 2.2743043899536133, 13.524795532226562, 6.34893798828125, 18.872573852539062, 0.8730697631835938, 2.0139923095703125, 1.71502685546875, 1.2579765319824219, 3.4227981567382812, 22.137725830078125, 2.3172779083251953, 1.9368000030517578, 7.514019012451172, -6.076690673828125, 9.350746154785156, -1.8889884948730469, 7.217643737792969, 7.303398132324219, 8.936241149902344, 17.61498260498047, 2.4818572998046875, 11.640335083007812, 6.81829833984375, 3.613861083984375, -5.231971740722656, -2.646575927734375, 5.62397575378418, 0.1536407470703125, 3.9578609466552734, 9.648406982421875, 7.348600387573242, 2.719451904296875, 7.314674377441406, 2.5456886291503906, 2.319011688232422, 6.094268798828125, 2.995016098022461, 8.865825653076172, 5.440692901611328, 13.591033935546875, 9.198688507080078, 3.745006561279297, 2.681976318359375, -1.953125, 16.485816955566406, 17.354171752929688, 6.293365478515625, 19.43206787109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000059.npy"} +{"epoch": 0.08663729809104258, "step": 60, "batch_size": 64, "mean": 5.918500900268555, "std": 6.486839294433594, "min": -13.40045166015625, "p10": -1.2171134948730467, "median": 6.338956832885742, "p90": 12.138288879394532, "max": 24.89936065673828, "pos_frac": 0.8125, "sample": [7.357627868652344, -2.7562103271484375, 1.542245864868164, 11.600959777832031, 0.6374778747558594, 3.207683563232422, 10.967620849609375, 7.699211120605469, 1.5421295166015625, -13.40045166015625, 6.586397171020508, -1.028564453125, 1.9801559448242188, 4.071788787841797, 10.746337890625, 6.08538818359375, 5.32269287109375, 10.173324584960938, 3.7440032958984375, 9.7618408203125, 13.306175231933594, 11.831085205078125, 17.149322509765625, 8.802734375, 8.198944091796875, 7.697257995605469, -0.5550689697265625, 6.075408935546875, 9.859512329101562, -0.027177810668945312, 8.85525894165039, 17.733856201171875, 4.272941589355469, 6.2181243896484375, 7.221965789794922, -1.5331573486328125, 10.02728271484375, 8.933258056640625, 1.5339508056640625, -4.9857330322265625, 11.848007202148438, 2.8887977600097656, 5.555091857910156, 7.969200134277344, 3.6063613891601562, -0.902923583984375, -9.575645446777344, 15.565841674804688, 18.682296752929688, 11.014678955078125, 12.2626953125, 2.579132080078125, -1.8188285827636719, -1.2979202270507812, 10.901872634887695, 6.459789276123047, 24.89936065673828, 8.391525268554688, 1.8313217163085938, 9.118215560913086, -1.0222244262695312, 7.8137359619140625, 4.468107223510742, 1.0879573822021484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000060.npy"} +{"epoch": 0.0881057268722467, "step": 61, "batch_size": 64, "mean": 7.234993934631348, "std": 9.665848731994629, "min": -10.121078491210938, "p10": -1.0125953674316406, "median": 5.258674621582031, "p90": 16.99438934326172, "max": 54.455108642578125, "pos_frac": 0.828125, "sample": [2.597055435180664, 1.9797630310058594, -0.9979782104492188, 2.3847198486328125, 13.497146606445312, 0.07498359680175781, 0.5705909729003906, 7.312076568603516, -0.8020401000976562, 26.378997802734375, 20.565902709960938, 18.272674560546875, 25.645263671875, 12.769142150878906, 7.810066223144531, 9.252742767333984, -1.4332275390625, 1.8917865753173828, 11.660655975341797, 11.035316467285156, 3.3927230834960938, 5.305728912353516, 15.93487548828125, 8.260944366455078, 6.087913513183594, 6.811574935913086, 5.211620330810547, 5.1347198486328125, 8.293228149414062, 8.55078125, 0.056884765625, -0.1553955078125, 13.123603820800781, 16.472442626953125, 15.4803466796875, 21.206222534179688, 17.072433471679688, 1.393829345703125, -1.378509521484375, 12.664535522460938, 3.0389633178710938, 0.11977577209472656, 11.893875122070312, 2.7593727111816406, 1.2110729217529297, 16.812286376953125, 1.3246917724609375, 3.058856964111328, 54.455108642578125, 12.529403686523438, 11.927375793457031, -0.8235702514648438, -10.121078491210938, 11.705703735351562, 2.4241714477539062, -4.948997497558594, 5.533332824707031, 3.8033370971679688, -6.927131652832031, 2.2256927490234375, 6.558258056640625, -5.4430999755859375, -1.01885986328125, 1.5549449920654297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000061.npy"} +{"epoch": 0.08957415565345081, "step": 62, "batch_size": 64, "mean": 5.499805450439453, "std": 6.936376094818115, "min": -8.996490478515625, "p10": -3.003179740905761, "median": 5.13128662109375, "p90": 14.154901123046878, "max": 28.890579223632812, "pos_frac": 0.84375, "sample": [9.003116607666016, 1.6455764770507812, 0.7761669158935547, 10.323074340820312, 6.068868637084961, 10.900482177734375, 6.562097549438477, 1.0193099975585938, 9.349903106689453, -5.1210479736328125, 14.410964965820312, 6.057609558105469, 1.6878089904785156, -0.0146484375, 8.993354797363281, 3.3260040283203125, 8.367385864257812, 5.361785888671875, 0.1278858184814453, 5.016670227050781, 6.46234130859375, 7.9366302490234375, 2.15887451171875, 5.3757476806640625, -3.8869361877441406, 8.1944580078125, 4.265529632568359, 8.010459899902344, -0.2246856689453125, 28.890579223632812, 16.691795349121094, 8.456916809082031, 5.882011413574219, 2.148487091064453, 1.84344482421875, 5.137208938598633, 0.8885040283203125, 17.214065551757812, -8.996490478515625, -3.4571990966796875, -5.567962646484375, 0.7689971923828125, 1.7340965270996094, 0.9676780700683594, 1.6198654174804688, 8.594802856445312, 16.041057586669922, 9.489139556884766, -3.3240909576416016, 13.557418823242188, 7.954708099365234, 5.125364303588867, -5.48211669921875, 3.6958446502685547, 12.654045104980469, 3.7667465209960938, 3.1468658447265625, 4.196601867675781, 25.408279418945312, 19.344924926757812, 6.805614471435547, 0.5625190734863281, -2.2543869018554688, 6.3274383544921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000062.npy"} +{"epoch": 0.09104258443465492, "step": 63, "batch_size": 64, "mean": 7.43485164642334, "std": 7.748790264129639, "min": -12.339393615722656, "p10": -0.5918756484985345, "median": 6.6623382568359375, "p90": 16.004951477050785, "max": 31.493316650390625, "pos_frac": 0.890625, "sample": [24.23431396484375, 7.519203186035156, 4.742738723754883, 8.539382934570312, 3.8345947265625, 1.3347930908203125, 4.507438659667969, -6.236824035644531, 5.747215270996094, 6.350563049316406, -12.339393615722656, 14.681617736816406, 8.640708923339844, 7.121337890625, 2.881816864013672, -0.8816051483154297, 16.4527587890625, 4.45867919921875, 12.641952514648438, 20.511940002441406, 12.194160461425781, 14.088623046875, 6.0806732177734375, 0.08415985107421875, -1.110443115234375, 11.849573135375977, -4.0507965087890625, 0.18497657775878906, 4.8161163330078125, 3.1766223907470703, 11.362747192382812, 7.466850280761719, 6.974113464355469, 9.316669464111328, 10.71197509765625, 11.058425903320312, 13.605865478515625, 13.590217590332031, 5.225292205810547, 2.1175613403320312, 0.1864776611328125, 14.960067749023438, 2.362062454223633, 5.944160461425781, -9.474884033203125, 12.911916732788086, 7.1904144287109375, 1.4167633056640625, 2.63201904296875, 6.308837890625, 2.47760009765625, 13.363876342773438, 16.982498168945312, 8.055606842041016, 17.372879028320312, 1.0458984375, 8.574163436889648, 31.493316650390625, -3.312164306640625, 14.626800537109375, 5.10992431640625, 25.587623596191406, 5.616004943847656, 10.912010192871094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000063.npy"} +{"epoch": 0.09251101321585903, "step": 64, "batch_size": 64, "mean": 8.647798538208008, "std": 9.687501907348633, "min": -15.902725219726562, "p10": -0.5983268737792966, "median": 7.064637184143066, "p90": 20.69824676513672, "max": 45.84747314453125, "pos_frac": 0.8125, "sample": [0.5482559204101562, 17.798309326171875, 25.020370483398438, 7.587436676025391, 5.377939224243164, 11.613136291503906, 3.248392105102539, 15.307937622070312, -0.7014026641845703, 6.535608291625977, 1.8784904479980469, 10.280288696289062, 1.1004905700683594, 5.059196472167969, 11.267829895019531, 29.569717407226562, 4.2610931396484375, -1.539459228515625, 17.565887451171875, 7.131200790405273, 11.149417877197266, -2.7390518188476562, 27.9649658203125, 8.099987030029297, -0.14403724670410156, 6.284080505371094, -0.7950839996337891, 6.594072341918945, -15.902725219726562, -0.23607254028320312, 7.490806579589844, -0.31134796142578125, 45.84747314453125, 5.341520309448242, 1.9637012481689453, 9.67251205444336, 16.676071166992188, 20.236190795898438, 10.755401611328125, 6.113658905029297, 6.0205078125, 22.59807586669922, 10.574729919433594, 8.247352600097656, -0.34857177734375, -8.972000122070312, 23.737335205078125, 3.5614013671875, 20.896270751953125, 0.512237548828125, 13.81268310546875, 12.791412353515625, 12.320640563964844, -1.4083175659179688, 12.476203918457031, -0.3578166961669922, 5.336067199707031, 6.998073577880859, 13.070571899414062, 12.223068237304688, 1.7231674194335938, 11.227142333984375, 18.64368438720703, 4.80291748046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000064.npy"} +{"epoch": 0.09397944199706314, "step": 65, "batch_size": 64, "mean": 8.72087287902832, "std": 9.570690155029297, "min": -21.67119598388672, "p10": -0.9569469451904293, "median": 7.41032600402832, "p90": 21.971057891845707, "max": 30.466827392578125, "pos_frac": 0.84375, "sample": [30.466827392578125, 24.398056030273438, 16.84210205078125, 13.427871704101562, 5.40620231628418, 8.901473999023438, 0.7904815673828125, 12.853523254394531, 3.2370471954345703, 20.11968231201172, 15.117576599121094, 1.4248504638671875, -0.3220329284667969, 22.394775390625, 26.645278930664062, 1.0547294616699219, 9.800048828125, 2.5714664459228516, -4.562141418457031, 4.8486785888671875, 1.0724849700927734, 17.210952758789062, 5.9335479736328125, -21.67119598388672, 16.076934814453125, 8.403701782226562, -3.6001129150390625, 7.430339813232422, 12.955520629882812, 4.953624725341797, 5.9239044189453125, 6.532413482666016, -2.180828094482422, 5.048667907714844, 27.235336303710938, 7.455623626708984, 20.982383728027344, 15.236900329589844, 24.88452911376953, 7.390312194824219, -1.0902099609375, -11.140289306640625, 9.023025512695312, 2.1015548706054688, 7.107513427734375, 17.676589965820312, 23.53333282470703, -1.0811691284179688, 5.412689208984375, 19.134239196777344, 12.140472412109375, 4.207366943359375, 19.341354370117188, -0.6670951843261719, 11.52703857421875, 7.55389404296875, 5.742988586425781, 1.8737030029296875, 17.117767333984375, 11.019287109375, -0.2923164367675781, 7.934133529663086, 0.1371917724609375, 7.131307601928711], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000065.npy"} +{"epoch": 0.09544787077826726, "step": 66, "batch_size": 64, "mean": 8.126091003417969, "std": 9.668362617492676, "min": -8.4381103515625, "p10": -0.7879472732543942, "median": 6.606025695800781, "p90": 19.169573593139653, "max": 45.73918151855469, "pos_frac": 0.84375, "sample": [7.4155731201171875, 45.73918151855469, 1.4171981811523438, 14.6307373046875, 13.282318115234375, 11.64923095703125, 1.9528541564941406, -0.15094757080078125, 10.240056991577148, 6.5942840576171875, -7.453155517578125, 2.3410816192626953, 5.998233795166016, -0.9534912109375, 12.207660675048828, 3.3265304565429688, 4.450613021850586, 0.15003013610839844, 2.1835670471191406, 19.465438842773438, 7.503379821777344, 10.230422973632812, 0.5877113342285156, 10.603385925292969, 16.922706604003906, 2.5684432983398438, 3.7482757568359375, -2.651885986328125, -3.9842300415039062, -8.4381103515625, 9.537971496582031, 17.624855041503906, 18.47922134399414, 7.320396423339844, 3.2715320587158203, 11.39520263671875, 6.617767333984375, -0.40167808532714844, 2.00067138671875, 14.094474792480469, 10.204109191894531, 3.4092941284179688, -0.26153564453125, 7.473030090332031, 5.767023086547852, -7.7455902099609375, 9.878868103027344, 1.884664535522461, -2.888763427734375, 13.952812194824219, 3.8601226806640625, 20.360122680664062, 26.265426635742188, 21.123023986816406, 4.358848571777344, 17.35022735595703, 1.6889228820800781, 7.717342376708984, 15.066520690917969, 2.4561824798583984, 32.522430419921875, 1.4632186889648438, 29.74249267578125, 12.903533935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000066.npy"} +{"epoch": 0.09691629955947137, "step": 67, "batch_size": 64, "mean": 8.405787467956543, "std": 9.644529342651367, "min": -12.236480712890625, "p10": -1.0120384216308589, "median": 6.348476409912109, "p90": 20.069532012939455, "max": 45.02313232421875, "pos_frac": 0.859375, "sample": [21.604873657226562, 3.2599945068359375, 21.428436279296875, 1.2414398193359375, 2.2302207946777344, -0.40283203125, 15.406295776367188, 5.383354187011719, -6.2746429443359375, 2.3457794189453125, 17.016338348388672, 9.605018615722656, 3.7919387817382812, 15.874488830566406, -0.5295333862304688, -4.499063491821289, 10.192535400390625, 8.664985656738281, 1.3498611450195312, -12.236480712890625, 20.046722412109375, 8.674118041992188, 0.23089599609375, 0.5065460205078125, 5.2785186767578125, 2.62591552734375, 20.079307556152344, 6.660734176635742, -1.2188262939453125, 6.8415069580078125, 4.2195281982421875, 13.845218658447266, 6.5012054443359375, 2.3004283905029297, 2.5805206298828125, 9.813240051269531, 14.815679550170898, 5.693117141723633, 1.444122314453125, -5.2248382568359375, 4.903388977050781, 17.507659912109375, 4.2989501953125, 3.5573043823242188, 26.3236083984375, 14.8209228515625, 6.725593566894531, 17.542648315429688, 13.943870544433594, 14.075119018554688, 12.621345520019531, 5.367536544799805, 11.873052597045898, 31.924026489257812, 6.195747375488281, 1.3681964874267578, 45.02313232421875, 3.4608497619628906, -2.4450531005859375, 24.97303009033203, 19.245452880859375, 6.878856658935547, -1.9604034423828125, 8.578893661499023], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000067.npy"} +{"epoch": 0.09838472834067548, "step": 68, "batch_size": 64, "mean": 8.072154998779297, "std": 8.153580665588379, "min": -6.998992919921875, "p10": 0.3214328765869142, "median": 5.979337692260742, "p90": 19.700840759277344, "max": 36.738006591796875, "pos_frac": 0.90625, "sample": [-3.6640625, 3.652801513671875, 23.621551513671875, 7.900733947753906, 5.293611526489258, -5.7846527099609375, 8.975982666015625, 22.6048583984375, 0.6870956420898438, 0.4654121398925781, 14.175018310546875, 15.27362060546875, 16.293479919433594, 11.702301025390625, 10.895004272460938, 0.25972747802734375, 5.895664215087891, 12.24459457397461, 3.3386306762695312, 8.350082397460938, -0.36865997314453125, 4.828617095947266, 5.336675643920898, 3.617431640625, 7.124357223510742, 13.16754150390625, 18.457046508789062, 8.25030517578125, 8.555416107177734, 19.207916259765625, 19.912094116210938, 21.684139251708984, 1.3344955444335938, 6.205635070800781, 9.185264587402344, 1.2225379943847656, -0.21123123168945312, 5.577281951904297, 2.389312744140625, 4.8373565673828125, 16.480838775634766, 2.320476531982422, 10.546075820922852, -6.998992919921875, 6.669517517089844, 6.063011169433594, 3.807188034057617, 21.862464904785156, 2.6302413940429688, 1.623983383178711, 1.8057308197021484, 36.738006591796875, 24.90520477294922, 16.833831787109375, 12.512321472167969, 1.6516189575195312, 2.026599884033203, 5.029022216796875, 11.394081115722656, -3.0075607299804688, 4.580535888671875, 2.3401145935058594, 4.8917694091796875, 7.416877746582031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000068.npy"} +{"epoch": 0.09985315712187959, "step": 69, "batch_size": 64, "mean": 9.67041301727295, "std": 9.418717384338379, "min": -4.722747802734375, "p10": 0.0623584747314459, "median": 7.1080474853515625, "p90": 20.281210327148443, "max": 44.1767578125, "pos_frac": 0.890625, "sample": [4.4684906005859375, 6.465248107910156, 30.120574951171875, 8.937858581542969, 12.120025634765625, 15.844223022460938, -4.09429931640625, -0.5853118896484375, -0.19494247436523438, 3.73406982421875, 5.9187774658203125, 8.81119155883789, -0.6080322265625, 8.472541809082031, 7.3455657958984375, 14.54812240600586, 25.194732666015625, 6.8705291748046875, 16.350540161132812, 15.438972473144531, -0.2808876037597656, 23.747177124023438, 12.977874755859375, 10.494316101074219, 18.37139129638672, 13.622344970703125, 4.451881408691406, 16.06311798095703, 13.044410705566406, 5.219278335571289, 1.0956459045410156, 0.8082466125488281, 19.082977294921875, 44.1767578125, 4.7476959228515625, 19.067306518554688, 5.495124816894531, 16.570419311523438, 16.198516845703125, 1.098541259765625, 1.6050033569335938, 16.353363037109375, 11.65713119506836, 1.2287826538085938, -0.7091197967529297, 14.70095443725586, 6.318058013916016, 35.88185119628906, 4.1750030517578125, 7.7617950439453125, 6.451959609985352, -4.722747802734375, 5.191642761230469, 3.0335311889648438, 2.0637969970703125, 7.3499298095703125, 20.79473876953125, 21.328826904296875, 1.126129150390625, 18.326004028320312, 1.7182979583740234, 1.3261795043945312, 4.071573257446289, 0.6627273559570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000069.npy"} +{"epoch": 0.1013215859030837, "step": 70, "batch_size": 64, "mean": 10.396703720092773, "std": 11.003621101379395, "min": -8.4107666015625, "p10": -1.315313339233398, "median": 10.282015800476074, "p90": 25.77352981567384, "max": 40.713653564453125, "pos_frac": 0.84375, "sample": [6.428897857666016, 0.6703338623046875, 11.002208709716797, 19.273094177246094, 22.232864379882812, 12.152116775512695, -7.4710693359375, -1.5034027099609375, 13.275657653808594, 3.963357925415039, -2.3240280151367188, 12.771682739257812, -2.7922935485839844, 7.983978271484375, 37.64056396484375, 1.6192893981933594, 10.767333984375, 27.814071655273438, 10.787099838256836, 40.713653564453125, 11.644287109375, 10.231094360351562, 2.04412841796875, 29.138031005859375, 0.35724639892578125, 17.29912567138672, 19.07410430908203, 13.48277473449707, -2.133970260620117, 6.899261474609375, 5.860191345214844, 26.901145935058594, 2.198272705078125, 1.0438880920410156, 0.9333267211914062, -0.31858062744140625, 13.420032501220703, 3.0422821044921875, 11.263755798339844, -1.7005367279052734, 3.2923736572265625, 4.054994583129883, -0.7750396728515625, 2.265960693359375, 10.332937240600586, 1.372772216796875, 19.607139587402344, 23.142425537109375, -8.4107666015625, 5.110801696777344, 18.126487731933594, 2.8402481079101562, 1.7899208068847656, 31.935714721679688, 13.03537368774414, -0.8764381408691406, 34.9466552734375, 10.391742706298828, 20.765281677246094, 16.605297088623047, 3.789947509765625, 21.908714294433594, 13.348594665527344, 21.10259246826172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000070.npy"} +{"epoch": 0.1027900146842878, "step": 71, "batch_size": 64, "mean": 12.112241744995117, "std": 11.967255592346191, "min": -9.733453750610352, "p10": 0.6317092895507823, "median": 10.262733459472656, "p90": 22.87934188842774, "max": 50.334136962890625, "pos_frac": 0.90625, "sample": [-5.991004943847656, 41.4300537109375, 6.083122253417969, 1.7551422119140625, 5.620021820068359, 12.496971130371094, 6.4303741455078125, 12.5518798828125, 18.104339599609375, 5.586124420166016, 22.15936279296875, 6.751131057739258, 17.442840576171875, 4.637535095214844, 3.258056640625, 7.3160400390625, 11.253231048583984, 15.637237548828125, -0.45610809326171875, 2.0313262939453125, 4.701324462890625, 43.79667663574219, 7.513542175292969, 7.209846496582031, 16.316566467285156, 16.243331909179688, 6.092311859130859, 12.431777954101562, 13.376556396484375, -8.195388793945312, 4.5580596923828125, 14.596343994140625, 10.497528076171875, -0.049747467041015625, 5.559572219848633, 18.188087463378906, 18.62957763671875, 0.150238037109375, 21.493301391601562, 16.000045776367188, 5.092769622802734, 13.006973266601562, 6.730583190917969, 18.216796875, 15.205093383789062, 17.09040069580078, 4.519775390625, 36.26173400878906, 46.865814208984375, 50.334136962890625, 2.0182342529296875, 23.187904357910156, -9.733453750610352, -0.783843994140625, 12.962287902832031, 6.22125244140625, 7.918392181396484, 11.371402740478516, 19.50146484375, 15.433868408203125, 31.03338623046875, 3.3791427612304688, 6.114236831665039, 10.027938842773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000071.npy"} +{"epoch": 0.10425844346549193, "step": 72, "batch_size": 64, "mean": 12.90890121459961, "std": 13.027764320373535, "min": -14.447647094726562, "p10": -1.8090076446533174, "median": 10.432766914367676, "p90": 32.45179443359377, "max": 55.10162353515625, "pos_frac": 0.890625, "sample": [18.439292907714844, 15.646295547485352, 5.970466613769531, 13.530593872070312, 6.318809509277344, 3.736591339111328, 9.056621551513672, 27.251113891601562, -14.447647094726562, 26.1080322265625, 14.92232894897461, 14.509300231933594, 7.164880752563477, 1.229644775390625, 17.04193115234375, 9.050636291503906, 11.70550537109375, 25.16802215576172, 15.288047790527344, 34.30552673339844, 38.87931823730469, -3.039876937866211, 8.178764343261719, 10.502222061157227, 17.028961181640625, 3.8289947509765625, 18.719703674316406, 9.827003479003906, 10.363311767578125, 17.272804260253906, 4.169380187988281, 5.576812744140625, 11.584671020507812, -12.766754150390625, 35.05076599121094, 28.512008666992188, 19.290969848632812, 7.473583221435547, 23.155364990234375, -3.8061294555664062, 9.000347137451172, 40.4381103515625, 55.10162353515625, -7.013721466064453, 9.899419784545898, 25.906661987304688, 7.6982421875, 8.082996368408203, 1.0630207061767578, 34.14027404785156, -9.37518310546875, 4.411746978759766, 2.331939697265625, 17.824981689453125, 14.171035766601562, 19.901260375976562, 9.580459594726562, 15.931346893310547, 35.342681884765625, 11.992801666259766, 6.681583404541016, 2.6603469848632812, -3.2868194580078125, 1.8866195678710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000072.npy"} +{"epoch": 0.10572687224669604, "step": 73, "batch_size": 64, "mean": 12.989622116088867, "std": 18.848773956298828, "min": -25.713653564453125, "p10": -9.154793548583983, "median": 10.167367935180664, "p90": 33.94851570129395, "max": 66.59498596191406, "pos_frac": 0.78125, "sample": [31.648056030273438, -14.090553283691406, 7.7367095947265625, 14.853591918945312, 6.9399871826171875, 35.390174865722656, -5.939430236816406, 25.850082397460938, 7.2023162841796875, -0.8545875549316406, 28.021697998046875, 33.70450210571289, 24.407196044921875, -1.3648529052734375, 0.4633636474609375, -14.498870849609375, 56.970489501953125, -1.6250762939453125, 10.965616226196289, 19.342453002929688, 17.51782989501953, 7.981895446777344, 10.462608337402344, 1.2534828186035156, 2.073984146118164, 9.872127532958984, -19.467723846435547, 11.083992004394531, 30.03521728515625, 15.642921447753906, 13.245994567871094, 3.256053924560547, 59.775482177734375, -12.721023559570312, 5.107540130615234, 30.950782775878906, 30.610984802246094, -10.522281646728516, 4.643833160400391, 2.6187305450439453, -1.7927093505859375, -6.4478302001953125, -9.917045593261719, 7.894250869750977, 17.42810821533203, 16.402442932128906, 2.7447071075439453, 5.386499404907227, 22.358299255371094, 43.65901184082031, 14.2357177734375, 6.047477722167969, 23.071868896484375, 5.911460876464844, 14.303550720214844, -7.3762054443359375, 15.219955444335938, 57.90202331542969, 28.408554077148438, 66.59498596191406, 3.1644020080566406, 34.05309295654297, 19.25152587890625, -25.713653564453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000073.npy"} +{"epoch": 0.10719530102790015, "step": 74, "batch_size": 64, "mean": 14.162808418273926, "std": 20.921667098999023, "min": -39.02606964111328, "p10": -3.30047149658203, "median": 10.677483558654785, "p90": 33.21564483642579, "max": 123.06634521484375, "pos_frac": 0.78125, "sample": [27.081329345703125, 9.379585266113281, 12.572189331054688, -0.13346290588378906, 24.977989196777344, 7.699287414550781, 10.225311279296875, 56.44910430908203, 2.757322311401367, 16.476051330566406, 16.064285278320312, 10.346296310424805, 19.976734161376953, 1.6292266845703125, 17.502166748046875, 123.06634521484375, 8.920394897460938, 30.93457794189453, 15.452404022216797, 3.4169769287109375, 1.8914241790771484, 51.89317321777344, 3.6048049926757812, 34.19324493408203, 25.816162109375, 9.918388366699219, -3.7947921752929688, 0.186431884765625, -0.5462436676025391, -18.973121643066406, 15.166000366210938, -1.6249237060546875, -0.24641990661621094, 9.293281555175781, -39.02606964111328, 21.629501342773438, -2.1470565795898438, 26.81167984008789, 4.966470718383789, -3.9042625427246094, -10.526718139648438, 11.008670806884766, 19.733322143554688, 24.947113037109375, -0.314727783203125, 46.189727783203125, 14.832405090332031, 3.519439697265625, 9.826736450195312, 8.82857894897461, 11.339317321777344, 37.092041015625, 18.87548828125, 23.567230224609375, 38.45885467529297, 16.273056030273438, 5.309474945068359, 16.648460388183594, 28.340240478515625, 26.863723754882812, -0.08452224731445312, -3.931365966796875, -4.740856170654297, 14.462291717529297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000074.npy"} +{"epoch": 0.10866372980910426, "step": 75, "batch_size": 64, "mean": 20.119970321655273, "std": 20.181682586669922, "min": -11.9459228515625, "p10": -2.753778839111326, "median": 16.025936126708984, "p90": 49.341851806640626, "max": 74.392822265625, "pos_frac": 0.859375, "sample": [-0.48046112060546875, 24.164260864257812, 17.85321807861328, 14.60455322265625, 19.107749938964844, 12.249128341674805, 10.54758071899414, 6.589424133300781, 46.185791015625, 66.9130859375, 32.906494140625, 15.329681396484375, -7.2509002685546875, 27.612159729003906, 74.392822265625, 15.277469635009766, 41.3836669921875, 63.26518249511719, 9.27808952331543, -0.29730224609375, 44.17158508300781, 24.711990356445312, 15.113079071044922, 10.83709716796875, 16.828903198242188, 49.44410705566406, 49.10325622558594, 3.455608367919922, 8.531078338623047, 32.4765739440918, 19.178443908691406, 17.300029754638672, 4.266265869140625, 64.48870849609375, -5.4083099365234375, -3.728057861328125, 25.522857666015625, -11.9459228515625, 21.02277374267578, 20.236507415771484, 6.583301544189453, 12.576194763183594, -8.759651184082031, 8.551956176757812, -7.708229064941406, 29.109588623046875, 2.7033538818359375, 11.665176391601562, 39.231353759765625, 26.643890380859375, 3.8924636840820312, -7.063385009765625, 8.226726531982422, 7.446254730224609, 16.783626556396484, 5.517303466796875, 7.1454620361328125, 23.27716064453125, 15.705535888671875, 16.346336364746094, 38.09386444091797, 58.536956787109375, 61.168365478515625, 16.766189575195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000075.npy"} +{"epoch": 0.11013215859030837, "step": 76, "batch_size": 64, "mean": 13.126352310180664, "std": 15.859684944152832, "min": -43.87799072265625, "p10": -2.080088424682617, "median": 12.94638442993164, "p90": 31.209701919555666, "max": 67.75559997558594, "pos_frac": 0.859375, "sample": [14.063547134399414, 6.083927154541016, 2.390779495239258, 25.433120727539062, 3.0202407836914062, 17.866668701171875, 10.76446533203125, 26.95245361328125, 31.499492645263672, 14.050605773925781, 15.842185974121094, 21.90645980834961, 10.301044464111328, 31.94068145751953, 4.572021484375, 5.695985794067383, 8.842195510864258, 67.75559997558594, 33.66449737548828, 5.796516418457031, -6.587127685546875, 12.939277648925781, 0.1433238983154297, 12.9534912109375, -15.207931518554688, 6.166769027709961, 5.474334716796875, 15.979141235351562, 6.545967102050781, 7.840000152587891, 30.23401641845703, 18.862831115722656, 22.580902099609375, 9.034568786621094, 28.410667419433594, 4.344287872314453, -2.1120529174804688, 26.588729858398438, 3.5011749267578125, -10.747642517089844, 6.835247039794922, 1.9568767547607422, -6.21251106262207, -6.759193420410156, 16.806541442871094, 30.533523559570312, 49.56782531738281, 28.31353759765625, 8.622058868408203, 5.7047576904296875, 13.440086364746094, -2.005504608154297, 15.887855529785156, 16.846343994140625, 14.897727966308594, 13.197029113769531, -0.5048179626464844, 13.826675415039062, 36.339332580566406, 23.580604553222656, 22.518264770507812, 32.926361083984375, 12.258720397949219, -43.87799072265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000076.npy"} +{"epoch": 0.11160058737151249, "step": 77, "batch_size": 64, "mean": 21.019004821777344, "std": 21.917591094970703, "min": -39.15985107421875, "p10": 0.5307651519775398, "median": 21.517589569091797, "p90": 55.26313018798828, "max": 75.76898193359375, "pos_frac": 0.90625, "sample": [16.345901489257812, 43.56529998779297, 32.6035041809082, 41.46567916870117, 41.81415557861328, 0.22499847412109375, 58.254119873046875, 21.875125885009766, 8.999139785766602, 10.212451934814453, 6.216306686401367, 34.47508239746094, 39.15325927734375, 5.858966827392578, 3.5500030517578125, 25.18639373779297, -39.15985107421875, 21.853591918945312, 34.440330505371094, 31.401657104492188, 14.017309188842773, -2.562877655029297, 9.834915161132812, 1.2442207336425781, 3.1591339111328125, 55.48548889160156, 3.3585987091064453, 25.887046813964844, 11.581657409667969, -5.037473678588867, 32.503360748291016, 29.579559326171875, 13.41384506225586, 64.93733215332031, 33.86470031738281, 10.094268798828125, 56.602325439453125, 75.76898193359375, 21.18158721923828, 12.412490844726562, -32.74198913574219, 8.003238677978516, -14.82598876953125, 3.674306869506836, 12.446332931518555, 26.149131774902344, 32.030364990234375, 25.43684959411621, 25.358154296875, 26.559242248535156, 7.326038360595703, -6.2017822265625, 5.246013641357422, 25.511322021484375, 26.537330627441406, 23.40985107421875, 4.1486968994140625, 56.34556579589844, 68.55601501464844, 11.759902954101562, 23.25936508178711, 54.744293212890625, 20.69204330444336, 6.12939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000077.npy"} +{"epoch": 0.1130690161527166, "step": 78, "batch_size": 64, "mean": 16.267372131347656, "std": 18.734283447265625, "min": -21.639328002929688, "p10": -2.868338775634766, "median": 16.143047332763672, "p90": 41.60837936401368, "max": 83.21652221679688, "pos_frac": 0.8125, "sample": [3.1621551513671875, 1.6321048736572266, 40.356414794921875, 17.689781188964844, -1.3661537170410156, 46.44624328613281, 17.050804138183594, 83.21652221679688, 27.280685424804688, 22.857471466064453, 20.506954193115234, 8.103256225585938, -4.681011199951172, 14.697866439819336, 22.53701400756836, 20.41244888305664, -0.6624603271484375, 8.296985626220703, -5.882743835449219, 2.7755355834960938, 7.185747146606445, 18.25221061706543, 26.009685516357422, 23.217025756835938, 19.650157928466797, 14.570819854736328, 17.29617691040039, 18.572021484375, 22.692108154296875, 5.740257263183594, 42.144935607910156, 16.126482009887695, 5.7281494140625, -11.174488067626953, 25.656112670898438, -12.3145751953125, 53.363311767578125, 20.808738708496094, 43.48942565917969, -0.7083663940429688, 37.6179084777832, 3.5963668823242188, 48.86537170410156, 32.13457107543945, 29.663665771484375, 0.0113525390625, 10.883384704589844, 3.890901565551758, 12.257793426513672, 7.869106292724609, 6.10560417175293, -1.2686634063720703, 16.15961265563965, -2.8496627807617188, 31.300765991210938, -21.639328002929688, 51.164031982421875, 4.656410217285156, -17.9432373046875, 26.673614501953125, -2.8763427734375, 35.431884765625, 2.1403732299804688, 26.530487060546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000078.npy"} +{"epoch": 0.1145374449339207, "step": 79, "batch_size": 64, "mean": 17.853469848632812, "std": 22.85171127319336, "min": -22.199539184570312, "p10": -6.32409324645996, "median": 12.758567810058594, "p90": 50.00017852783205, "max": 74.46176147460938, "pos_frac": 0.734375, "sample": [-6.6805572509765625, 54.370826721191406, 6.288034439086914, 68.26095581054688, 4.780067443847656, -0.017612457275390625, -2.187040328979492, -10.574920654296875, 43.16815185546875, 37.866119384765625, 19.382843017578125, -22.199539184570312, 24.846214294433594, 70.52488708496094, 9.883743286132812, 25.215530395507812, 73.96607971191406, 51.89088439941406, 17.52076530456543, -9.978042602539062, 74.46176147460938, 17.163360595703125, -3.3879852294921875, -11.224884033203125, 28.303497314453125, 1.2109088897705078, -1.06622314453125, 16.396942138671875, 13.720909118652344, 62.42327117919922, 3.5626068115234375, 8.643274307250977, 11.796226501464844, 27.24681854248047, -11.431915283203125, 10.730751037597656, -0.8031196594238281, 6.532829284667969, 41.050445556640625, 40.881744384765625, 19.60312271118164, 31.88372039794922, 33.57475280761719, 19.938907623291016, 26.287277221679688, -5.35188102722168, 2.9867401123046875, 31.968521118164062, 37.73186492919922, 10.841547012329102, 15.402921676635742, -5.492343902587891, -1.4475860595703125, 38.822540283203125, -1.2151947021484375, 21.963531494140625, 5.289371490478516, 6.645597457885742, 0.17804718017578125, -8.819786071777344, 7.315328598022461, 45.588531494140625, -3.592235565185547, 19.980056762695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000079.npy"} +{"epoch": 0.11600587371512482, "step": 80, "batch_size": 64, "mean": 16.589031219482422, "std": 23.739376068115234, "min": -23.32978057861328, "p10": -6.744988250732422, "median": 11.739740371704102, "p90": 42.00306777954101, "max": 95.39871215820312, "pos_frac": 0.78125, "sample": [27.0631103515625, 8.628719329833984, -6.574180603027344, 24.553672790527344, 5.648332595825195, 3.875608444213867, -3.0653419494628906, -9.591171264648438, 31.862136840820312, 2.718414306640625, 4.2288970947265625, 16.393421173095703, -15.920093536376953, -20.669090270996094, -6.470558166503906, 23.90381622314453, 95.39871215820312, 38.334938049316406, 83.09039306640625, 45.858184814453125, 25.155059814453125, 6.899394989013672, 41.87947082519531, 42.05603790283203, 34.377655029296875, 15.633132934570312, -0.9210186004638672, 32.96031188964844, 14.778018951416016, 10.72784423828125, 2.6252670288085938, -16.656204223632812, 4.264385223388672, -6.8181915283203125, 25.254852294921875, -23.32978057861328, 59.36228942871094, 31.237144470214844, -7.815166473388672, 88.42425537109375, 10.50459098815918, 3.6124267578125, 2.609844207763672, 2.0628509521484375, 45.32946014404297, 12.751636505126953, 23.80097198486328, 29.197982788085938, 16.15595817565918, 10.394485473632812, -3.8429737091064453, -3.0315113067626953, 0.7374668121337891, 6.8107757568359375, 23.409643173217773, 22.914627075195312, 6.015281677246094, 3.6497802734375, 32.96954345703125, 24.66059112548828, 14.164260864257812, 38.125244140625, -5.619632720947266, 14.952003479003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000080.npy"} +{"epoch": 0.11747430249632893, "step": 81, "batch_size": 64, "mean": 23.198793411254883, "std": 28.119565963745117, "min": -29.968536376953125, "p10": -9.227621459960936, "median": 19.40678596496582, "p90": 58.72273712158203, "max": 117.6217041015625, "pos_frac": 0.859375, "sample": [42.82927703857422, 32.93511962890625, -29.968536376953125, 40.11962127685547, 24.856311798095703, 58.77317810058594, 19.073810577392578, -11.949832916259766, 7.721519470214844, 33.43229675292969, 31.794525146484375, -22.632064819335938, 2.4100189208984375, 24.98345947265625, 87.9620361328125, 10.45986557006836, 23.469390869140625, 37.747650146484375, 58.90673828125, -16.127647399902344, 1.1670722961425781, 9.294677734375, 21.65593719482422, 72.5998764038086, 23.331951141357422, 11.99435806274414, 18.45184326171875, 8.183902740478516, 7.34625244140625, -8.127281188964844, 26.188507080078125, 7.778289794921875, 4.855442047119141, 12.917877197265625, 29.455703735351562, 117.6217041015625, 42.42374038696289, 19.739761352539062, 2.9915924072265625, 58.60504150390625, 42.881988525390625, 49.0244140625, 16.047454833984375, 0.3250160217285156, 4.903873443603516, -2.608234405517578, -19.584491729736328, 14.870384216308594, 14.510276794433594, 30.605636596679688, 14.933551788330078, 12.898683547973633, 21.005409240722656, -14.111274719238281, 38.79112243652344, 46.27384948730469, 109.5013427734375, 60.099609375, 9.869392395019531, 34.79712677001953, 3.2554168701171875, 27.297286987304688, -9.699195861816406, 33.5611572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000081.npy"} +{"epoch": 0.11894273127753303, "step": 82, "batch_size": 64, "mean": 18.392139434814453, "std": 30.29347801208496, "min": -60.26139831542969, "p10": -16.237848663330073, "median": 16.736846923828125, "p90": 51.392615509033206, "max": 123.43014526367188, "pos_frac": 0.796875, "sample": [-8.479690551757812, 32.81327819824219, -19.816696166992188, 18.020000457763672, 21.05221939086914, 18.589815139770508, -26.352272033691406, 16.160350799560547, 26.013015747070312, 58.01152801513672, 69.05517578125, 13.27166748046875, -12.815853118896484, 7.8688812255859375, 15.747331619262695, 106.04322814941406, 13.496414184570312, -53.749542236328125, -9.511993408203125, 34.006378173828125, 2.0990982055664062, 32.77320098876953, 51.88971710205078, 3.7428951263427734, -17.704418182373047, -6.064605712890625, 3.1017074584960938, 23.702537536621094, 21.500328063964844, 13.140655517578125, 20.294532775878906, 36.46649169921875, 37.821022033691406, -0.9211330413818359, 74.00370788574219, 19.011310577392578, 50.23271179199219, 15.658184051513672, 9.329959869384766, 5.248382568359375, -1.9413833618164062, 29.207626342773438, 59.76763916015625, -18.295028686523438, 29.145736694335938, 22.42981719970703, 7.350439071655273, 47.183719635009766, 34.190528869628906, -21.693878173828125, 10.607933044433594, 7.405242919921875, -60.26139831542969, 7.503929138183594, 32.71094512939453, 22.168649673461914, 5.980155944824219, 26.651901245117188, 16.906890869140625, 123.43014526367188, 16.566802978515625, 18.16412925720215, 0.39875030517578125, 46.76811981201172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000082.npy"} +{"epoch": 0.12041116005873716, "step": 83, "batch_size": 64, "mean": 21.821531295776367, "std": 26.633377075195312, "min": -26.29471206665039, "p10": -9.775568389892575, "median": 17.994450569152832, "p90": 63.7740417480469, "max": 86.46343994140625, "pos_frac": 0.8125, "sample": [21.23792266845703, 25.225269317626953, 18.212329864501953, 72.4052734375, 45.61009979248047, 12.691444396972656, -7.2711639404296875, 70.67916870117188, 57.564788818359375, 5.479972839355469, 12.366058349609375, 17.77657127380371, 1.625396728515625, -4.7454376220703125, 18.517200469970703, 69.85057067871094, -14.021102905273438, 86.46343994140625, 11.208343505859375, 74.82930755615234, 38.56803894042969, 12.165901184082031, 51.995025634765625, -20.25049591064453, -26.29471206665039, -10.848884582519531, 55.3824462890625, -20.844276428222656, 17.381826400756836, 66.43515014648438, 11.962875366210938, 15.194900512695312, 5.542562484741211, 1.08258056640625, 25.47808074951172, 13.412559509277344, 26.605609893798828, -0.40076637268066406, 40.164649963378906, 13.441452026367188, 20.505035400390625, 18.599029541015625, 44.525482177734375, 72.72866821289062, 19.55359649658203, 48.59779357910156, 4.814891815185547, -19.05902862548828, -2.5124073028564453, 2.5320968627929688, -22.407882690429688, 51.556640625, 9.509025573730469, 42.44886779785156, 24.74822235107422, 24.05926513671875, -6.645111083984375, 8.128803253173828, 18.430484771728516, 32.09977722167969, 43.93415832519531, 17.71289825439453, 28.198829650878906, 2.638957977294922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000083.npy"} +{"epoch": 0.12187958883994127, "step": 84, "batch_size": 64, "mean": 19.046039581298828, "std": 20.50779151916504, "min": -43.45240783691406, "p10": -1.5618135452270507, "median": 16.34085178375244, "p90": 42.84121398925782, "max": 81.66996765136719, "pos_frac": 0.84375, "sample": [7.456657409667969, 15.85369873046875, 4.027252197265625, 5.429876327514648, 47.873748779296875, 5.2699737548828125, 5.1595916748046875, 20.702049255371094, 16.286108016967773, 9.114418029785156, 30.01663589477539, 35.931785583496094, -20.667938232421875, 14.792831420898438, 22.204795837402344, 30.675552368164062, 10.655660629272461, 81.66996765136719, 31.11532974243164, 2.502410888671875, 7.323518753051758, 29.939682006835938, 7.963737487792969, 6.621232986450195, -0.8724784851074219, 41.718994140625, 26.030540466308594, 32.45860290527344, -43.45240783691406, -1.5957927703857422, 54.53254699707031, 3.4486007690429688, 36.661014556884766, 16.39559555053711, 18.458770751953125, 7.379974365234375, 48.26905822753906, 43.26324462890625, 31.77587890625, 21.88916778564453, 2.9420394897460938, 32.000389099121094, -4.131219863891602, 15.8614501953125, 29.9552001953125, 29.424171447753906, -1.4825286865234375, -2.9820022583007812, 34.24287414550781, 33.51531982421875, -2.1777191162109375, 41.856475830078125, 55.77879333496094, 20.089561462402344, 61.8057861328125, 15.37628173828125, 30.54730987548828, 8.017967224121094, -16.560516357421875, 31.01915740966797, 12.933588027954102, 18.90399169921875, 8.015308380126953, -0.2850017547607422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000084.npy"} +{"epoch": 0.12334801762114538, "step": 85, "batch_size": 64, "mean": 19.524394989013672, "std": 24.67206573486328, "min": -30.72986602783203, "p10": -4.865496826171874, "median": 17.839513778686523, "p90": 58.81364364624025, "max": 69.93470764160156, "pos_frac": 0.71875, "sample": [43.24617004394531, 15.173118591308594, 30.307952880859375, 3.6665401458740234, -2.1275177001953125, 22.78853988647461, 26.041854858398438, 2.024242401123047, 60.335655212402344, 9.329032897949219, -2.4419727325439453, 4.633811950683594, 64.74588012695312, 25.21088409423828, 24.500120162963867, 21.58791732788086, -4.3871002197265625, -18.06635284423828, 20.407638549804688, 0.8235092163085938, -30.72986602783203, 69.93470764160156, 18.398971557617188, -1.9404850006103516, -4.45452880859375, 45.774322509765625, 7.496772766113281, -8.602096557617188, 18.726211547851562, 10.707748413085938, 47.55085754394531, -5.0416259765625, -17.561553955078125, -1.992776870727539, 14.205482482910156, 5.30012321472168, 64.85150146484375, 17.416973114013672, 38.1866455078125, -0.38374900817871094, -4.340639114379883, 69.42839050292969, -2.1169185638427734, 45.28724670410156, 60.8873291015625, 15.146453857421875, 55.26228332519531, 66.48115539550781, 10.15484619140625, 5.929767608642578, 32.19794464111328, 49.828163146972656, -0.3976593017578125, -7.397773742675781, 18.262054443359375, 19.166093826293945, 53.40986633300781, 21.183258056640625, 30.273330688476562, 36.93819046020508, -22.373626708984375, 18.783985137939453, -1.2527732849121094, 43.176734924316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000085.npy"} +{"epoch": 0.12481644640234948, "step": 86, "batch_size": 64, "mean": 20.703155517578125, "std": 37.65457534790039, "min": -57.59767150878906, "p10": -15.5985050201416, "median": 15.043901443481445, "p90": 65.94494781494141, "max": 145.01016235351562, "pos_frac": 0.734375, "sample": [-13.245994567871094, 18.06822967529297, 68.34669494628906, -19.80025863647461, 35.565956115722656, 30.266990661621094, 27.25310707092285, -2.085662841796875, 104.5968017578125, 11.836467742919922, 6.667461395263672, -5.265556335449219, 0.9765472412109375, 21.861907958984375, -10.36920166015625, 19.172042846679688, 48.55000305175781, 19.797500610351562, -2.893230438232422, -50.9091796875, 63.17657470703125, 21.81039810180664, 32.03121566772461, 10.037843704223633, 10.662353515625, -16.60672378540039, -3.9500885009765625, 67.13139343261719, -4.553836822509766, 7.92957878112793, 38.23799514770508, 18.611080169677734, 7.853107452392578, -57.59767150878906, 45.41259765625, 54.230682373046875, 27.65264892578125, -39.70655059814453, 74.01417541503906, 3.0904598236083984, 145.01016235351562, 18.329814910888672, 25.312454223632812, 12.019573211669922, -4.640106201171875, 127.56109619140625, 6.7181243896484375, 121.38037109375, 46.76622009277344, -7.0804443359375, -31.41704559326172, 39.18089294433594, 9.268716812133789, 34.862281799316406, 29.446151733398438, 36.11820602416992, 36.93616485595703, 5.519510269165039, 1.936309814453125, 4.37293815612793, -5.105993270874023, -17.91211700439453, 2.7682037353515625, 19.79253387451172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000086.npy"} +{"epoch": 0.1262848751835536, "step": 87, "batch_size": 64, "mean": 24.061767578125, "std": 32.99917984008789, "min": -73.2835693359375, "p10": -7.940727233886718, "median": 20.0267276763916, "p90": 64.77524414062502, "max": 127.81517028808594, "pos_frac": 0.8125, "sample": [23.0114803314209, 36.17436218261719, 0.5291957855224609, 9.531679153442383, 37.4595947265625, -8.342071533203125, 29.73540496826172, -0.5953998565673828, 2.0038833618164062, -3.789093017578125, -24.208953857421875, 45.84275817871094, -6.848777770996094, 66.58474731445312, 30.67266082763672, 58.89936828613281, 52.91761779785156, 10.684965133666992, 90.90704345703125, 58.91326904296875, 20.773529052734375, 19.600357055664062, 7.7924957275390625, 40.95630645751953, 39.86400604248047, 2.2185306549072266, 38.590782165527344, 127.81517028808594, 13.819068908691406, 2.9153366088867188, 13.078126907348633, 74.799072265625, 59.27046203613281, 9.73178482055664, 56.400238037109375, 25.68871307373047, 13.724794387817383, 39.66257095336914, 20.236194610595703, 73.24845886230469, 0.469879150390625, 78.78927612304688, 15.110977172851562, 19.8172607421875, -73.2835693359375, 11.469369888305664, 40.30597686767578, -34.644317626953125, 38.23097229003906, 69.9071044921875, -16.858325958251953, -16.69973373413086, 8.690122604370117, 19.53759002685547, -7.0042572021484375, 60.553070068359375, 31.70806884765625, 33.82264709472656, -37.393341064453125, 23.588951110839844, 15.700706481933594, 1.275796890258789, 51.68479919433594, -5.095623016357422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000087.npy"} +{"epoch": 0.1277533039647577, "step": 88, "batch_size": 64, "mean": 18.192121505737305, "std": 31.818187713623047, "min": -58.583587646484375, "p10": -8.680522155761718, "median": 11.36335277557373, "p90": 58.73482818603519, "max": 129.30908203125, "pos_frac": 0.78125, "sample": [10.224441528320312, 11.42181396484375, 63.96421813964844, 61.884857177734375, 34.086021423339844, 48.17205810546875, 3.3592491149902344, 61.86534118652344, -7.1773529052734375, 12.955507278442383, 32.64154052734375, 7.1772308349609375, 1.87890625, 47.44628143310547, -22.886306762695312, 31.2918701171875, -20.653106689453125, 15.631332397460938, -46.85285949707031, -58.583587646484375, -5.046075820922852, 75.2921142578125, 36.299278259277344, -4.58367919921875, 44.19892883300781, 5.627523422241211, 27.74560546875, -9.324737548828125, 22.666311264038086, 21.77788543701172, 40.009063720703125, 7.001564025878906, -1.5423259735107422, 27.148513793945312, 1.4930572509765625, 93.34625244140625, 10.446189880371094, 51.4302978515625, 4.588714599609375, 129.30908203125, 5.139991760253906, 44.13361358642578, 4.150993347167969, 14.280397415161133, 10.7442626953125, 23.163299560546875, 1.4702262878417969, -3.0640907287597656, 33.19969177246094, 80.47262573242188, 34.438819885253906, 15.671539306640625, 0.33881378173828125, 9.858978271484375, -52.10382080078125, -5.515167236328125, 10.650510787963867, -1.496225357055664, 11.304891586303711, 15.174318313598633, 18.529584884643555, -20.8260498046875, 8.832901000976562, 40.0146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000088.npy"} +{"epoch": 0.12922173274596183, "step": 89, "batch_size": 64, "mean": 20.458881378173828, "std": 27.08057975769043, "min": -63.39544677734375, "p10": -2.3469511032104484, "median": 16.435709953308105, "p90": 48.460848236084, "max": 118.79606628417969, "pos_frac": 0.859375, "sample": [4.9568634033203125, 1.6433849334716797, 5.5114288330078125, 20.6496639251709, -9.456489562988281, 49.845855712890625, 40.244667053222656, 16.482765197753906, -8.296409606933594, 57.05327606201172, 57.53154754638672, 22.950695037841797, 22.395309448242188, 45.229164123535156, 36.02806854248047, 29.785987854003906, 12.929405212402344, 10.256210327148438, -2.654356002807617, 19.687702178955078, 14.682510375976562, 2.941469192504883, 16.388654708862305, 12.060955047607422, -10.548477172851562, 1.8405494689941406, 1.50531005859375, 10.335538864135742, 73.69931030273438, 18.852218627929688, 27.50757598876953, 88.91009521484375, 16.35584259033203, 44.343101501464844, 21.61831283569336, 2.6314544677734375, 12.59054183959961, 118.79606628417969, 4.1060333251953125, -1.6296730041503906, 29.07931900024414, -0.19501876831054688, 21.228456497192383, 30.302013397216797, 16.342485427856445, 5.827545166015625, 9.076013565063477, 4.76251220703125, 4.34454345703125, 34.912513732910156, 20.910430908203125, 40.481903076171875, -2.827014923095703, 16.355384826660156, 42.99437713623047, -63.39544677734375, -25.269058227539062, 27.961708068847656, 17.987323760986328, 21.840099334716797, 24.157012939453125, 24.09351348876953, 91.61164855957031, 7.024101257324219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000089.npy"} +{"epoch": 0.13069016152716592, "step": 90, "batch_size": 64, "mean": 27.817413330078125, "std": 41.238189697265625, "min": -50.24147033691406, "p10": -10.123895263671871, "median": 18.199609756469727, "p90": 91.09896850585939, "max": 146.428955078125, "pos_frac": 0.796875, "sample": [65.9423828125, -43.147315979003906, 29.53270721435547, 93.29824829101562, 22.42001724243164, 35.416656494140625, 1.0525474548339844, -4.303424835205078, 58.286102294921875, -1.235076904296875, 45.01789093017578, 17.991649627685547, -0.597442626953125, 13.036109924316406, 38.01982116699219, 85.89848327636719, -4.4951934814453125, 146.428955078125, 52.01654052734375, 2.077442169189453, 2.9022769927978516, 10.125221252441406, 18.407569885253906, -11.869773864746094, 115.00048828125, 16.36245346069336, 24.875343322753906, 119.29403686523438, 4.821964263916016, 22.580867767333984, 136.6070556640625, 21.89220428466797, -25.841476440429688, 11.60478401184082, -6.050178527832031, -50.24147033691406, 27.388652801513672, 7.786643981933594, 61.811676025390625, 5.967201232910156, 13.880104064941406, 10.892776489257812, -23.046463012695312, -5.079343795776367, 20.845199584960938, 85.96731567382812, 99.3575439453125, -36.156715393066406, -30.239364624023438, 11.779045104980469, 35.540809631347656, 1.602987289428711, 18.552024841308594, 36.084144592285156, 68.13015747070312, 32.36791229248047, 10.42060661315918, 9.920938491821289, 60.42730712890625, 16.423503875732422, 23.676429748535156, 16.715429306030273, 95.27911376953125, 40.890281677246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000090.npy"} +{"epoch": 0.13215859030837004, "step": 91, "batch_size": 64, "mean": 23.0252685546875, "std": 33.885406494140625, "min": -33.904457092285156, "p10": -19.02871170043945, "median": 21.440811157226562, "p90": 68.12580261230472, "max": 120.80810546875, "pos_frac": 0.75, "sample": [17.821449279785156, 52.671844482421875, -32.603302001953125, 7.594470977783203, 72.44447326660156, 0.35076332092285156, -17.22887420654297, 43.7656364440918, 3.952951431274414, 20.2908935546875, 41.91484069824219, 39.504127502441406, 42.356117248535156, 13.92926025390625, -33.904457092285156, 107.83404541015625, -12.0364990234375, 2.0270767211914062, 8.569793701171875, 85.50306701660156, 9.975479125976562, 33.254459381103516, 120.80810546875, -16.625167846679688, 27.79150390625, 53.64177703857422, -19.42737579345703, 3.0146522521972656, 22.43218994140625, -5.525032043457031, -18.098495483398438, 22.47066879272461, 58.04890441894531, -21.12652587890625, 18.450759887695312, 21.403648376464844, 55.375343322753906, -13.673408508300781, 21.47797393798828, -5.166284561157227, 8.136070251464844, 24.702529907226562, 57.96284484863281, 1.968231201171875, -19.44367218017578, 36.784088134765625, 22.96764373779297, 78.34574890136719, 25.92394256591797, 41.223121643066406, 11.054145812988281, -25.454673767089844, 0.9416294097900391, 27.75762176513672, -2.131345748901367, 42.226966857910156, -0.5609607696533203, 84.34805297851562, 44.4744873046875, 23.98858642578125, 51.59953308105469, -24.77149200439453, 47.558837890625, 80.75432586669922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000091.npy"} +{"epoch": 0.13362701908957417, "step": 92, "batch_size": 64, "mean": 23.284170150756836, "std": 33.0507698059082, "min": -46.34049987792969, "p10": -11.360004043579101, "median": 16.738669395446777, "p90": 57.28439636230469, "max": 125.57803344726562, "pos_frac": 0.75, "sample": [-4.7585906982421875, 4.934959411621094, 8.110305786132812, 46.139060974121094, -8.745025634765625, 6.2264251708984375, -46.34049987792969, -15.754194259643555, 46.300193786621094, 24.220977783203125, 65.45367431640625, 0.30835914611816406, 42.79931640625, 2.509021759033203, -5.533561706542969, 109.36367797851562, 54.794952392578125, 125.57803344726562, -40.26580810546875, -6.1319427490234375, 25.964466094970703, -10.820293426513672, 27.165435791015625, -8.706546783447266, -17.721229553222656, 57.28565979003906, -11.59130859375, 72.49307250976562, 29.399635314941406, 5.537885665893555, 71.7579574584961, -2.3123321533203125, 34.612449645996094, 0.6806011199951172, 51.48616027832031, 8.450759887695312, 31.590896606445312, 103.15449523925781, 9.895013809204102, 57.28144836425781, 42.569091796875, 56.6328125, -0.513336181640625, -5.04229736328125, 8.934860229492188, 24.68175506591797, 4.902801513671875, 40.418731689453125, 34.10607147216797, 11.50296401977539, 0.24170684814453125, -12.842924118041992, 53.0255012512207, 15.889856338500977, 17.587482452392578, 13.823692321777344, 35.32127380371094, 51.35826110839844, 42.08381652832031, 23.9010009765625, 11.168386459350586, -12.502151489257812, 53.90874481201172, 34.21510314941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000092.npy"} +{"epoch": 0.13509544787077826, "step": 93, "batch_size": 64, "mean": 23.431915283203125, "std": 29.27066421508789, "min": -35.55768585205078, "p10": -3.6619720458984357, "median": 16.33733367919922, "p90": 57.50738983154297, "max": 148.38644409179688, "pos_frac": 0.828125, "sample": [19.574256896972656, 9.071245193481445, 26.094207763671875, 62.110801696777344, 57.701271057128906, 54.18620300292969, 5.0331268310546875, 22.677082061767578, 43.506797790527344, -13.58868408203125, 72.74752807617188, 18.88079833984375, 50.78375244140625, 50.43920135498047, 148.38644409179688, 13.521808624267578, -1.7630767822265625, 5.8647918701171875, 3.7880630493164062, 86.82678985595703, -8.590980529785156, 12.735280990600586, 21.355754852294922, -1.00311279296875, 33.735504150390625, 10.159286499023438, 29.253150939941406, 6.034049987792969, -1.5503311157226562, 12.145431518554688, -0.6153812408447266, 14.217948913574219, 9.322376251220703, 20.29745864868164, 33.306129455566406, 52.2052001953125, 65.33577728271484, -23.35546875, -4.4757843017578125, 48.49732971191406, 43.69341278076172, 35.03335952758789, 35.207489013671875, 6.61163330078125, 10.376115798950195, 8.921089172363281, 30.08367919921875, 2.60626220703125, 57.05500030517578, 0.5389251708984375, -7.378211975097656, 58.473915100097656, 6.464546203613281, 16.479415893554688, 5.759532928466797, 6.0829315185546875, -18.796241760253906, 31.365150451660156, 44.824668884277344, 34.866798400878906, 45.86094665527344, 16.19525146484375, -35.55768585205078, 0.022504806518554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000093.npy"} +{"epoch": 0.13656387665198239, "step": 94, "batch_size": 64, "mean": 26.934350967407227, "std": 30.795570373535156, "min": -22.453811645507812, "p10": -6.913602066040039, "median": 23.203824996948242, "p90": 67.63926315307617, "max": 110.75592041015625, "pos_frac": 0.78125, "sample": [26.09854507446289, -16.73436737060547, 43.32524108886719, 19.300399780273438, 6.5215301513671875, -14.409011840820312, 10.135879516601562, 60.51530075073242, 5.559183120727539, -20.316253662109375, 23.094301223754883, 12.8502197265625, 25.348861694335938, 6.0890350341796875, 11.519477844238281, 0.33825111389160156, 57.90008544921875, 12.387874603271484, -6.805004119873047, 110.75592041015625, 5.734806060791016, 30.09549331665039, 31.81047821044922, 53.598724365234375, 77.78530883789062, 23.543731689453125, 13.931114196777344, 29.23589324951172, 21.193851470947266, -3.0518417358398438, -3.859579086303711, 17.08903694152832, 35.29004669189453, 57.733154296875, 43.330501556396484, 18.712779998779297, 36.46467590332031, 67.68115997314453, 33.963958740234375, 23.3133487701416, 5.440196990966797, 44.438148498535156, -4.180870056152344, -16.6304931640625, 67.54150390625, 73.68475341796875, -6.762174606323242, 59.25282287597656, 110.21200561523438, -6.96014404296875, -22.453811645507812, 86.46659851074219, 31.180625915527344, 56.62451171875, -3.423431396484375, 17.254608154296875, -14.742599487304688, -5.630268096923828, 35.929420471191406, 14.845657348632812, 35.81330490112305, 56.97346496582031, 72.02386474609375, 49.82861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000094.npy"} +{"epoch": 0.13803230543318648, "step": 95, "batch_size": 64, "mean": 21.93939208984375, "std": 29.070507049560547, "min": -71.04444885253906, "p10": -6.787653350830078, "median": 16.472951889038086, "p90": 61.81281585693362, "max": 91.09779357910156, "pos_frac": 0.828125, "sample": [16.764007568359375, 42.38111114501953, 4.593009948730469, 22.328731536865234, 19.258821487426758, 69.20269775390625, 8.305618286132812, 6.470293045043945, -7.810333251953125, 15.6015625, 19.48042106628418, 4.437076568603516, 34.972347259521484, 80.47328186035156, 50.47126770019531, -6.070899963378906, 38.38386535644531, 2.987760543823242, 15.095125198364258, 45.889556884765625, 22.92870330810547, 39.874237060546875, 20.871179580688477, 15.120698928833008, 9.76392936706543, -11.409622192382812, 10.418594360351562, 52.124977111816406, 20.229473114013672, 28.824607849121094, -9.99542236328125, -7.0948333740234375, 53.61882019042969, 91.09779357910156, 75.32830810546875, 56.58811950683594, 35.798126220703125, 1.6161365509033203, 1.1468124389648438, 64.05197143554688, 45.96049118041992, 80.39376831054688, 70.55656433105469, 3.7011566162109375, 14.330053329467773, 8.44035530090332, -1.6381053924560547, 31.910913467407227, 29.1630859375, -71.04444885253906, 2.5819149017333984, 2.741220474243164, -29.938507080078125, 46.7188720703125, 43.20276641845703, 13.173274993896484, 40.22686767578125, 12.051851272583008, 16.751842498779297, 16.194061279296875, -5.108545303344727, -2.726198196411133, 15.366989135742188, -33.00695037841797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000095.npy"} +{"epoch": 0.1395007342143906, "step": 96, "batch_size": 64, "mean": 29.912555694580078, "std": 35.41636657714844, "min": -54.19017028808594, "p10": -0.3928052902221679, "median": 20.77998924255371, "p90": 78.78724136352541, "max": 153.5980987548828, "pos_frac": 0.875, "sample": [62.63063049316406, 17.93891143798828, 37.97425842285156, 31.139026641845703, 20.309677124023438, 62.575836181640625, 6.3060150146484375, 2.9877243041992188, 1.511606216430664, 25.747894287109375, 109.98143005371094, 16.359771728515625, 10.283645629882812, -3.985057830810547, 1.9467201232910156, 21.250301361083984, 29.115890502929688, 44.40338134765625, 86.90303802490234, 88.80293273925781, 9.590248107910156, 44.56421661376953, 35.99413299560547, 153.5980987548828, 73.36640167236328, 15.213672637939453, 21.547828674316406, 10.927650451660156, 15.148712158203125, 16.33538055419922, 9.651969909667969, 134.56228637695312, 32.99229431152344, 28.90064239501953, 5.647361755371094, 87.57562255859375, 57.08331298828125, 6.119758605957031, 36.40824890136719, 1.9552059173583984, 45.71538543701172, 27.20539665222168, 22.849403381347656, -9.663284301757812, 14.782184600830078, -0.32114410400390625, -0.5985870361328125, 39.68346405029297, -0.42351722717285156, 25.734588623046875, 12.238510131835938, 57.16307067871094, -14.140003204345703, 6.887176513671875, 64.78916931152344, 10.637527465820312, 17.293725967407227, 62.14717102050781, 81.11045837402344, 2.0589447021484375, -0.7487354278564453, 31.18657684326172, -54.19017028808594, 1.6395950317382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000096.npy"} +{"epoch": 0.14096916299559473, "step": 97, "batch_size": 64, "mean": 23.000316619873047, "std": 27.659061431884766, "min": -59.801918029785156, "p10": -7.976758956909177, "median": 21.988481521606445, "p90": 55.99185256958008, "max": 106.21640014648438, "pos_frac": 0.78125, "sample": [17.332717895507812, 52.07530212402344, 2.108795166015625, 1.05291748046875, -4.562038421630859, -2.96453857421875, 15.600582122802734, 27.91274070739746, 2.085773468017578, 47.263267517089844, 22.99441146850586, 40.72913360595703, -0.8938007354736328, 18.086483001708984, 78.79345703125, 60.490264892578125, -20.352935791015625, 34.22294616699219, 0.792816162109375, 13.401906967163086, 27.08056640625, 6.262630462646484, 15.770389556884766, -21.438217163085938, 70.1673583984375, 20.035852432250977, 25.445831298828125, 30.172897338867188, 41.165977478027344, 14.752330780029297, 27.56029510498047, -5.603794097900391, 17.435211181640625, -11.565359115600586, 48.076499938964844, 41.0870361328125, 49.044639587402344, 29.113170623779297, -59.801918029785156, -8.993743896484375, 47.02106475830078, 31.930805206298828, -11.358016967773438, 7.497951507568359, 54.20087432861328, 35.41409683227539, 1.5048542022705078, 21.009923934936523, 22.967039108276367, 17.222640991210938, -4.352994918823242, 56.75941467285156, -9.333574295043945, 49.67306900024414, -1.0345115661621094, 62.46173095703125, -1.1292343139648438, 43.80635070800781, 48.712486267089844, 63.144622802734375, 32.58882522583008, 9.45937728881836, 106.21640014648438, 25.699172973632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000097.npy"} +{"epoch": 0.14243759177679882, "step": 98, "batch_size": 64, "mean": 26.44017791748047, "std": 36.65676498413086, "min": -101.72747802734375, "p10": -12.75349197387695, "median": 28.312896728515625, "p90": 67.85121002197266, "max": 125.05584716796875, "pos_frac": 0.796875, "sample": [41.42652893066406, 5.962932586669922, 66.78483581542969, 43.311866760253906, 6.542634963989258, 20.344873428344727, 23.466598510742188, 26.53246307373047, 29.332717895507812, 7.453620910644531, 30.89134407043457, -7.100128173828125, 20.486011505126953, 23.88842010498047, 66.75914001464844, 47.830169677734375, -14.136043548583984, 3.5022735595703125, -0.771881103515625, -22.81591796875, 40.42275619506836, 35.336021423339844, -29.774612426757812, -37.50923156738281, 55.179595947265625, 53.69546890258789, 30.965011596679688, 68.3082275390625, 68.83314514160156, 26.68596839904785, 40.85108947753906, -2.7983779907226562, 60.826744079589844, 125.05584716796875, -101.72747802734375, 83.72453308105469, 58.30641174316406, 27.293075561523438, 46.29362487792969, 17.76038360595703, -9.527538299560547, 40.364501953125, -55.86943054199219, 25.67687225341797, 38.75177001953125, 3.4631881713867188, 39.450469970703125, 81.88424682617188, 60.865753173828125, 36.63277053833008, 15.87774658203125, 9.15826416015625, 72.52142333984375, 103.7564697265625, 34.94605255126953, 31.70120620727539, 30.214981079101562, 13.365728378295898, 3.9307022094726562, -9.110126495361328, 54.43163299560547, -3.7505416870117188, 3.5270042419433594, -17.51245880126953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000098.npy"} +{"epoch": 0.14390602055800295, "step": 99, "batch_size": 64, "mean": 32.21813201904297, "std": 40.565940856933594, "min": -31.99970245361328, "p10": -7.269179534912109, "median": 22.191299438476562, "p90": 81.61805419921875, "max": 173.73838806152344, "pos_frac": 0.828125, "sample": [-0.6377658843994141, -8.479316711425781, 6.205972671508789, 20.387920379638672, 4.1863861083984375, 41.619651794433594, 26.099334716796875, 29.313217163085938, 106.23239135742188, 20.75615119934082, 23.06281280517578, 121.65399169921875, 8.592586517333984, -29.396011352539062, 81.75663757324219, -3.95361328125, 22.76660919189453, 37.236968994140625, -1.1230201721191406, 17.637657165527344, 72.8218994140625, 15.014175415039062, 11.031999588012695, 158.7764892578125, 9.591379165649414, 58.24620056152344, 2.7319412231445312, -24.764259338378906, 10.673341751098633, -31.99970245361328, -9.279716491699219, 53.19493103027344, 38.422157287597656, 25.43295669555664, 63.97300720214844, 173.73838806152344, 73.73570251464844, 36.65651321411133, 17.342864990234375, 19.46051025390625, 4.920806884765625, 30.082393646240234, 38.04676818847656, 81.29469299316406, 99.9983139038086, 5.2559661865234375, 12.476791381835938, 65.97190856933594, -6.095176696777344, 0.9330234527587891, 32.08808898925781, 86.35372924804688, 30.25063705444336, 10.37130355834961, 55.11131286621094, -7.7723236083984375, 41.88068771362305, 14.165655136108398, 65.66644287109375, 51.096282958984375, 21.615989685058594, 46.863502502441406, 3.4609031677246094, -20.79657745361328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000099.npy"} +{"epoch": 0.14537444933920704, "step": 100, "batch_size": 64, "mean": 18.931367874145508, "std": 35.162384033203125, "min": -73.22457885742188, "p10": -19.56691341400146, "median": 19.68370819091797, "p90": 63.28380966186525, "max": 139.45697021484375, "pos_frac": 0.765625, "sample": [53.64103698730469, 30.61054801940918, -6.158710479736328, 15.226375579833984, 22.705699920654297, -7.70367431640625, 33.02729797363281, 25.38829803466797, 7.2391815185546875, 39.7296142578125, 11.170814514160156, 0.9943103790283203, 37.77886962890625, 23.474334716796875, 4.55540657043457, 77.47323608398438, 19.58533477783203, -44.935516357421875, -29.993545532226562, -52.59576416015625, -14.095590591430664, -10.029121398925781, 1.4527606964111328, -61.118839263916016, 55.58650588989258, 20.049644470214844, 49.388824462890625, 24.28375244140625, 1.300058364868164, 6.270502090454102, 42.87212371826172, -3.2955169677734375, 44.85387420654297, 5.197620391845703, 66.98622131347656, 30.422821044921875, 8.690240859985352, 11.774772644042969, 60.065711975097656, 24.753570556640625, 40.19865036010742, 15.683296203613281, 9.116348266601562, 1.0902347564697266, 29.44281005859375, 34.283172607421875, -8.081817626953125, -26.36656951904297, -6.627403259277344, 66.0183334350586, 35.42095184326172, 68.95565032958984, 24.338937759399414, -73.22457885742188, 139.45697021484375, -10.896635055541992, 17.088531494140625, 77.97445678710938, -21.911766052246094, 64.66299438476562, 45.26934814453125, 7.203521728515625, 36.106971740722656, 19.782081604003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000100.npy"} +{"epoch": 0.14684287812041116, "step": 101, "batch_size": 64, "mean": 23.0167293548584, "std": 39.5931396484375, "min": -51.72026824951172, "p10": -19.86616401672363, "median": 18.58244228363037, "p90": 82.38601226806648, "max": 132.80038452148438, "pos_frac": 0.6875, "sample": [95.43736267089844, 132.80038452148438, 30.541885375976562, 29.69957733154297, -13.151481628417969, 101.09708404541016, 106.13153076171875, 10.024703979492188, 89.53326416015625, 106.57405090332031, -35.298828125, 27.36260986328125, -37.23778533935547, 46.80702209472656, 12.906875610351562, -8.178672790527344, -21.251953125, 13.656991958618164, 59.97425079345703, 27.936134338378906, -14.314773559570312, 24.8126220703125, 50.92439270019531, 61.785804748535156, 18.868553161621094, 18.28673553466797, 8.941410064697266, 63.60047149658203, 113.84121704101562, 24.666015625, -7.04522705078125, -6.296430587768555, -13.736297607421875, 51.37710952758789, 18.29633140563965, 37.82289123535156, 65.70909118652344, 43.64143371582031, -7.235443115234375, 6.312381744384766, -2.2173023223876953, -16.63265609741211, 19.40552520751953, -7.233543395996094, 2.0147628784179688, -13.574407577514648, 33.471031188964844, 32.03117370605469, 8.863716125488281, 24.366079330444336, -51.72026824951172, 34.97963333129883, 2.2058258056640625, -29.627208709716797, 25.120899200439453, 17.102643966674805, -13.169567108154297, 38.752418518066406, 6.465324401855469, -1.5305824279785156, 50.967926025390625, -27.647396087646484, 38.523277282714844, -33.46989440917969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000101.npy"} +{"epoch": 0.14831130690161526, "step": 102, "batch_size": 64, "mean": 16.883743286132812, "std": 37.09708023071289, "min": -73.98828125, "p10": -21.15103149414062, "median": 9.967472076416016, "p90": 55.19505081176759, "max": 133.6350860595703, "pos_frac": 0.671875, "sample": [2.756593704223633, 39.22419738769531, 56.769508361816406, 10.657058715820312, 5.15057373046875, 7.034645080566406, -17.816635131835938, -43.193817138671875, 21.9775447845459, -70.7513427734375, 26.1451416015625, 85.02302551269531, -10.89431381225586, 37.84125518798828, 27.104183197021484, -8.636688232421875, 9.277885437011719, 33.54490661621094, 48.701683044433594, 0.3158149719238281, -14.013946533203125, -3.869699478149414, 46.15477752685547, -5.029417037963867, 56.18043518066406, 21.65985107421875, 36.060874938964844, 133.6350860595703, -3.5846481323242188, 0.3329620361328125, 107.65235137939453, -0.3299751281738281, 36.91121292114258, 41.05817413330078, 3.7324047088623047, -15.843521118164062, 31.015737533569336, -73.98828125, 0.537750244140625, -27.321060180664062, 1.2662277221679688, -42.03143310546875, -22.142959594726562, -3.2259597778320312, 52.89582061767578, 28.0380859375, 41.15081024169922, 79.41586303710938, 42.36891174316406, 9.096054077148438, 66.92464447021484, -26.328628540039062, -5.9426116943359375, 7.349140167236328, 52.076637268066406, -18.836532592773438, 38.19255828857422, 32.48523712158203, 24.17308235168457, 36.17295837402344, -12.180953979492188, -17.192337036132812, 41.01044464111328, 44.642337799072266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000102.npy"} +{"epoch": 0.14977973568281938, "step": 103, "batch_size": 64, "mean": 33.756011962890625, "std": 40.74163055419922, "min": -34.49641418457031, "p10": -11.957221221923827, "median": 27.541614532470703, "p90": 78.47422332763672, "max": 174.6031494140625, "pos_frac": 0.796875, "sample": [-5.184345245361328, 8.287117004394531, 13.724794387817383, 32.87705993652344, -17.818222045898438, -23.816650390625, 33.03928756713867, 37.77623748779297, 13.706815719604492, 19.128284454345703, 27.710121154785156, 43.177093505859375, 16.502593994140625, 20.142532348632812, -2.1629791259765625, 64.95376586914062, 107.12918090820312, 35.38671875, 48.125511169433594, 59.09221649169922, -33.074432373046875, -11.998138427734375, 46.636077880859375, 27.37310791015625, -34.49641418457031, 73.60375213623047, 34.19061279296875, -16.43414306640625, 12.844833374023438, 19.51403045654297, 36.900657653808594, 158.03683471679688, 23.959640502929688, 19.130298614501953, 68.51431274414062, 65.43265533447266, -7.22515869140625, -11.861747741699219, 21.062963485717773, 17.218629837036133, -4.5569000244140625, 39.153167724609375, 60.14073181152344, -27.13763427734375, 21.797882080078125, 97.32389068603516, 77.82778930664062, 26.74835205078125, 174.6031494140625, 97.05746459960938, 101.77591705322266, 78.75126647949219, 11.769966125488281, 39.32244110107422, 65.17949676513672, -1.3384857177734375, 32.020957946777344, 4.157663345336914, 4.728275299072266, 49.72966766357422, 77.48888397216797, 16.516530990600586, 31.90618896484375, 44.31249237060547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000103.npy"} +{"epoch": 0.1512481644640235, "step": 104, "batch_size": 64, "mean": 32.083656311035156, "std": 46.35171127319336, "min": -62.71562194824219, "p10": -12.042958259582518, "median": 19.466812133789062, "p90": 88.77457122802736, "max": 183.248046875, "pos_frac": 0.75, "sample": [87.43367004394531, -7.685005187988281, 13.458240509033203, 14.482818603515625, 60.07841491699219, -4.0304718017578125, -25.836463928222656, 2.8988895416259766, -2.5141830444335938, 37.918479919433594, -12.594377517700195, 32.45500946044922, 144.2257080078125, 51.06584930419922, 83.61194610595703, -1.9167022705078125, 89.3492431640625, 31.881635665893555, -16.805702209472656, 111.6820068359375, 35.93046569824219, 6.592826843261719, 20.185192108154297, 8.165607452392578, 13.554815292358398, 2.280050277709961, 183.248046875, -43.34009552001953, 55.54338836669922, 129.1685791015625, 133.59683227539062, 28.94183349609375, 13.504217147827148, 64.18694305419922, 11.98689079284668, 14.013870239257812, -10.75631332397461, -21.46539306640625, 110.0950927734375, 15.19000244140625, 48.402915954589844, -4.89208984375, 66.69508361816406, -1.9049453735351562, 80.88896179199219, 38.302215576171875, 31.55756378173828, 50.07746505737305, 36.12687683105469, 29.332565307617188, 4.224554061889648, 87.32656860351562, 8.567329406738281, 27.099815368652344, 61.67625427246094, 65.65143585205078, -6.85029411315918, 27.6610164642334, -0.5220527648925781, 18.748432159423828, -21.520545959472656, 1.7035865783691406, -62.71562194824219, 7.935161590576172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000104.npy"} +{"epoch": 0.1527165932452276, "step": 105, "batch_size": 64, "mean": 45.658321380615234, "std": 40.5356559753418, "min": -62.70417785644531, "p10": 2.9770790100097666, "median": 42.83394241333008, "p90": 97.63106994628909, "max": 174.4459991455078, "pos_frac": 0.90625, "sample": [46.484588623046875, 31.374893188476562, 45.3277587890625, 28.131118774414062, 40.60984802246094, 42.707618713378906, 72.36871337890625, 107.63536834716797, 10.616388320922852, -62.70417785644531, -5.7277984619140625, 80.37422180175781, 81.59701538085938, 13.794866561889648, 34.28423309326172, 47.71176528930664, -32.13737487792969, 2.5869293212890625, -5.439666748046875, 106.0523910522461, 33.04206848144531, 102.48777770996094, 68.87461853027344, 20.180809020996094, 11.398361206054688, 80.91419219970703, 60.687767028808594, 42.96026611328125, 9.370172500610352, 37.58045196533203, 64.31044006347656, 158.11444091796875, 19.381866455078125, 50.74967956542969, 16.98822784423828, 17.433258056640625, 69.20417022705078, 89.32989501953125, 46.13714599609375, 16.375370025634766, 49.31647491455078, 90.08895874023438, 34.28777313232422, 37.9097900390625, 41.39543151855469, 51.27723693847656, -0.5826797485351562, 63.670257568359375, 67.90861511230469, 100.8634033203125, 35.057579040527344, 23.70117950439453, -17.80303955078125, 8.846160888671875, 63.22828674316406, 60.334228515625, 55.36261749267578, 106.416748046875, 58.5203857421875, 174.4459991455078, 24.85028839111328, 3.8874282836914062, 12.952526092529297, 75.02717590332031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000105.npy"} +{"epoch": 0.15418502202643172, "step": 106, "batch_size": 64, "mean": 28.547531127929688, "std": 35.17011260986328, "min": -53.068092346191406, "p10": -13.400667572021478, "median": 28.971057891845703, "p90": 74.28189849853516, "max": 131.30999755859375, "pos_frac": 0.84375, "sample": [28.161605834960938, 5.057191848754883, 7.942235946655273, -3.9114151000976562, 17.939029693603516, 1.3221149444580078, 49.726470947265625, -7.093025207519531, 30.84893035888672, 66.57478332519531, 35.872894287109375, 35.551414489746094, 3.0796852111816406, 73.329833984375, 24.83075714111328, 88.66896057128906, 6.20305061340332, 12.237190246582031, 74.68992614746094, 1.1338443756103516, 62.2325439453125, 40.28445816040039, 2.198486328125, 82.86776733398438, 23.668363571166992, 40.87659454345703, 7.636512756347656, -16.10394287109375, 52.480125427246094, -33.54309844970703, 46.10472106933594, 15.168281555175781, 39.68574142456055, 40.30021286010742, -2.951967239379883, 5.646217346191406, 23.022613525390625, 3.6230087280273438, 89.55967712402344, 62.62767791748047, 63.7762451171875, 24.24212646484375, -53.068092346191406, 4.471954345703125, 131.30999755859375, 35.606109619140625, 27.830551147460938, 34.30756378173828, 31.882369995117188, 7.4904632568359375, 0.6522083282470703, 56.400657653808594, 102.29015350341797, 33.94422912597656, 54.53515625, 56.02015686035156, -22.497879028320312, 31.528518676757812, -21.644927978515625, 29.78050994873047, 78.01158142089844, -40.64094543457031, -22.527626037597656, 45.79139709472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000106.npy"} +{"epoch": 0.15565345080763582, "step": 107, "batch_size": 64, "mean": 32.29783630371094, "std": 41.55763626098633, "min": -86.01530456542969, "p10": -12.304551887512197, "median": 21.553399085998535, "p90": 83.75872192382813, "max": 137.59239196777344, "pos_frac": 0.859375, "sample": [82.08799743652344, 107.64443969726562, 46.73741149902344, 66.89472961425781, 24.354652404785156, 29.903587341308594, 50.25151062011719, -2.9845027923583984, -86.01530456542969, 8.377645492553711, 16.00848960876465, 76.28114318847656, -35.12029266357422, 28.02914047241211, 9.740686416625977, 6.7650909423828125, 15.276006698608398, 84.47474670410156, 1.9118385314941406, -26.383071899414062, 102.90975189208984, 66.99842834472656, 78.0601806640625, 5.95469856262207, -17.422691345214844, 4.129585266113281, -1.3209152221679688, 5.2672882080078125, 137.59239196777344, 72.93347930908203, 18.95364761352539, 17.178680419921875, 94.00615692138672, 43.99012756347656, 64.34393310546875, 85.34720611572266, 16.445837020874023, 27.49126434326172, 16.506650924682617, -16.298858642578125, 61.68971252441406, -46.067230224609375, 63.436241149902344, 74.81829833984375, 11.529502868652344, 80.69651794433594, 1.3683013916015625, 78.99052429199219, 49.85325622558594, 51.15544891357422, 22.01342010498047, 21.0933780670166, 2.3251800537109375, -27.06578826904297, 0.1507110595703125, 5.6333160400390625, 13.048847198486328, 70.07275390625, 9.5869140625, 23.7579288482666, 64.23854064941406, 95.33023071289062, 2.4152069091796875, 9.687545776367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000107.npy"} +{"epoch": 0.15712187958883994, "step": 108, "batch_size": 64, "mean": 38.376766204833984, "std": 55.82282257080078, "min": -59.97906494140625, "p10": -22.16678638458252, "median": 27.378726959228516, "p90": 118.28107833862308, "max": 206.72198486328125, "pos_frac": 0.765625, "sample": [35.01667785644531, 130.7605438232422, 38.14289855957031, -45.9859619140625, -59.97906494140625, 102.48583984375, 30.345014572143555, 8.859207153320312, 206.72198486328125, -0.4571075439453125, -11.389389038085938, 121.38001251220703, 7.198951721191406, 5.43682861328125, -5.294862747192383, 72.4368667602539, -6.634426116943359, 93.36231994628906, 156.15333557128906, 103.9257583618164, 111.05023193359375, 79.66761779785156, 1.1192646026611328, -21.171072006225586, 46.559669494628906, 10.705940246582031, 26.806915283203125, 15.173015594482422, 71.71768188476562, 4.664264678955078, 25.40924835205078, 0.6609764099121094, 70.34345245361328, -22.593521118164062, 168.54122924804688, -37.46467590332031, 78.31620788574219, -29.354503631591797, -14.995559692382812, 4.394926071166992, 26.441795349121094, 6.647674560546875, 47.46037292480469, 147.97335815429688, 32.64592742919922, 9.387470245361328, 84.35376739501953, 70.85951232910156, 28.463579177856445, -49.9498291015625, 15.118026733398438, 50.44145965576172, 122.46530151367188, 61.006980895996094, -0.8505630493164062, -1.1794071197509766, 37.96160125732422, 15.973834991455078, 72.17156982421875, -48.17026138305664, 27.950538635253906, 41.58021926879883, 62.21165466308594, 23.1116943359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000108.npy"} +{"epoch": 0.15859030837004406, "step": 109, "batch_size": 64, "mean": 41.38175964355469, "std": 48.83089828491211, "min": -79.39309692382812, "p10": -11.662704467773436, "median": 34.84614181518555, "p90": 104.10176391601563, "max": 179.07785034179688, "pos_frac": 0.828125, "sample": [49.24090576171875, 7.899898529052734, 129.82408142089844, 83.11917877197266, 30.346473693847656, 53.33671188354492, -55.32289123535156, 11.685935974121094, 17.655517578125, 49.203826904296875, 128.27197265625, 41.336483001708984, 72.25675964355469, 50.93424987792969, 53.20403289794922, 5.752176284790039, 21.220890045166016, 36.360313415527344, -12.829113006591797, -4.99659538269043, -6.680812835693359, 179.07785034179688, 0.4922618865966797, -9.503936767578125, 26.001089096069336, 4.483406066894531, 122.02677917480469, 30.659151077270508, 32.434112548828125, -37.22293472290039, 62.26594543457031, 65.31752014160156, 8.958629608154297, 66.35487365722656, 55.8416748046875, 37.65691375732422, 96.78108215332031, 69.89212799072266, 104.73174285888672, 102.6318130493164, -0.7390365600585938, 12.418113708496094, -79.39309692382812, 29.40721893310547, 1.395263671875, 71.51016998291016, 32.74705505371094, 33.184322357177734, 19.12349510192871, 82.38372802734375, -46.870819091796875, 35.35858154296875, 35.89296340942383, 107.43870544433594, 124.49113464355469, 34.333702087402344, 84.4215316772461, 34.13954162597656, 86.75625610351562, 102.01927947998047, -12.587890625, -21.891372680664062, 3.340608596801758, 98.85311126708984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000109.npy"} +{"epoch": 0.16005873715124816, "step": 110, "batch_size": 64, "mean": 37.423946380615234, "std": 52.068763732910156, "min": -94.5721435546875, "p10": -14.092668533325194, "median": 32.070075035095215, "p90": 107.83451538085939, "max": 176.31283569335938, "pos_frac": 0.828125, "sample": [114.00843811035156, 52.0699462890625, 22.999530792236328, 52.83551025390625, 56.32288360595703, 35.628929138183594, 48.04110336303711, 3.3814010620117188, -94.5721435546875, 44.414466857910156, 65.8060302734375, 15.476913452148438, -61.7315673828125, 18.090051651000977, 13.52627182006836, 1.1153278350830078, 29.711877822875977, 164.9337158203125, 103.74380493164062, 26.441314697265625, 4.930149078369141, -8.103763580322266, 176.31283569335938, 129.9278564453125, 98.12101745605469, 76.32437133789062, 17.389602661132812, -15.939903259277344, -11.93703842163086, 59.807186126708984, -41.29008483886719, -15.016510009765625, 9.61532974243164, 19.11534309387207, 5.82904052734375, 155.37130737304688, 52.49598693847656, 83.5357666015625, 109.58767700195312, 4.5007171630859375, 75.41921997070312, -7.068085670471191, 38.25730895996094, 34.99004364013672, 13.688289642333984, 12.19970703125, 76.75664520263672, -57.3524169921875, 71.83248901367188, -24.278427124023438, 39.56952667236328, 1.9475135803222656, 54.256797790527344, 34.42827224731445, 48.609859466552734, 43.64099884033203, 19.726526260375977, 19.636924743652344, 18.93033218383789, 43.233314514160156, 77.54350280761719, 2.1946544647216797, -4.384464263916016, 138.53329467773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000110.npy"} +{"epoch": 0.16152716593245228, "step": 111, "batch_size": 64, "mean": 42.30280303955078, "std": 57.86744689941406, "min": -81.54879760742188, "p10": -31.243550109863275, "median": 33.0509033203125, "p90": 114.88379058837891, "max": 189.19546508789062, "pos_frac": 0.765625, "sample": [189.19546508789062, -49.60833740234375, -81.54879760742188, 115.70333862304688, -7.380222320556641, 53.20487976074219, 12.846473693847656, 33.27002716064453, 26.59933090209961, 141.8157196044922, 31.617637634277344, 76.82403564453125, 42.526084899902344, 54.54083251953125, 27.798126220703125, 106.94366455078125, 109.8185806274414, -34.459747314453125, 97.66365051269531, 8.524980545043945, 77.51273345947266, -24.94915771484375, -22.102977752685547, -10.41168212890625, 64.48579406738281, 112.97151184082031, -6.050079345703125, 109.94297790527344, -33.94114685058594, 11.028617858886719, -22.845428466796875, 32.83177947998047, 129.45465087890625, 22.362930297851562, 144.93984985351562, 54.753631591796875, 30.359111785888672, 53.42318344116211, 66.23787689208984, 128.5244140625, 23.914459228515625, -41.78724670410156, 19.252788543701172, 26.98868179321289, 135.6851806640625, 68.78419494628906, 28.680023193359375, 57.13935852050781, 108.25798034667969, 2.3196964263916016, 109.81843566894531, 36.71337890625, 91.69222259521484, 69.02365112304688, 50.781532287597656, 13.32756233215332, -80.9703369140625, 31.485084533691406, 27.17033576965332, 40.36967468261719, -5.305706024169922, 90.83354187011719, -61.00807189941406, -10.211288452148438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000111.npy"} +{"epoch": 0.16299559471365638, "step": 112, "batch_size": 64, "mean": 27.042510986328125, "std": 39.837196350097656, "min": -58.46979522705078, "p10": -24.0540771484375, "median": 25.093753814697266, "p90": 88.50456390380862, "max": 119.09957885742188, "pos_frac": 0.765625, "sample": [43.102115631103516, 119.09957885742188, 84.14384460449219, -5.904844284057617, -42.15517807006836, 32.640384674072266, 36.221561431884766, 29.38568115234375, 27.977928161621094, -5.969642639160156, 96.53790283203125, 78.94606018066406, 0.16957664489746094, -2.4005508422851562, 90.37344360351562, 60.28194808959961, 38.957759857177734, 55.074913024902344, 38.66630172729492, 35.512786865234375, 44.158294677734375, 32.20830535888672, 67.93315124511719, -14.675193786621094, 25.156448364257812, 21.920957565307617, 29.774681091308594, 31.77138900756836, -23.306129455566406, 17.381145477294922, 3.490732192993164, -33.175724029541016, 29.55518341064453, 106.36698150634766, 109.36446380615234, -31.550399780273438, 40.2030029296875, 15.063671112060547, 69.2110595703125, 0.5396709442138672, -58.46979522705078, 94.40007781982422, 24.48564910888672, 13.014778137207031, -11.184951782226562, 21.209644317626953, 61.699188232421875, 0.5886764526367188, 14.882743835449219, 96.10386657714844, 25.03105926513672, 9.140644073486328, 43.78784942626953, -40.681549072265625, 11.67170524597168, -24.37462615966797, -25.574424743652344, -10.302543640136719, 72.18505859375, 0.9461498260498047, 23.956586837768555, 5.245445251464844, -8.49078369140625, 39.39708709716797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000112.npy"} +{"epoch": 0.1644640234948605, "step": 113, "batch_size": 64, "mean": 29.26871109008789, "std": 37.84221267700195, "min": -39.223541259765625, "p10": -15.624301338195798, "median": 30.973909378051758, "p90": 81.2593894958496, "max": 116.9789047241211, "pos_frac": 0.734375, "sample": [-31.248046875, 74.07969665527344, -17.29705047607422, 87.2779541015625, 9.124191284179688, 37.88218688964844, 14.383920669555664, 36.349456787109375, 4.254243850708008, -8.934333801269531, 8.5811767578125, 83.22268676757812, 54.51631164550781, 108.43536376953125, 37.54319763183594, -18.122663497924805, -6.19061279296875, -8.438880920410156, 8.657447814941406, 65.94467163085938, -20.769729614257812, 47.00849914550781, 78.63824462890625, -21.50738525390625, 53.611961364746094, -7.341705322265625, 3.871358871459961, 116.9789047241211, 46.707054138183594, -39.223541259765625, 18.499671936035156, 74.34237670898438, 34.08088302612305, -11.014892578125, 35.80747985839844, 51.38391876220703, 13.737464904785156, 81.3922348022461, 19.888931274414062, -10.370758056640625, 51.49054718017578, 70.24627685546875, -11.721220016479492, 1.319234848022461, -26.138778686523438, 44.79397964477539, 35.17298126220703, 39.85040283203125, -9.897865295410156, 15.030462265014648, 111.54217529296875, -2.4377098083496094, 33.00356674194336, 32.79298400878906, 97.82955169677734, 80.94941711425781, 32.65266799926758, 62.01026916503906, -6.966501235961914, 11.262466430664062, 12.263275146484375, 15.16055679321289, 29.295150756835938, 47.951751708984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000113.npy"} +{"epoch": 0.16593245227606462, "step": 114, "batch_size": 64, "mean": 46.41866683959961, "std": 63.81900405883789, "min": -40.19544219970703, "p10": -26.334640502929688, "median": 42.48588562011719, "p90": 108.92114639282231, "max": 345.22332763671875, "pos_frac": 0.765625, "sample": [175.47457885742188, 89.7815933227539, 51.97412109375, 47.39861297607422, 26.39226531982422, 116.13877868652344, 19.412641525268555, 45.7530517578125, -37.340301513671875, 65.76534271240234, -30.824270248413086, 69.73184204101562, 44.423912048339844, 20.46508026123047, -37.50885772705078, 83.8590087890625, 14.447261810302734, -31.550804138183594, 33.5517578125, 120.24971008300781, 13.566003799438477, 54.37975311279297, -7.141845703125, 113.09525299072266, 40.35858917236328, 59.11016845703125, 34.07942199707031, 24.550739288330078, 37.94245910644531, 60.617549896240234, 63.83507537841797, -5.8095550537109375, -9.16729736328125, 56.287315368652344, -24.89471435546875, 15.258050918579102, 49.17717742919922, 68.13323974609375, 42.21647644042969, 99.18156433105469, 58.92233657836914, 87.86813354492188, 5.547904968261719, 49.70307922363281, 37.505313873291016, -12.045318603515625, 232.7224884033203, 6.414785385131836, 42.75529479980469, 139.67669677734375, -9.669927597045898, 71.54861450195312, -40.19544219970703, -26.951751708984375, 345.22332763671875, 56.97404861450195, -3.5288467407226562, 84.75015258789062, 33.81144714355469, -31.93661117553711, 26.18958282470703, 86.28089904785156, -17.447246551513672, 74.30500793457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000114.npy"} +{"epoch": 0.16740088105726872, "step": 115, "batch_size": 64, "mean": 32.142608642578125, "std": 64.82154083251953, "min": -149.059814453125, "p10": -27.917952728271484, "median": 19.263816833496094, "p90": 124.3146705627442, "max": 235.79763793945312, "pos_frac": 0.75, "sample": [0.5685615539550781, -2.8128280639648438, 130.42262268066406, 59.01904296875, -50.115631103515625, -0.38089752197265625, -45.125450134277344, 10.397706985473633, 13.026958465576172, 18.00251007080078, 25.12104034423828, -8.158218383789062, 42.84785842895508, 56.098304748535156, 10.052793502807617, 165.8162384033203, 21.326066970825195, -7.1557769775390625, 235.79763793945312, -11.25996208190918, 73.3510513305664, 7.5061492919921875, 137.34857177734375, 69.29875946044922, 169.51876831054688, -29.228057861328125, 22.959793090820312, 25.883487701416016, 7.98914909362793, 96.20433044433594, 172.5381317138672, 97.502197265625, 0.6962738037109375, -119.0702133178711, 9.221691131591797, 34.27350616455078, 57.79851531982422, 16.706035614013672, 9.19549560546875, 42.61400604248047, 27.535720825195312, 49.14842224121094, 25.878524780273438, 29.398460388183594, 140.52450561523438, -27.250396728515625, 21.219188690185547, 14.985015869140625, -8.238815307617188, 12.320610046386719, 20.525123596191406, 77.71673583984375, 3.1708450317382812, 15.206829071044922, -20.180782318115234, 110.06278228759766, 65.14149475097656, -15.810569763183594, 63.18345642089844, -149.059814453125, 106.572509765625, -47.290428161621094, 4.775320053100586, -28.20404815673828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000115.npy"} +{"epoch": 0.16886930983847284, "step": 116, "batch_size": 64, "mean": 35.38352966308594, "std": 45.090545654296875, "min": -35.362083435058594, "p10": -15.227689552307128, "median": 30.305294036865234, "p90": 89.13259277343751, "max": 208.32237243652344, "pos_frac": 0.78125, "sample": [7.563470840454102, 3.5611610412597656, -32.37331771850586, -30.20110321044922, 25.598093032836914, 44.71342468261719, 60.4840087890625, 23.27397918701172, 47.57246398925781, 56.62689208984375, 29.370216369628906, -10.115358352661133, 41.11930847167969, 27.82665252685547, 7.975467681884766, -6.34208869934082, 53.70311737060547, -35.362083435058594, 81.39515686035156, 86.64601135253906, 39.036842346191406, -16.012969970703125, 22.723709106445312, 16.984947204589844, 48.07250213623047, 25.277503967285156, 73.92929077148438, 104.75979614257812, 32.048065185546875, -32.31194305419922, 119.66232299804688, 35.61064910888672, 22.40111541748047, 42.681976318359375, -13.395368576049805, -26.6180419921875, 6.500820159912109, 8.3065185546875, -2.1711597442626953, 98.15087890625, 5.270849227905273, 128.34193420410156, 31.240371704101562, 87.7708740234375, 79.14398193359375, 33.95980453491211, 55.109092712402344, -30.543167114257812, -8.207998275756836, 80.15241241455078, 23.974376678466797, -6.177515029907227, 208.32237243652344, 64.25285339355469, 11.9959716796875, 49.97285461425781, 67.22525024414062, 62.20280838012695, -10.485605239868164, 14.701271057128906, 1.0024871826171875, 40.88456726074219, 96.04701232910156, 89.7161865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000116.npy"} +{"epoch": 0.17033773861967694, "step": 117, "batch_size": 64, "mean": 28.79342269897461, "std": 50.358863830566406, "min": -98.87026977539062, "p10": -25.984368896484366, "median": 23.020792961120605, "p90": 85.40611572265627, "max": 183.33319091796875, "pos_frac": 0.75, "sample": [13.001102447509766, 53.21277618408203, 49.96458435058594, -15.637187957763672, 21.37879753112793, -2.6282119750976562, 19.21799659729004, 38.93675231933594, 20.393821716308594, 39.491600036621094, 42.86838912963867, 66.21492004394531, 130.419677734375, 111.9403076171875, 17.232017517089844, 45.16474914550781, -58.98857116699219, 87.40318298339844, 7.917022705078125, -95.8043212890625, -30.284912109375, 8.188629150390625, 15.7161865234375, 62.61274337768555, 43.69197463989258, -15.94976806640625, -31.560073852539062, 3.819478988647461, 24.66278839111328, -13.638175964355469, 38.355628967285156, 119.65565490722656, -4.211250305175781, -0.76898193359375, 145.89633178710938, 14.564208984375, 183.33319091796875, 15.419815063476562, 29.919822692871094, 62.52771759033203, 14.234813690185547, 8.651447296142578, 8.328969955444336, -9.277915954589844, -98.87026977539062, 30.340606689453125, 79.48307800292969, 31.246994018554688, 88.78496551513672, 37.36378479003906, -47.130340576171875, -9.502325057983398, 6.501125335693359, 54.7862663269043, 80.74629211425781, 78.79981994628906, 6.0871124267578125, 49.042755126953125, 69.64988708496094, 27.835540771484375, 43.302886962890625, -1.8251571655273438, -37.45878601074219, 68.00720977783203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000117.npy"} +{"epoch": 0.17180616740088106, "step": 118, "batch_size": 64, "mean": 35.48102569580078, "std": 45.978145599365234, "min": -54.203582763671875, "p10": -11.347178268432614, "median": 23.785919189453125, "p90": 108.28695220947269, "max": 155.19317626953125, "pos_frac": 0.78125, "sample": [-7.45147705078125, 21.063804626464844, -0.2848949432373047, -25.508071899414062, 19.62920379638672, 16.910167694091797, 60.19553756713867, 42.419471740722656, 47.86420440673828, 112.78955078125, 85.54011535644531, 23.531524658203125, 122.12503051757812, 30.55451202392578, -6.9435882568359375, 54.131309509277344, -16.156909942626953, -12.320144653320312, 32.93107223510742, 155.19317626953125, 120.52243041992188, -9.076923370361328, -54.203582763671875, 2.8790283203125, -38.83384704589844, 19.58254051208496, 2.100004196166992, 33.2315673828125, 13.790546417236328, 13.370254516601562, 11.604660034179688, 73.63341522216797, 2.5700836181640625, 74.68041229248047, 25.34778594970703, 48.76948547363281, 21.719642639160156, 40.270477294921875, 146.8447265625, -2.8382911682128906, 29.970001220703125, -17.987651824951172, -3.2522430419921875, 40.33045959472656, 142.97894287109375, 83.82117462158203, 111.45472717285156, 43.387420654296875, 77.76007843017578, 14.544729232788086, 1.074462890625, 66.31239318847656, 100.89547729492188, 69.80858612060547, 49.369468688964844, 24.040313720703125, 35.80571746826172, 17.161651611328125, 0.010175704956054688, 23.320419311523438, -21.91374969482422, 81.18154907226562, 3.1093292236328125, -4.575828552246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000118.npy"} +{"epoch": 0.17327459618208516, "step": 119, "batch_size": 64, "mean": 34.964866638183594, "std": 55.77119827270508, "min": -69.93711853027344, "p10": -30.386153411865234, "median": 35.01447868347168, "p90": 96.02911682128908, "max": 207.301025390625, "pos_frac": 0.671875, "sample": [13.827661514282227, 56.305511474609375, 38.215362548828125, 142.07208251953125, 39.6502685546875, -31.554977416992188, -8.039497375488281, 50.256622314453125, -5.573780059814453, 50.441139221191406, -9.300148010253906, -11.382667541503906, 207.301025390625, 48.612144470214844, 18.338729858398438, 121.33074951171875, 22.927234649658203, 9.993587493896484, 181.18296813964844, 48.06111145019531, 90.82337951660156, 73.72306823730469, 74.2200698852539, -5.346393585205078, 29.97917938232422, 34.529808044433594, -32.521995544433594, 18.241546630859375, 53.84953308105469, 56.0277099609375, -0.222015380859375, -4.448219299316406, 98.26014709472656, 36.88920211791992, 169.03964233398438, -4.3070526123046875, 64.6068344116211, 78.85726928710938, -2.352121353149414, 26.937461853027344, -55.29292297363281, 42.68274688720703, -57.5931396484375, 128.967041015625, 50.73919677734375, 65.79779815673828, -23.785919189453125, -30.796463012695312, 84.11617279052734, 35.499149322509766, -29.42876434326172, -69.93711853027344, -2.6151046752929688, 83.23567199707031, 17.370513916015625, 56.6740837097168, 49.997596740722656, 9.944513320922852, 65.76481628417969, -6.874094009399414, -42.89453887939453, 3.7375946044921875, 41.708404541015625, -18.71808624267578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000119.npy"} +{"epoch": 0.17474302496328928, "step": 120, "batch_size": 64, "mean": 48.279685974121094, "std": 59.23860549926758, "min": -80.04412841796875, "p10": -19.45223960876464, "median": 38.3485107421875, "p90": 133.17235412597657, "max": 190.67474365234375, "pos_frac": 0.84375, "sample": [151.5937957763672, 84.99212646484375, 32.9530029296875, 110.66830444335938, 36.977359771728516, 10.088586807250977, -63.26861572265625, 99.52027893066406, 20.053546905517578, 73.2657241821289, 52.6875, 161.76712036132812, 79.74446868896484, 6.668464660644531, 17.44566535949707, 17.355438232421875, 135.03848266601562, 56.735443115234375, 69.15105438232422, -41.56682586669922, -40.78120422363281, 128.81805419921875, 40.107574462890625, 32.94575500488281, 42.21105194091797, 36.86936950683594, 47.11210632324219, 6.474773406982422, 160.03128051757812, -80.04412841796875, -23.190032958984375, 37.74946594238281, 24.20482635498047, 3.8371849060058594, 103.20256042480469, 9.35032844543457, 190.67474365234375, 53.57662582397461, 69.72830200195312, 3.395986557006836, 11.57375717163086, -5.601861953735352, 54.303382873535156, 182.42266845703125, 34.83433532714844, 43.90411376953125, 126.36116027832031, 172.009521484375, 38.94755554199219, 47.072669982910156, 0.33863067626953125, 87.3635025024414, -42.39158630371094, 18.556236267089844, 63.610084533691406, 36.176513671875, -2.764942169189453, 110.66671752929688, 74.51393127441406, -11.699146270751953, 75.25942993164062, -22.774993896484375, 36.188385009765625, 2.8841400146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000120.npy"} +{"epoch": 0.1762114537444934, "step": 121, "batch_size": 64, "mean": 53.492347717285156, "std": 69.47612762451172, "min": -147.92068481445312, "p10": -10.001323318481441, "median": 48.22388458251953, "p90": 149.72737426757814, "max": 215.3943328857422, "pos_frac": 0.78125, "sample": [8.579057693481445, -5.361457824707031, -1.762298583984375, 215.3943328857422, -6.449474334716797, 10.135932922363281, 94.50265502929688, -16.10687828063965, 77.89765167236328, 76.36898040771484, -0.061279296875, 62.217384338378906, 136.93592834472656, 107.77368927001953, 78.64398193359375, 0.04378509521484375, 88.31130981445312, 65.37849426269531, 53.240516662597656, -40.808876037597656, 36.84595489501953, 113.94789123535156, -59.056602478027344, 145.8870086669922, 167.3637237548828, 64.42304992675781, 33.570350646972656, 16.025257110595703, 111.0394287109375, 3.0889320373535156, 76.85639953613281, -21.46167755126953, 47.989776611328125, 83.22142028808594, 1.3088359832763672, 199.20745849609375, 4.889900207519531, 179.17520141601562, 45.096221923828125, 203.6339111328125, 9.261314392089844, 125.73896026611328, -11.523544311523438, 15.926422119140625, -25.766082763671875, 15.431632995605469, 73.89877319335938, 23.170806884765625, -6.1299591064453125, 55.042510986328125, 4.671548843383789, 19.358863830566406, 63.99098205566406, 130.0633544921875, 151.3732452392578, 70.72348022460938, -147.92068481445312, 56.372528076171875, -5.191802978515625, 76.67581176757812, 205.30029296875, 19.408254623413086, -2.750457763671875, 48.45799255371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000121.npy"} +{"epoch": 0.1776798825256975, "step": 122, "batch_size": 64, "mean": 66.51847839355469, "std": 65.45465850830078, "min": -89.48963928222656, "p10": 0.6307792663574229, "median": 51.97062683105469, "p90": 158.50116119384765, "max": 232.6812744140625, "pos_frac": 0.90625, "sample": [147.26353454589844, 21.74146270751953, 74.50577545166016, 80.92092895507812, 18.877058029174805, 26.95111083984375, 50.422821044921875, 95.0481948852539, 22.31008529663086, 114.14991760253906, 15.720836639404297, 128.04998779296875, 122.2857894897461, 232.6812744140625, 102.1299819946289, 34.43830108642578, 16.67131805419922, 56.03797149658203, 158.5195770263672, 48.90458679199219, 19.613401412963867, 140.0196533203125, 35.24557876586914, -10.219047546386719, 81.62562561035156, 12.037429809570312, 62.98876953125, 140.84872436523438, 74.62419891357422, 65.02352905273438, 74.42150115966797, -25.62993621826172, 170.82058715820312, -7.273839950561523, 52.95196533203125, 158.45819091796875, 62.84857177734375, 25.010337829589844, 171.93899536132812, 154.9817352294922, 9.845113754272461, 50.989288330078125, -34.011966705322266, 228.75535583496094, 18.89740753173828, 40.25577926635742, 31.355430603027344, 42.75746536254883, 25.070884704589844, -89.48963928222656, -7.3970489501953125, 27.640762329101562, 30.57774543762207, 67.12137603759766, 196.99237060546875, 1.5966224670410156, 114.55239868164062, 53.76142120361328, 0.21684646606445312, 13.852088928222656, 161.9410400390625, 74.64424133300781, 19.895309448242188, 149.3961181640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000122.npy"} +{"epoch": 0.17914831130690162, "step": 123, "batch_size": 64, "mean": 43.66535186767578, "std": 72.58432006835938, "min": -149.0946044921875, "p10": -25.914376640319823, "median": 22.062121391296387, "p90": 141.342221069336, "max": 232.0101318359375, "pos_frac": 0.671875, "sample": [77.86064910888672, 81.28756713867188, -4.355216979980469, 9.003898620605469, 88.48408508300781, 75.94435119628906, 117.9527587890625, 109.91635131835938, 78.90876007080078, 48.02960968017578, -0.4599952697753906, -20.62835693359375, 7.753009796142578, -39.903038024902344, 154.3282928466797, 85.12107849121094, 16.459575653076172, -12.908889770507812, -3.4599456787109375, 65.64547729492188, -5.582366943359375, 120.56692504882812, 64.68882751464844, -75.12178039550781, 21.427772521972656, 95.52398681640625, 212.4322509765625, 22.022132873535156, 150.04776000976562, 163.12646484375, -26.110986709594727, 62.798927307128906, 20.430431365966797, 8.658775329589844, 83.41788482666016, 121.029296875, 20.446151733398438, 47.73419189453125, 175.63429260253906, -20.924819946289062, 91.53128051757812, 84.12788391113281, -41.83488845825195, -66.40733337402344, 74.68095397949219, -7.7734375, -39.42961502075195, -4.532110214233398, -17.79187774658203, 232.0101318359375, -25.45561981201172, 68.4447021484375, 50.50145721435547, 204.12686157226562, -9.050430297851562, 22.102109909057617, 118.84306335449219, 34.80408477783203, -149.0946044921875, -15.401374816894531, 0.18337249755859375, -9.567272186279297, 0.3914318084716797, 1.9475536346435547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000123.npy"} +{"epoch": 0.18061674008810572, "step": 124, "batch_size": 64, "mean": 63.998435974121094, "std": 94.07720184326172, "min": -119.12956237792969, "p10": -26.179146194458006, "median": 41.06078338623047, "p90": 175.1367126464844, "max": 372.7012634277344, "pos_frac": 0.78125, "sample": [144.74769592285156, -25.517013549804688, 41.26829528808594, 32.93009948730469, 25.883007049560547, 148.1160125732422, -98.45211791992188, -9.209434509277344, 159.98907470703125, 185.93206787109375, -2.8145523071289062, 29.603641510009766, 4.553615570068359, 68.3023910522461, 76.27111053466797, 229.95986938476562, 154.96302795410156, 51.17820358276367, -18.887741088867188, 48.859840393066406, -119.12956237792969, -41.93201446533203, 139.41217041015625, -1.9359550476074219, 201.43316650390625, 128.47735595703125, 75.85931396484375, -5.264129638671875, 46.445709228515625, 118.68771362304688, -26.46291732788086, 33.77312088012695, 86.004150390625, 15.275468826293945, 11.308807373046875, 344.79986572265625, 100.45320129394531, 177.6453857421875, 29.148574829101562, 169.28314208984375, 103.60492706298828, -49.402366638183594, 16.456790924072266, -19.229843139648438, 39.01728057861328, 121.63018798828125, -100.01632690429688, 98.72506713867188, 20.68976402282715, 40.853271484375, 38.946537017822266, 75.20694732666016, -64.87773132324219, 248.55093383789062, 23.661346435546875, 372.7012634277344, 46.3978271484375, 165.05966186523438, 1.3960514068603516, 32.42432403564453, 52.15596008300781, 75.74846649169922, 14.182975769042969, 11.056732177734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000124.npy"} +{"epoch": 0.18208516886930984, "step": 125, "batch_size": 64, "mean": 57.12309646606445, "std": 76.90096282958984, "min": -88.54559326171875, "p10": -27.918128967285153, "median": 33.050578117370605, "p90": 162.3137603759766, "max": 249.46945190429688, "pos_frac": 0.78125, "sample": [-3.4563846588134766, 147.55577087402344, 89.96006774902344, 22.595367431640625, -12.869747161865234, 9.742755889892578, 111.20695495605469, -25.959579467773438, 140.22933959960938, 16.514638900756836, 10.723758697509766, -28.75750732421875, 24.44466209411621, 1.0699615478515625, 102.42084503173828, 108.15167236328125, 86.22364807128906, 53.98322296142578, 18.35262107849121, 4.904998779296875, 105.81422424316406, 18.57978057861328, -54.013702392578125, -13.102630615234375, 90.35135650634766, 249.46945190429688, -35.17326354980469, 31.088743209838867, 105.56912231445312, 35.42174530029297, -5.5638580322265625, -80.78047943115234, 35.012413024902344, 16.348875045776367, 30.524917602539062, 207.3389434814453, 6.288860321044922, 204.66021728515625, 120.28335571289062, -23.36774444580078, 11.700634002685547, 103.51644897460938, 186.00390625, 91.08399200439453, 226.95062255859375, 90.62046813964844, 55.51795196533203, 46.565242767333984, 166.42666625976562, 51.01831817626953, 11.842849731445312, 126.99229431152344, 41.708152770996094, -15.71412467956543, 108.35075378417969, 26.762020111083984, -41.37818908691406, 149.5689239501953, 30.67439079284668, -88.54559326171875, 207.15489196777344, 152.71697998046875, -33.287384033203125, 27.84044075012207], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000125.npy"} +{"epoch": 0.18355359765051396, "step": 126, "batch_size": 64, "mean": 53.1960334777832, "std": 70.9228286743164, "min": -77.41954803466797, "p10": -23.095580291748043, "median": 38.00962257385254, "p90": 156.21297149658204, "max": 245.445068359375, "pos_frac": 0.828125, "sample": [118.616943359375, 64.79252624511719, 12.185300827026367, 89.56005096435547, 13.133941650390625, 50.06450653076172, -37.78886413574219, 2.185688018798828, 58.89576721191406, 154.96371459960938, -50.80354309082031, 1.9590644836425781, 245.445068359375, 19.22516632080078, 84.37032318115234, 0.5326156616210938, 131.15719604492188, 41.753631591796875, 127.56396484375, 65.03825378417969, 44.72705841064453, 50.96043014526367, -8.222625732421875, 36.39847183227539, 64.52332305908203, 184.4442138671875, -19.700485229492188, 156.7483673095703, 32.567474365234375, 46.460540771484375, 20.651187896728516, 25.11547088623047, 71.87484741210938, 15.159408569335938, 177.42388916015625, 39.62077331542969, 57.533241271972656, -73.56925964355469, 145.95596313476562, -15.017337799072266, -59.38719177246094, -34.59523010253906, 233.45758056640625, -3.1742687225341797, 26.545507431030273, 9.991275787353516, 2.0450401306152344, 20.2042236328125, 121.14830017089844, 114.54144287109375, -77.41954803466797, 188.41075134277344, 83.86763000488281, 23.0760498046875, 35.22871398925781, 93.02436828613281, 30.999099731445312, 62.931556701660156, 17.115055084228516, -24.550621032714844, 13.557357788085938, 91.29786682128906, 12.416091918945312, 177.308837890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000126.npy"} +{"epoch": 0.18502202643171806, "step": 127, "batch_size": 64, "mean": 39.201202392578125, "std": 61.68424606323242, "min": -123.87353515625, "p10": -34.150109100341794, "median": 40.75104904174805, "p90": 115.02199325561524, "max": 180.01454162597656, "pos_frac": 0.78125, "sample": [54.00200653076172, 79.53555297851562, -8.205238342285156, 37.601158142089844, 2.912353515625, -91.50936889648438, 149.1126708984375, 7.3769378662109375, 99.19518280029297, -123.87353515625, 8.651906967163086, 61.345069885253906, 74.55982971191406, -33.97406768798828, 18.357341766357422, 35.77482604980469, 92.4933090209961, 10.262151718139648, -120.54515075683594, 72.50056457519531, 24.63189697265625, 72.41822814941406, 125.28152465820312, 20.585098266601562, 67.71914672851562, 165.38015747070312, 20.598987579345703, 82.99273681640625, -18.868858337402344, 14.898231506347656, 120.00728607177734, 76.6742935180664, 107.00440979003906, 24.93598175048828, 51.30525207519531, 147.3843231201172, -2.0030784606933594, -18.40930938720703, 20.49215316772461, -1.3974990844726562, 16.660327911376953, 112.8416976928711, 9.129032135009766, 75.24889373779297, 46.32777404785156, 62.122642517089844, 44.98352813720703, 115.95640563964844, 107.5722427368164, 51.813804626464844, -44.29737854003906, 47.68107604980469, 19.372621536254883, 180.01454162597656, -59.55956268310547, 61.318634033203125, 45.076805114746094, 92.07560729980469, 43.90093994140625, 12.826480865478516, 32.86986541748047, -23.789588928222656, -34.248313903808594, -34.225555419921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000127.npy"} +{"epoch": 0.18649045521292218, "step": 128, "batch_size": 64, "mean": 41.13318634033203, "std": 72.4039077758789, "min": -167.17593383789062, "p10": -47.46497650146483, "median": 36.08229064941406, "p90": 142.5253662109375, "max": 247.1139678955078, "pos_frac": 0.703125, "sample": [46.87533187866211, 43.27452087402344, 131.6289520263672, -12.6650390625, 89.09992980957031, -58.98446273803711, 99.6068115234375, 142.35626220703125, 35.21929931640625, 42.66004943847656, 161.88424682617188, 50.6419677734375, 47.95170593261719, 34.86442565917969, -83.51541900634766, 97.69987487792969, 148.38284301757812, 4.652956008911133, 0.5356922149658203, 75.56640625, -70.23863220214844, -4.469856262207031, 247.1139678955078, -167.17593383789062, 145.40823364257812, 48.032066345214844, 161.23968505859375, 142.59783935546875, 65.4847640991211, 49.45661926269531, -9.346179962158203, -57.15806579589844, 102.6694564819336, -4.028776168823242, -15.864601135253906, 19.196517944335938, -76.86253356933594, -3.075542449951172, 32.671485900878906, 47.65726089477539, 85.71825408935547, -39.27391052246094, 54.747772216796875, -23.830886840820312, -7.916757583618164, 145.06187438964844, 31.727489471435547, 36.945281982421875, 22.14186668395996, 50.23838424682617, 23.85118865966797, -8.306524276733398, 123.94021606445312, -1.275482177734375, 12.790842056274414, 1.35504150390625, -10.583351135253906, 54.134483337402344, 101.49078369140625, 12.008752822875977, -50.975433349609375, 126.27015686035156, 139.99972534179688, 1.2201881408691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000128.npy"} +{"epoch": 0.18795888399412627, "step": 129, "batch_size": 64, "mean": 43.44950866699219, "std": 82.75946807861328, "min": -139.7572021484375, "p10": -41.58316268920898, "median": 30.585763931274414, "p90": 145.14446716308595, "max": 356.43365478515625, "pos_frac": 0.65625, "sample": [3.7278690338134766, 32.30472183227539, -45.38186264038086, 108.88298034667969, -46.67523193359375, 117.70443725585938, 11.070579528808594, -41.61909484863281, 166.58172607421875, 26.72425079345703, -10.698410034179688, 104.9825439453125, 25.52762222290039, 42.206329345703125, 56.790523529052734, 72.37760162353516, 146.31484985351562, -36.32959747314453, -32.447998046875, 54.58850860595703, 212.82275390625, 89.2205810546875, 216.13925170898438, -92.40204620361328, 87.72871398925781, -87.68701171875, -41.49932098388672, 39.497100830078125, 356.43365478515625, 142.38070678710938, 28.32685661315918, 16.821502685546875, -2.4170989990234375, 75.83228302001953, -10.483444213867188, -15.500621795654297, 28.866806030273438, 121.22671508789062, 28.397499084472656, -14.961837768554688, -70.78295135498047, -8.201452255249023, 46.39826965332031, 80.7315673828125, 147.03273010253906, -139.7572021484375, 15.359861373901367, 19.263629913330078, -7.836088180541992, -7.576961517333984, 131.96092224121094, 169.39012145996094, 38.484107971191406, 47.56964111328125, 69.65486145019531, -17.501007080078125, 41.16875457763672, 52.54936218261719, -37.52781677246094, 85.51129913330078, 142.41357421875, -28.784584045410156, 76.71546173095703, -0.8427524566650391], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000129.npy"} +{"epoch": 0.1894273127753304, "step": 130, "batch_size": 64, "mean": 65.76427459716797, "std": 83.48968505859375, "min": -105.32579040527344, "p10": -20.492866516113278, "median": 47.85641860961914, "p90": 191.8082946777344, "max": 321.20233154296875, "pos_frac": 0.78125, "sample": [71.42918395996094, 39.84413528442383, 29.98773956298828, 186.10763549804688, -16.71056365966797, 12.210922241210938, 200.15573120117188, 80.42820739746094, -50.229820251464844, -8.667654037475586, 15.002630233764648, 57.77762222290039, 39.45463180541992, 182.29232788085938, 201.19091796875, 21.842445373535156, 93.38880920410156, -8.124509811401367, 36.64381408691406, 71.54728698730469, -11.264179229736328, 31.578140258789062, 6.296379089355469, 85.03445434570312, 89.12271118164062, -3.1659011840820312, 33.44287109375, 88.43504333496094, 57.23114776611328, 178.8331298828125, 138.8023681640625, 210.09963989257812, -33.71576690673828, 27.662979125976562, 145.70790100097656, -105.32579040527344, 33.339935302734375, 29.0157470703125, 32.8410530090332, -22.113853454589844, 61.562217712402344, 52.165462493896484, 73.76405334472656, 241.7224578857422, 155.40304565429688, -8.044082641601562, 321.20233154296875, -38.66063690185547, -3.777677536010742, 87.2357406616211, 34.391761779785156, 43.5473747253418, 194.25143432617188, -30.325462341308594, 141.99407958984375, -105.09027862548828, 129.11473083496094, 2.2440414428710938, 119.37445068359375, 115.61705017089844, 207.946044921875, 55.73182678222656, 31.87314224243164, 58.24082946777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000130.npy"} +{"epoch": 0.19089574155653452, "step": 131, "batch_size": 64, "mean": 65.16259002685547, "std": 71.26805877685547, "min": -145.072265625, "p10": -0.2870721817016598, "median": 48.97182655334473, "p90": 155.26929321289063, "max": 257.54010009765625, "pos_frac": 0.890625, "sample": [48.7824821472168, 44.471282958984375, 50.68714904785156, 29.479900360107422, 257.54010009765625, -145.072265625, 56.88762283325195, 37.473121643066406, 29.52788543701172, 102.26494598388672, 110.11441040039062, 21.091533660888672, 56.710723876953125, 12.992086410522461, 95.03819274902344, 35.105567932128906, 40.069950103759766, -1.900869369506836, 136.9400177001953, 70.97222900390625, 41.58116149902344, 153.82272338867188, -12.544143676757812, -12.198257446289062, 49.161170959472656, 42.540069580078125, 83.5887451171875, 9.03750228881836, 196.14215087890625, 38.73584747314453, 41.20155715942383, -81.56539916992188, 155.88925170898438, 9.060005187988281, 216.92031860351562, 3.8324317932128906, 95.8696517944336, 28.682174682617188, 136.5980987548828, 47.52933120727539, 123.88046264648438, 31.186384201049805, 199.66644287109375, 27.50281524658203, -88.79092407226562, 120.5947265625, 6.416290283203125, 105.9291763305664, 0.05748176574707031, 26.60123062133789, -0.4347381591796875, 134.3621063232422, 11.429359436035156, 173.3999786376953, 147.52737426757812, 159.7960205078125, 87.92289733886719, 64.71664428710938, 29.42410659790039, 78.91316223144531, 124.66144561767578, 93.46900177001953, 98.03944396972656, 81.07447814941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000131.npy"} +{"epoch": 0.19236417033773862, "step": 132, "batch_size": 64, "mean": 53.85203552246094, "std": 85.3536148071289, "min": -153.46356201171875, "p10": -14.178560638427735, "median": 42.406246185302734, "p90": 142.90870208740236, "max": 366.81182861328125, "pos_frac": 0.8125, "sample": [15.871044158935547, 10.330951690673828, 145.91091918945312, -61.24366760253906, 66.46650695800781, 37.270042419433594, 63.19032669067383, 282.6272277832031, 305.09100341796875, 7.6708831787109375, 69.57095336914062, 107.75171661376953, 19.81086540222168, 74.48377990722656, -153.46356201171875, 5.7147979736328125, 215.362060546875, -34.37811279296875, 65.54935455322266, -13.704282760620117, 2.1501083374023438, -11.405021667480469, 125.6214370727539, 66.22483825683594, 20.37377166748047, -13.964302062988281, 0.7633419036865234, 111.09313201904297, 7.2829437255859375, 3.5635223388671875, -14.2703857421875, -10.78961181640625, 19.52910804748535, 91.28145599365234, 77.90242004394531, 145.035888671875, 47.542449951171875, 65.29843139648438, 75.03900146484375, 49.583797454833984, 52.5819091796875, 33.58098220825195, 30.631465911865234, 137.9452667236328, -101.17010498046875, 49.04188919067383, 114.3711166381836, -62.994544982910156, 13.314472198486328, 93.23252868652344, -18.01548194885254, 58.150657653808594, 104.07107543945312, 15.473648071289062, 56.760833740234375, -13.4661865234375, 33.142822265625, 102.39950561523438, 27.723648071289062, 12.068218231201172, 174.67120361328125, 26.727298736572266, 51.737342834472656, 366.81182861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000132.npy"} +{"epoch": 0.19383259911894274, "step": 133, "batch_size": 64, "mean": 47.43252944946289, "std": 74.29071807861328, "min": -102.72245788574219, "p10": -20.491673278808587, "median": 35.27021408081055, "p90": 143.267660522461, "max": 244.8885040283203, "pos_frac": 0.765625, "sample": [10.248565673828125, 69.98277282714844, -9.127883911132812, 107.87176513671875, 8.157947540283203, -39.317283630371094, -6.43011474609375, 35.657470703125, -73.03349304199219, 244.8885040283203, 50.95262145996094, 71.12860870361328, -5.260345458984375, 101.78752899169922, -75.31695556640625, 18.40185546875, 87.99868774414062, -7.96197509765625, 210.18846130371094, 6.44658088684082, 63.44165802001953, 80.189453125, -13.227317810058594, 129.62396240234375, 1.7835235595703125, 102.69363403320312, 46.860984802246094, 75.38627624511719, 83.14200592041016, 66.14956665039062, 10.2000732421875, 19.550636291503906, 25.216230392456055, 238.57278442382812, -1.285614013671875, 50.480438232421875, 70.21704864501953, -64.61236572265625, 206.59384155273438, 33.185150146484375, 42.0560417175293, 34.882957458496094, 52.418609619140625, 237.14703369140625, -102.72245788574219, -13.735107421875, 64.89570617675781, 173.60079956054688, 17.7730712890625, 48.1247673034668, 7.1030120849609375, 115.42245483398438, -52.842193603515625, 149.11495971679688, 98.13582611083984, -23.387344360351562, 64.95852661132812, 45.975311279296875, -9.613906860351562, 20.67424201965332, 7.618446350097656, 0.2924308776855469, 9.788928985595703, 16.574485778808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000133.npy"} +{"epoch": 0.19530102790014683, "step": 134, "batch_size": 64, "mean": 50.91827392578125, "std": 66.5137939453125, "min": -43.87969207763672, "p10": -30.389616966247555, "median": 41.28227424621582, "p90": 129.29384536743166, "max": 324.4138488769531, "pos_frac": 0.828125, "sample": [15.81475830078125, -43.87969207763672, -36.12805938720703, 126.43214416503906, 197.09841918945312, 33.87244415283203, 18.38309097290039, 40.13528823852539, -31.909263610839844, 47.634849548339844, 119.52705383300781, 23.27829360961914, 12.030380249023438, 7.168556213378906, 51.119422912597656, 39.53492736816406, -26.843774795532227, 46.127349853515625, 77.15719604492188, 23.219505310058594, 5.74980354309082, 10.226280212402344, 97.35533905029297, 19.259315490722656, 162.37120056152344, 3.078216552734375, 108.04824829101562, 94.77308654785156, 51.765140533447266, 17.218124389648438, 47.64257049560547, 130.2677764892578, 2.3635311126708984, 47.42559051513672, 160.72866821289062, 112.79476165771484, 127.0213394165039, -40.28400421142578, 81.79974365234375, -17.139907836914062, 55.853729248046875, -34.07884216308594, 64.66082763671875, 6.912992477416992, 324.4138488769531, 11.685138702392578, 78.99464416503906, 39.806182861328125, 71.07736206054688, -4.603996276855469, 63.43780517578125, -23.195899963378906, -35.360694885253906, 176.16787719726562, 59.311561584472656, 6.4620361328125, 59.328758239746094, 39.085636138916016, 42.42926025390625, 71.75414276123047, -39.686561584472656, 78.68746948242188, 31.391193389892578, 151.99722290039062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000134.npy"} +{"epoch": 0.19676945668135096, "step": 135, "batch_size": 64, "mean": 73.2041015625, "std": 92.93647003173828, "min": -86.2076416015625, "p10": -34.68207778930663, "median": 52.88039016723633, "p90": 212.27519378662112, "max": 352.334228515625, "pos_frac": 0.8125, "sample": [28.506668090820312, 129.30755615234375, 31.056629180908203, 243.22393798828125, 123.29718017578125, 73.8278579711914, 244.3993682861328, 41.002803802490234, 239.40145874023438, 215.13243103027344, 128.096435546875, 86.28236389160156, 187.9637451171875, 126.19825744628906, 271.4989013671875, 115.16860961914062, 32.04754638671875, 195.50152587890625, 12.67142105102539, -37.21863555908203, 1.409811019897461, 111.95722961425781, 157.42227172851562, 77.62954711914062, 39.27131652832031, 97.38397216796875, 2.6647186279296875, 352.334228515625, -22.683704376220703, 108.69610595703125, -58.53497314453125, 200.42013549804688, 205.60830688476562, -73.78630065917969, 40.00053024291992, -16.826595306396484, 27.315696716308594, 55.99645233154297, 124.73989868164062, -1.37982177734375, -57.93870544433594, 52.801605224609375, 82.57479858398438, 55.93927001953125, 1.3987159729003906, 52.95917510986328, 4.402858734130859, -13.982803344726562, 47.16035461425781, 16.77370834350586, 33.73876953125, 110.44157409667969, 91.41560363769531, 86.5749740600586, 6.63726806640625, 97.13453674316406, 44.48826599121094, 228.43850708007812, 13.204338073730469, -86.2076416015625, -28.763442993164062, -37.650909423828125, -41.26713562011719, 7.784111022949219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000135.npy"} +{"epoch": 0.19823788546255505, "step": 136, "batch_size": 64, "mean": 48.954044342041016, "std": 91.8631591796875, "min": -73.37477111816406, "p10": -45.74725914001465, "median": 28.49736976623535, "p90": 121.53847579956056, "max": 407.9271240234375, "pos_frac": 0.6875, "sample": [61.94028854370117, 71.2911605834961, 103.3582763671875, 93.95970153808594, -6.632789611816406, -48.86715316772461, 295.301025390625, 130.65235900878906, -18.669158935546875, 66.39047241210938, 101.14225769042969, 65.28919982910156, -21.897323608398438, 13.071399688720703, 75.56541442871094, 54.33355712890625, 80.41458129882812, 34.68449401855469, 113.4661865234375, 318.4679870605469, -2.1588134765625, 72.50215148925781, 407.9271240234375, 3.8148651123046875, 115.97169494628906, -3.9505233764648438, 221.34307861328125, -7.569911956787109, 258.4736328125, 23.93436050415039, 9.677635192871094, 3.041790008544922, 58.208580017089844, -70.63323974609375, 52.61967468261719, -64.39022827148438, 21.063949584960938, -24.653160095214844, 88.85484313964844, 88.42623901367188, 11.584161758422852, -15.841472625732422, -14.062759399414062, 62.66218566894531, 9.171916961669922, 1.3610210418701172, -24.827228546142578, 32.59392166137695, 92.1588134765625, 119.37146759033203, -46.357521057128906, -60.123985290527344, 35.79779052734375, 77.57820892333984, -73.37477111816406, 2.281951904296875, 69.11485290527344, -22.363445281982422, -44.32331466674805, 24.40081787109375, -4.118568420410156, 122.46719360351562, 3.2088088989257812, -61.06703186035156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000136.npy"} +{"epoch": 0.19970631424375918, "step": 137, "batch_size": 64, "mean": 76.9243392944336, "std": 112.35306549072266, "min": -155.94601440429688, "p10": -26.437416076660153, "median": 49.164344787597656, "p90": 249.27919616699245, "max": 384.16986083984375, "pos_frac": 0.84375, "sample": [-43.00120544433594, 11.007831573486328, 71.79203796386719, 119.37999725341797, 151.95242309570312, 32.40300750732422, 47.935150146484375, 89.71243286132812, -22.38741683959961, 102.29389953613281, 3.584930419921875, 95.68246459960938, 384.16986083984375, 18.493467330932617, 38.521453857421875, -111.41058349609375, 112.82551574707031, 12.295673370361328, 35.69537353515625, 150.2581024169922, 155.2628173828125, 80.21126556396484, 3.009632110595703, 383.29754638671875, 112.15388488769531, 324.77880859375, 188.3812255859375, 123.37745666503906, 19.4357852935791, 167.58071899414062, -91.39701843261719, 11.017913818359375, 67.40196990966797, 11.618415832519531, 133.69955444335938, 9.542205810546875, 124.75993347167969, 50.37443542480469, 7.665596008300781, 48.91688537597656, 63.88481903076172, 42.71709442138672, -3.0038986206054688, -155.94601440429688, -44.22278594970703, -72.55867767333984, 362.611083984375, -28.17313003540039, 39.485130310058594, 295.9627990722656, 54.17768096923828, 129.26577758789062, 3.520477294921875, 90.76055908203125, -16.560365676879883, 28.69383430480957, 49.41180419921875, 9.246082305908203, 0.6995639801025391, 102.19324493408203, 303.87432861328125, 35.35050964355469, 124.09598541259766, 275.3783264160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000137.npy"} +{"epoch": 0.2011747430249633, "step": 138, "batch_size": 64, "mean": 58.114654541015625, "std": 71.87110137939453, "min": -114.43649291992188, "p10": -17.50610809326172, "median": 42.20337104797363, "p90": 151.28023071289067, "max": 275.4041748046875, "pos_frac": 0.78125, "sample": [156.1363525390625, -14.487762451171875, 71.55155944824219, 138.98941040039062, 9.112152099609375, 114.52545928955078, -17.668197631835938, -1.123931884765625, 7.3747406005859375, 172.4095916748047, 31.143062591552734, 18.524063110351562, 41.35221862792969, 11.362695693969727, 26.55531120300293, -114.43649291992188, 16.207372665405273, -20.714717864990234, -0.17459869384765625, -13.586463928222656, 185.5174560546875, 174.2708740234375, 15.541706085205078, 10.225196838378906, 116.08380126953125, 130.1786346435547, 130.53598022460938, -4.019325256347656, 91.07793426513672, 136.15318298339844, 56.88848876953125, 29.19875144958496, 124.20149230957031, 6.8475341796875, 136.55441284179688, 169.58177185058594, 74.8636474609375, 43.05452346801758, -68.73858642578125, 61.369529724121094, 64.06232452392578, 28.60125732421875, 86.16136169433594, 0.9622859954833984, -6.312034606933594, -55.92393493652344, 95.91546630859375, 139.94927978515625, 15.77145004272461, 40.44361114501953, 111.1055908203125, 33.79503631591797, 136.05064392089844, -33.36548614501953, -19.73171043395996, 58.57278823852539, -17.127899169921875, 70.16513061523438, 19.738616943359375, 275.4041748046875, 103.41582489013672, 56.52326965332031, 106.25089263916016, 156.47119140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000138.npy"} +{"epoch": 0.2026431718061674, "step": 139, "batch_size": 64, "mean": 69.69491577148438, "std": 86.99911499023438, "min": -109.46734619140625, "p10": -29.612622070312494, "median": 55.30386734008789, "p90": 167.07206878662114, "max": 369.9289245605469, "pos_frac": 0.796875, "sample": [-19.46373748779297, 155.17654418945312, 99.50556182861328, 173.67893981933594, -32.504173278808594, 64.87044525146484, 121.59984588623047, -12.747291564941406, 82.69035339355469, 143.27838134765625, -109.46734619140625, 57.327178955078125, -49.54243087768555, 272.598876953125, 139.83544921875, 46.622154235839844, 86.25333404541016, 53.280555725097656, 34.534141540527344, 62.204708099365234, 143.291015625, 73.20791625976562, -70.05695343017578, 125.63509368896484, 50.80836486816406, 1.3875808715820312, 154.2473907470703, 82.74406433105469, 15.784097671508789, 38.647499084472656, 151.72235107421875, 23.180299758911133, 125.94572448730469, 116.01268768310547, 37.38750457763672, 90.66062927246094, -22.22357940673828, -1.0368938446044922, 9.249870300292969, 15.843727111816406, 80.16492462158203, -0.371978759765625, 16.88744354248047, 150.18429565429688, 22.98516845703125, 263.0988464355469, 90.51812744140625, -22.86566925048828, 76.69994354248047, -46.05303192138672, 82.9174575805664, -46.9595947265625, 34.14586639404297, 84.23963928222656, 369.9289245605469, 39.06660842895508, 252.3577117919922, 39.261688232421875, 45.39124298095703, -38.922882080078125, 191.31427001953125, 29.1539306640625, 42.99192810058594, 172.17015075683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000139.npy"} +{"epoch": 0.20411160058737152, "step": 140, "batch_size": 64, "mean": 63.4018440246582, "std": 89.44715118408203, "min": -128.67735290527344, "p10": -51.1243595123291, "median": 64.66999435424805, "p90": 171.37539672851565, "max": 346.57183837890625, "pos_frac": 0.734375, "sample": [-26.75271987915039, 122.87307739257812, 346.57183837890625, -74.75318145751953, -13.551933288574219, 123.36424255371094, 85.55101776123047, 146.6265411376953, 56.10523223876953, 162.3393096923828, 118.36004638671875, 72.0396957397461, 113.58232116699219, 40.964759826660156, 23.648452758789062, 84.26991271972656, 272.4944152832031, 11.42165756225586, 6.435932159423828, 75.08983612060547, 19.918533325195312, 65.15484619140625, 87.61383056640625, 137.81192016601562, -2.0508193969726562, -55.994964599609375, 254.29701232910156, -53.61273193359375, 36.464202880859375, 79.02386474609375, 21.007814407348633, 64.18514251708984, 8.406171798706055, 115.04237365722656, -32.78474426269531, 61.049896240234375, 93.00254821777344, 67.15621185302734, -8.335662841796875, 164.89727783203125, 130.49087524414062, -33.219512939453125, 175.3437042236328, -128.67735290527344, -1.5329132080078125, 62.86962127685547, 62.24098205566406, 91.6654052734375, 187.8402099609375, 23.65520477294922, -7.069269180297852, 154.3893280029297, -12.36515998840332, 19.898056030273438, -88.24607849121094, -52.4589729309082, -72.5126724243164, 65.73582458496094, 179.94729614257812, 174.1517333984375, -48.01026153564453, 84.46949768066406, 82.95242309570312, 137.2269287109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000140.npy"} +{"epoch": 0.2055800293685756, "step": 141, "batch_size": 64, "mean": 59.664695739746094, "std": 95.8642807006836, "min": -245.87655639648438, "p10": -48.8665557861328, "median": 59.9300594329834, "p90": 183.569808959961, "max": 293.49139404296875, "pos_frac": 0.75, "sample": [61.801605224609375, 38.592750549316406, -16.72921371459961, 101.13851165771484, 79.79246520996094, -33.61730194091797, 108.12677001953125, 59.42173767089844, 1.6284713745117188, -7.613285064697266, 60.43838119506836, 263.74835205078125, -245.87655639648438, 48.451210021972656, -4.794624328613281, 34.30303955078125, -0.5732936859130859, -56.0010986328125, 264.6234130859375, -63.81306457519531, -38.39728546142578, 198.8325958251953, 48.16616439819336, 84.166748046875, 147.30093383789062, 80.78778076171875, 107.57003784179688, 3.0399856567382812, -62.854461669921875, 73.32330322265625, 111.42962646484375, 78.69795227050781, 34.69287872314453, 170.91061401367188, 247.69906616210938, 25.488725662231445, 117.35466766357422, 25.07213592529297, -19.841432571411133, 16.71316909790039, -25.26258087158203, 50.561988830566406, -55.08588409423828, 188.99517822265625, 5.123270034790039, 136.19012451171875, 190.53857421875, 119.37167358398438, 11.684009552001953, -165.80072021484375, 18.872512817382812, 97.6536865234375, 63.17939758300781, 89.36070251464844, 113.70736694335938, 129.09127807617188, 293.49139404296875, 129.19735717773438, -0.6419467926025391, 160.02037048339844, 69.39237976074219, 104.93350219726562, -53.35338592529297, 4.1188201904296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000141.npy"} +{"epoch": 0.20704845814977973, "step": 142, "batch_size": 64, "mean": 68.77471923828125, "std": 108.09823608398438, "min": -212.98211669921875, "p10": -47.50071716308592, "median": 35.54212188720703, "p90": 235.541943359375, "max": 297.48779296875, "pos_frac": 0.765625, "sample": [-22.065391540527344, 264.2941589355469, 33.245033264160156, 41.91747283935547, -30.657371520996094, 246.0963897705078, -32.91093063354492, -21.951047897338867, -92.27911376953125, 32.01002883911133, 139.39566040039062, 97.08877563476562, 159.83026123046875, 30.085390090942383, 34.859947204589844, 230.7392120361328, 16.332263946533203, -212.98211669921875, -156.27053833007812, 73.32886505126953, 18.51146697998047, 34.14289855957031, 18.22470474243164, 297.48779296875, 165.0576934814453, 36.22429656982422, 148.4558868408203, 26.005712509155273, 146.60467529296875, -21.7854061126709, 105.52362060546875, 119.07063293457031, 234.7266082763672, 82.36729431152344, -0.8978443145751953, 206.69004821777344, 216.55979919433594, 18.205032348632812, 20.543615341186523, 42.534393310546875, -67.05801391601562, -53.753482818603516, 3.796588897705078, 235.89137268066406, 67.33252716064453, 244.47215270996094, 14.109661102294922, 23.544151306152344, 72.5848388671875, -57.64079284667969, 57.14628982543945, -17.807662963867188, 7.106096267700195, -82.77850341796875, 191.5092010498047, 242.6427001953125, 24.30156707763672, 189.60000610351562, 25.662307739257812, -11.355031967163086, 125.94478607177734, 250.46560668945312, 45.36112976074219, 126.1447525024414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000142.npy"} +{"epoch": 0.20851688693098386, "step": 143, "batch_size": 64, "mean": 43.79881286621094, "std": 94.29562377929688, "min": -156.56546020507812, "p10": -55.83059616088867, "median": 28.435501098632812, "p90": 144.1503875732422, "max": 332.3783874511719, "pos_frac": 0.65625, "sample": [84.56964111328125, -7.206268310546875, 53.86438751220703, 25.645912170410156, 284.75872802734375, 90.86522674560547, -56.628013610839844, 96.80835723876953, 106.17286682128906, 23.81292152404785, 144.55218505859375, -87.57260131835938, -53.96995544433594, 137.487548828125, 85.04375457763672, -123.38062286376953, -47.8173828125, -2.799745559692383, 103.85987854003906, -6.0804901123046875, 33.761268615722656, 259.19305419921875, -68.58541107177734, 98.25881958007812, 16.777305603027344, -16.626800537109375, 46.92082214355469, 143.21286010742188, 29.248634338378906, 14.608501434326172, -12.692398071289062, -49.101531982421875, -60.39521789550781, 3.969247817993164, 134.66993713378906, 5.686920166015625, 44.347084045410156, 67.04415893554688, 25.6136474609375, 332.3783874511719, -35.686912536621094, -12.813758850097656, 34.35393524169922, 83.02519226074219, -2.6446285247802734, 54.567718505859375, 155.43637084960938, 136.28463745117188, -5.493915557861328, -52.20849609375, -88.80523681640625, 287.10888671875, -34.385841369628906, 79.9761734008789, 59.23143005371094, 29.974138259887695, 110.04443359375, 27.62236785888672, 180.3021240234375, -156.56546020507812, -9.181846618652344, 42.474700927734375, 4.188636779785156, 16.043670654296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000143.npy"} +{"epoch": 0.20998531571218795, "step": 144, "batch_size": 64, "mean": 53.788116455078125, "std": 78.23200225830078, "min": -106.958251953125, "p10": -33.41476974487305, "median": 43.817298889160156, "p90": 169.40383605957032, "max": 242.93838500976562, "pos_frac": 0.734375, "sample": [-33.533905029296875, 110.55303192138672, -4.753889083862305, 171.93350219726562, -101.8635025024414, -19.873153686523438, 67.63292694091797, 118.31734466552734, 43.276268005371094, 14.397686004638672, 63.43629455566406, -41.58500289916992, 74.16607666015625, 104.13070678710938, 57.695556640625, 163.50128173828125, 50.588539123535156, 200.17318725585938, 104.48700714111328, 141.44659423828125, 173.05233764648438, 191.44241333007812, -23.90410041809082, -106.958251953125, 102.8016586303711, -25.244781494140625, 49.18018341064453, -3.5575790405273438, 68.89425659179688, 23.137388229370117, -54.265403747558594, 75.09275817871094, 65.62272644042969, 15.5711669921875, -48.27423858642578, 139.04531860351562, 46.76128387451172, 27.281002044677734, 24.688894271850586, 89.37387084960938, 30.690460205078125, 93.85089874267578, 242.93838500976562, 27.087270736694336, 27.113327026367188, 75.70999145507812, -33.13678741455078, 23.15106964111328, 21.283462524414062, 101.01654052734375, 120.49102020263672, 229.2256317138672, -1.6124267578125, -26.839813232421875, -17.092239379882812, -22.85608673095703, 219.27659606933594, 15.587406158447266, 22.963415145874023, 0.7411575317382812, 44.35832977294922, 29.343338012695312, 149.73477172851562, -44.453773498535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000144.npy"} +{"epoch": 0.21145374449339208, "step": 145, "batch_size": 64, "mean": 57.771766662597656, "std": 80.50733184814453, "min": -119.71025085449219, "p10": -41.77843666076659, "median": 44.88104248046875, "p90": 148.09603881835938, "max": 263.5973205566406, "pos_frac": 0.75, "sample": [24.426055908203125, -46.12120056152344, -97.0699462890625, 263.5973205566406, 147.971923828125, 113.62303924560547, 243.03488159179688, 2.8041648864746094, -119.71025085449219, 57.97629928588867, 4.9766845703125, 78.83944702148438, 143.1132049560547, -54.53861618041992, 11.500232696533203, 120.44918060302734, 55.32337188720703, 40.98395538330078, 119.88749694824219, -48.270790100097656, -8.361852645874023, 36.876190185546875, 43.3001594543457, 134.52197265625, 91.34233093261719, -65.9007568359375, -9.93377685546875, 29.381702423095703, -11.111465454101562, 180.87567138671875, 22.898801803588867, 12.76251220703125, 112.04373168945312, -31.645320892333984, -56.70745849609375, 119.24983215332031, 16.326904296875, 22.39307975769043, 78.33853912353516, -9.809051513671875, 72.50367736816406, -6.151111602783203, 116.21361541748047, -15.27829360961914, 114.58151245117188, 191.28392028808594, 103.71049499511719, -21.831680297851562, 51.70820236206055, 124.99840545654297, 30.779468536376953, 40.93655014038086, 141.48191833496094, 155.77609252929688, 59.798152923583984, 18.244686126708984, -5.821891784667969, 65.54853820800781, 133.620361328125, 118.15935516357422, 231.02938842773438, 46.4619255065918, 11.852462768554688, 148.14923095703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000145.npy"} +{"epoch": 0.21292217327459617, "step": 146, "batch_size": 64, "mean": 44.597129821777344, "std": 88.64009857177734, "min": -239.86172485351562, "p10": -71.05649948120117, "median": 58.54458045959473, "p90": 156.06778717041018, "max": 243.79176330566406, "pos_frac": 0.671875, "sample": [33.64196014404297, -33.665496826171875, 107.54576110839844, -64.17914581298828, 18.961732864379883, 2.8320388793945312, 73.38711547851562, 63.02696228027344, 76.24275207519531, 64.20836639404297, 214.23138427734375, 95.31578826904297, 43.38665771484375, -8.426483154296875, 24.648643493652344, -6.951271057128906, 93.37494659423828, -9.468948364257812, 39.584922790527344, -18.214494705200195, -5.882175445556641, 30.77911376953125, 60.42420196533203, 116.61811828613281, -101.93742370605469, 70.36962127685547, 150.28314208984375, -239.86172485351562, -74.00393676757812, 208.1759033203125, -79.61194610595703, 73.97654724121094, -3.9143333435058594, 73.001708984375, 186.94895935058594, -79.8497543334961, 49.09098815917969, 60.341041564941406, 118.8978500366211, 243.79176330566406, 5.379417419433594, -5.9987640380859375, 58.244876861572266, 79.63087463378906, 58.84428405761719, 103.6448745727539, 103.00223541259766, -100.92442321777344, 59.11845397949219, -75.97003173828125, 81.41143798828125, 139.13442993164062, 158.5469207763672, 74.31758117675781, -8.988555908203125, 92.13054656982422, 35.73612976074219, -35.80867004394531, 85.14054870605469, -53.541656494140625, 228.72264099121094, -32.734527587890625, 188.9276123046875, -50.870758056640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000146.npy"} +{"epoch": 0.2143906020558003, "step": 147, "batch_size": 64, "mean": 60.34022521972656, "std": 90.45318603515625, "min": -148.34124755859375, "p10": -31.65120162963867, "median": 46.38974380493164, "p90": 193.26815185546877, "max": 299.40435791015625, "pos_frac": 0.75, "sample": [108.49249267578125, -62.97639846801758, 42.04887390136719, 230.51303100585938, -28.752769470214844, -8.794197082519531, 97.4781494140625, 97.06840515136719, 50.78166198730469, -43.708656311035156, 28.69838523864746, 74.56834411621094, 58.92033386230469, 123.45657348632812, 23.865352630615234, 195.12689208984375, 112.65422058105469, 19.691680908203125, -7.455963134765625, 188.93109130859375, 161.30203247070312, -23.677467346191406, 174.6533660888672, 21.795740127563477, 270.9537353515625, 46.53339385986328, 196.95559692382812, 105.45709228515625, -11.799982070922852, 46.24609375, 29.45229721069336, -130.47283935546875, 3.9680938720703125, 114.5360107421875, 100.6793212890625, -27.494430541992188, 61.34150695800781, 299.40435791015625, 76.29334259033203, 20.399860382080078, 176.00357055664062, 14.335784912109375, -15.444211959838867, 14.190544128417969, 33.60981750488281, 59.327980041503906, -12.235366821289062, 79.85391235351562, 64.92311096191406, -32.89338684082031, 21.3265380859375, 49.38984298706055, 19.40017318725586, 173.00071716308594, -25.878921508789062, 57.92104721069336, 242.3570556640625, -33.21513366699219, 30.765594482421875, -148.34124755859375, 212.70001220703125, -59.1842041015625, 58.5601806640625, 44.16633224487305], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000147.npy"} +{"epoch": 0.21585903083700442, "step": 148, "batch_size": 64, "mean": 51.533233642578125, "std": 82.40593719482422, "min": -102.71951293945312, "p10": -29.22577972412109, "median": 34.79726791381836, "p90": 143.45980224609377, "max": 336.9862060546875, "pos_frac": 0.796875, "sample": [-6.596851348876953, 50.438629150390625, 5.10784912109375, 83.48876953125, -56.879478454589844, 97.61302947998047, 80.36970520019531, 39.283382415771484, -42.05071258544922, 187.87095642089844, 18.39844512939453, 140.86996459960938, 22.312623977661133, -16.32274627685547, 152.90260314941406, 251.6890869140625, 2.6171951293945312, 15.2822265625, -39.733787536621094, -72.98110961914062, 133.6666717529297, 129.78358459472656, -10.419416427612305, 91.97549438476562, 94.62904357910156, 40.9617919921875, 63.72491455078125, 50.406917572021484, 106.41477966308594, 1.2786674499511719, 144.56973266601562, 31.3990478515625, 36.87962341308594, 67.12336730957031, -30.927635192871094, 45.89542770385742, 30.50110626220703, 17.087539672851562, 111.49150848388672, 54.56257247924805, 133.68338012695312, 33.901954650878906, -81.78868103027344, 48.15281677246094, 35.69258117675781, 22.979759216308594, 182.150634765625, 47.228050231933594, -4.7233734130859375, -25.254783630371094, -102.71951293945312, 41.167205810546875, 9.775558471679688, 6.356992721557617, 33.09064483642578, 6.851018905639648, 33.265403747558594, 336.9862060546875, 318.42083740234375, 7.689840316772461, -2.9343204498291016, 1.4414443969726562, 86.43403625488281, 5.594758987426758], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000148.npy"} +{"epoch": 0.2173274596182085, "step": 149, "batch_size": 64, "mean": 64.60050964355469, "std": 81.1431884765625, "min": -182.0203399658203, "p10": -8.874083328247067, "median": 56.97078895568848, "p90": 150.89680023193358, "max": 306.36065673828125, "pos_frac": 0.828125, "sample": [110.35436248779297, 65.56507873535156, 203.42080688476562, 60.6282844543457, 105.12516021728516, 68.34854125976562, 73.25789642333984, -13.538259506225586, -18.998634338378906, -9.79708480834961, 21.957664489746094, -6.7204132080078125, 58.306331634521484, 44.24468231201172, 60.5096435546875, -63.977752685546875, 89.76043701171875, 93.9150390625, 46.52105712890625, 133.40066528320312, 109.125244140625, 172.18917846679688, 101.77281951904297, 28.47931480407715, 14.669469833374023, 97.73321533203125, 50.32769012451172, 124.83208465576172, 72.05204772949219, 129.1788330078125, 63.77747344970703, -1.1791820526123047, 306.36065673828125, 36.957862854003906, 150.746826171875, 26.799545288085938, 225.8013916015625, 66.31311798095703, 29.372299194335938, 106.45000457763672, 24.03921127319336, 75.3624267578125, 11.476289749145508, 44.799720764160156, -0.7868270874023438, 54.18896484375, 14.111148834228516, 55.63524627685547, -13.803436279296875, -182.0203399658203, 97.85342407226562, 15.311897277832031, 1.568695068359375, 150.96107482910156, 11.196004867553711, 274.632568359375, 17.142017364501953, 42.12152099609375, 140.80364990234375, 263.2383728027344, 8.306068420410156, -6.560604095458984, 85.66371154785156, -84.85136413574219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000149.npy"} +{"epoch": 0.21879588839941264, "step": 150, "batch_size": 64, "mean": 63.096435546875, "std": 93.77081298828125, "min": -82.7943115234375, "p10": -23.533736419677727, "median": 36.44698143005371, "p90": 168.97206878662112, "max": 409.53924560546875, "pos_frac": 0.828125, "sample": [287.36883544921875, 324.929931640625, 17.71851348876953, 31.326568603515625, 60.609527587890625, -34.86304473876953, 178.753173828125, 35.33976745605469, -82.2635726928711, 68.94361114501953, 409.53924560546875, -5.842657089233398, -15.165283203125, 17.989105224609375, -8.28656005859375, 132.1071014404297, 19.965835571289062, 51.131019592285156, 47.40095520019531, 74.33753204345703, 37.554195404052734, 55.449241638183594, 150.3697509765625, 26.964202880859375, 107.29396057128906, 89.01813507080078, 190.1161346435547, 130.07244873046875, 17.03722381591797, 69.04753112792969, 13.358451843261719, -46.60319519042969, 84.29108428955078, 171.60679626464844, 69.37664794921875, 8.083084106445312, 162.82437133789062, 21.676862716674805, 141.95428466796875, -27.120216369628906, 93.73463439941406, -37.88715362548828, -39.860862731933594, 6.0289306640625, 116.7437515258789, 4.290470123291016, 27.545005798339844, 63.7164306640625, 17.335914611816406, 39.28260803222656, 54.64599609375, 67.47276306152344, 63.786277770996094, 18.872634887695312, 34.81627655029297, 349.44647216796875, 14.374662399291992, 16.60365867614746, 10.378206253051758, 32.544097900390625, 93.69597625732422, -10.062370300292969, -82.7943115234375, 0.051052093505859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000150.npy"} +{"epoch": 0.22026431718061673, "step": 151, "batch_size": 64, "mean": 62.93691635131836, "std": 78.74150848388672, "min": -146.86474609375, "p10": -27.229729080200194, "median": 54.71632385253906, "p90": 176.61609344482423, "max": 243.765625, "pos_frac": 0.828125, "sample": [-146.86474609375, 218.2666778564453, 45.06510925292969, -1.8977890014648438, 41.49696350097656, 54.91795349121094, 69.00609588623047, 111.95790100097656, 74.49870300292969, 26.6239013671875, 177.35354614257812, -42.476646423339844, -27.934837341308594, 101.13865661621094, 54.51469421386719, -43.44021987915039, -7.853580474853516, 41.292564392089844, 56.898712158203125, 46.502174377441406, 233.08502197265625, 57.45957565307617, 143.52493286132812, 58.08013153076172, -54.12494659423828, 172.69174194335938, 24.754928588867188, 5.547943115234375, -25.584476470947266, 21.0740966796875, 38.11225128173828, 11.581151962280273, 19.72146224975586, 8.59461784362793, 103.26161193847656, 174.89537048339844, -64.61128234863281, 64.14280700683594, 101.21269989013672, 100.7218017578125, 7.960878372192383, 58.819610595703125, 96.71614074707031, 78.73910522460938, 11.363189697265625, 190.94888305664062, 104.06336975097656, 53.28559112548828, 118.73731994628906, -1.8123397827148438, 208.12791442871094, 9.982879638671875, 2.9581222534179688, 138.14773559570312, 15.336135864257812, 14.224206924438477, 200.35272216796875, 105.25153350830078, 243.765625, 172.46600341796875, -46.89720153808594, 75.48359680175781, 20.261398315429688, 106.47311401367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000151.npy"} +{"epoch": 0.22173274596182085, "step": 152, "batch_size": 64, "mean": 76.53138732910156, "std": 92.72167205810547, "min": -87.21243286132812, "p10": -39.08784866333007, "median": 59.96224403381348, "p90": 206.79201507568362, "max": 307.6248779296875, "pos_frac": 0.796875, "sample": [30.18341064453125, 55.902992248535156, 158.05862426757812, 172.66346740722656, 73.48338317871094, 199.34420776367188, 59.7000617980957, 137.49359130859375, 307.6248779296875, 33.96068572998047, 152.2926025390625, 125.4827880859375, 34.92204666137695, -63.37702178955078, -2.9425735473632812, 229.2847900390625, 139.07455444335938, 48.95148468017578, 66.97053527832031, 62.55708312988281, 56.84657287597656, 51.96227264404297, -44.31182098388672, 72.51762390136719, 2.3003273010253906, 306.97967529296875, 125.5694351196289, 216.3571014404297, 40.68992614746094, 22.32994842529297, 62.51264953613281, -53.6934814453125, 21.91103744506836, -3.85498046875, 2.7438182830810547, 60.22442626953125, 20.35327911376953, 194.38229370117188, 35.294677734375, 46.33998107910156, 279.8299865722656, -22.813575744628906, 55.04437255859375, 198.85104370117188, 66.86293029785156, -30.415908813476562, 238.08790588378906, 34.483604431152344, -73.41730499267578, 104.26953125, 6.787393569946289, 138.49766540527344, 89.3670654296875, 66.11876678466797, 209.9839324951172, -42.73760986328125, 159.43862915039062, -87.21243286132812, -6.2095947265625, 113.91355895996094, -42.82586669921875, -30.571739196777344, 131.962158203125, 81.62781524658203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000152.npy"} +{"epoch": 0.22320117474302498, "step": 153, "batch_size": 64, "mean": 65.17752075195312, "std": 99.81834411621094, "min": -174.7130126953125, "p10": -35.07184906005859, "median": 54.3188591003418, "p90": 194.54412536621098, "max": 412.63946533203125, "pos_frac": 0.75, "sample": [82.91603088378906, -2.328338623046875, 9.03448486328125, -5.688636779785156, 20.217693328857422, -7.3118896484375, 28.58612060546875, 153.863525390625, -40.99230194091797, 184.73793029785156, 93.589599609375, 8.321296691894531, -38.57137680053711, 81.72300720214844, 236.94874572753906, -67.86449432373047, 75.57992553710938, 16.5783634185791, 55.06867218017578, 102.23857879638672, 53.56904602050781, 412.63946533203125, 38.29285430908203, 63.843353271484375, 78.010498046875, 180.62600708007812, -24.34490203857422, -41.75446319580078, 198.7467803955078, 202.1715087890625, 137.4299774169922, 36.96955108642578, 235.90634155273438, 6.554256439208984, 1.701425552368164, 1.6465682983398438, 162.35855102539062, 94.09033966064453, -33.54392623901367, -35.7266731262207, 122.63902282714844, 13.351821899414062, 8.977645874023438, 264.1441345214844, 23.106918334960938, -15.703842163085938, -58.191307067871094, 63.54307556152344, 8.304954528808594, -174.7130126953125, 87.19566345214844, 350.48095703125, 75.39019775390625, -30.772228240966797, 62.34038543701172, -3.9802989959716797, 93.07838439941406, 42.47138214111328, 129.87432861328125, 133.32278442382812, 73.30746459960938, -20.716758728027344, 61.356163024902344, 106.71983337402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000153.npy"} +{"epoch": 0.22466960352422907, "step": 154, "batch_size": 64, "mean": 62.42070007324219, "std": 103.75210571289062, "min": -200.0953369140625, "p10": -50.61149024963379, "median": 53.1880989074707, "p90": 188.33187408447264, "max": 294.7155456542969, "pos_frac": 0.65625, "sample": [-10.657060623168945, 52.31953430175781, -200.0953369140625, 23.689172744750977, 294.7155456542969, 54.056663513183594, -11.744524002075195, 144.9627685546875, 171.81228637695312, -50.66941452026367, 87.66788482666016, 227.2865753173828, -85.3992919921875, 123.09710693359375, 120.29815673828125, 59.56705093383789, 2.245086669921875, 19.884628295898438, 34.24199295043945, -4.3458404541015625, 106.63851928710938, 60.66659164428711, -3.2045249938964844, 59.317020416259766, -27.68817138671875, 187.98995971679688, -49.701358795166016, 202.20516967773438, 148.39370727539062, -86.75227355957031, -33.029396057128906, 177.11373901367188, -74.16912078857422, 168.52798461914062, -50.47633361816406, 195.30252075195312, 292.8734436035156, 170.25643920898438, -2.6581573486328125, 175.70860290527344, 188.47840881347656, 117.17926025390625, 15.877269744873047, 79.81922912597656, 183.22251892089844, -11.727058410644531, -17.204727172851562, -5.300676345825195, -92.22452545166016, 113.76979064941406, 157.5595245361328, -20.563156127929688, 3.015249252319336, 29.10430908203125, 7.422576904296875, -79.82206726074219, 108.34086608886719, 135.84164428710938, 118.28990936279297, -48.96356201171875, -9.144973754882812, 2.8463497161865234, 77.68318176269531, 271.17822265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000154.npy"} +{"epoch": 0.2261380323054332, "step": 155, "batch_size": 64, "mean": 63.25199508666992, "std": 93.8571548461914, "min": -211.89219665527344, "p10": -22.805210494995112, "median": 61.51967239379883, "p90": 168.02356719970712, "max": 346.95538330078125, "pos_frac": 0.75, "sample": [-6.3214569091796875, 84.43929290771484, 54.201026916503906, -4.25579833984375, 90.06901550292969, 12.088546752929688, 82.64229583740234, -28.249954223632812, 74.8118667602539, 22.573997497558594, -5.9971771240234375, 61.90570831298828, 100.73892974853516, -79.13174438476562, 104.9903335571289, 74.28694915771484, -24.94302749633789, 46.355552673339844, 61.133636474609375, 0.240692138671875, 72.57582092285156, 66.0464096069336, 42.03056335449219, 346.95538330078125, 190.15206909179688, 8.326845169067383, 129.82843017578125, 105.33240509033203, 10.107635498046875, -117.50471496582031, 21.90665626525879, -2.1355438232421875, 22.386627197265625, 91.68878173828125, 188.93560791015625, -211.89219665527344, 276.34393310546875, 44.24993133544922, 131.32052612304688, -17.816970825195312, 3.1816368103027344, 92.226806640625, 146.07371520996094, 43.118377685546875, 286.11968994140625, 95.70310974121094, 278.9224853515625, 147.58470153808594, -73.09494018554688, 96.48959350585938, -5.3052215576171875, 176.7830810546875, -11.835132598876953, 112.09850311279297, 86.9486083984375, -48.61376190185547, 58.05016326904297, 129.87063598632812, 122.15160369873047, 10.541248321533203, 131.47642517089844, -3.5379199981689453, -17.32541847229004, 70.08287811279297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000155.npy"} +{"epoch": 0.2276064610866373, "step": 156, "batch_size": 64, "mean": 36.69999313354492, "std": 106.28523254394531, "min": -275.2389221191406, "p10": -92.2230743408203, "median": 24.820392608642578, "p90": 135.37381286621095, "max": 286.06109619140625, "pos_frac": 0.734375, "sample": [130.53265380859375, 57.17736053466797, 286.06109619140625, 251.43702697753906, 171.02149963378906, 110.24981689453125, -199.18441772460938, 3.1621551513671875, -82.54763793945312, 18.619956970214844, -201.64271545410156, 131.492431640625, 95.35137176513672, -23.01134490966797, -62.574771881103516, -131.80276489257812, 42.37952423095703, 100.75044250488281, 249.99461364746094, 73.88664245605469, 19.466598510742188, 118.77716064453125, 49.115386962890625, 21.50680160522461, 3.5506839752197266, -7.062904357910156, 15.44343376159668, 30.09180450439453, 251.57275390625, -12.669326782226562, 241.36688232421875, 137.03726196289062, 118.57501220703125, 66.86109924316406, 6.898612976074219, 5.622720718383789, 70.70780181884766, -122.18963623046875, -275.2389221191406, -75.8515853881836, 78.68851470947266, -51.251869201660156, 4.1170501708984375, 126.24626159667969, 13.422866821289062, -96.36968994140625, 82.79590606689453, 119.33895111083984, 76.32026672363281, 8.838602066040039, -3.087156295776367, 76.72518157958984, 59.19274139404297, 54.355377197265625, -107.40575408935547, -44.24641418457031, 10.443283081054688, 23.867965698242188, 7.934295654296875, -1.3104171752929688, 25.77281951904297, 74.12361907958984, 8.557506561279297, 116.79510498046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000156.npy"} +{"epoch": 0.2290748898678414, "step": 157, "batch_size": 64, "mean": 79.97874450683594, "std": 103.37088775634766, "min": -296.0206298828125, "p10": -27.427484893798827, "median": 71.3656120300293, "p90": 190.09415283203128, "max": 375.78466796875, "pos_frac": 0.828125, "sample": [159.42916870117188, 167.57009887695312, 85.4817123413086, 184.7319793701172, 132.34979248046875, 8.196578979492188, 75.51033782958984, 145.1187744140625, 4.657432556152344, 146.94699096679688, 56.57301712036133, 205.619140625, 50.000030517578125, 83.48365783691406, 217.00543212890625, 192.39222717285156, 147.98797607421875, 103.38780212402344, 48.86598205566406, 159.71792602539062, 15.911354064941406, 34.35302734375, 67.22088623046875, -27.94046401977539, 112.24864196777344, 243.9077606201172, -3.503997802734375, 32.39069747924805, -3.55877685546875, 108.96824645996094, -296.0206298828125, -39.821327209472656, 26.416427612304688, 48.5654296875, 128.9255828857422, 79.97464752197266, 247.42428588867188, 127.295166015625, 107.02464294433594, -31.91370964050293, 26.316905975341797, 276.6546325683594, 33.57627868652344, 58.09965515136719, 8.110210418701172, 178.42462158203125, 106.54383850097656, -26.230533599853516, 172.656005859375, 120.60749816894531, 184.6007080078125, 39.983795166015625, -56.3244743347168, 21.455322265625, 183.03372192382812, 150.54591369628906, 375.78466796875, 54.700828552246094, 43.95994567871094, -140.85858154296875, 18.765472412109375, -59.62367248535156, 12.715385437011719, -17.752838134765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000157.npy"} +{"epoch": 0.2305433186490455, "step": 158, "batch_size": 64, "mean": 85.15373229980469, "std": 129.79788208007812, "min": -168.0992431640625, "p10": -43.85169105529784, "median": 51.01091766357422, "p90": 272.2266433715821, "max": 477.7098693847656, "pos_frac": 0.71875, "sample": [8.638843536376953, 11.526878356933594, 42.25885009765625, 51.57013702392578, 47.95274353027344, -30.194473266601562, 33.66624069213867, 85.08145904541016, 36.97438049316406, -52.06387710571289, 213.953125, -112.8878173828125, 248.4960174560547, 82.21287536621094, -12.377487182617188, -53.250335693359375, -15.474084854125977, 108.90559387207031, -12.726903915405273, 13.848320007324219, 227.1854248046875, -35.98200225830078, 282.39691162109375, -168.0992431640625, 299.63372802734375, 72.99561309814453, 134.92495727539062, 20.605932235717773, 36.15605163574219, 11.522891998291016, 79.88143920898438, 83.27831268310547, -19.291053771972656, 151.24386596679688, -51.9932746887207, -4.8471527099609375, 336.81927490234375, 103.5009994506836, -117.95230865478516, 324.111572265625, 407.58843994140625, -2.3161773681640625, 62.477684020996094, 477.7098693847656, 23.746917724609375, 10.182594299316406, -47.22441482543945, 121.13186645507812, 229.66000366210938, 108.08487701416016, 372.79791259765625, 166.7162628173828, 100.06196594238281, 167.6868133544922, -16.462682723999023, 173.79867553710938, 88.71034240722656, -8.745895385742188, 26.184425354003906, 95.06383514404297, 246.39944458007812, 50.451698303222656, 155.0250244140625, -21.093233108520508], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000158.npy"} +{"epoch": 0.23201174743024963, "step": 159, "batch_size": 64, "mean": 58.1856689453125, "std": 103.80752563476562, "min": -202.02503967285156, "p10": -57.62577590942382, "median": 58.278175354003906, "p90": 190.72889709472656, "max": 314.29376220703125, "pos_frac": 0.75, "sample": [8.844818115234375, 41.65857696533203, 131.8000030517578, -46.63865661621094, 242.86962890625, 127.48934936523438, -202.02503967285156, -37.32670974731445, 125.81378173828125, 156.44482421875, 91.17008972167969, -59.113304138183594, 15.479082107543945, -21.40692138671875, 85.21072387695312, -72.4663314819336, -117.05593872070312, 51.50143051147461, 187.9818115234375, 138.13116455078125, 314.29376220703125, 7.004947662353516, -45.7569580078125, 18.777320861816406, 29.832077026367188, -71.60279846191406, 19.751876831054688, -54.154876708984375, 83.92794799804688, 53.642791748046875, -19.439701080322266, 191.90621948242188, 85.43880462646484, 12.40945816040039, 113.10001373291016, -22.04730796813965, 57.98396301269531, -97.36622619628906, -2.5820388793945312, 224.622802734375, 142.191650390625, 220.13482666015625, 61.204734802246094, 71.04826354980469, 0.6610260009765625, 275.3395080566406, 13.047208786010742, 1.1657085418701172, 72.15290069580078, 58.5723876953125, 131.9595947265625, 25.23009490966797, -52.278648376464844, -162.71978759765625, 69.69417572021484, 95.99354553222656, 95.87108612060547, 95.90809631347656, 20.85001564025879, 299.6709899902344, 182.2584228515625, 63.528900146484375, 69.92868041992188, 124.36479187011719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000159.npy"} +{"epoch": 0.23348017621145375, "step": 160, "batch_size": 64, "mean": 78.42352294921875, "std": 127.36312866210938, "min": -142.99940490722656, "p10": -68.38926162719726, "median": 63.46631622314453, "p90": 238.48291931152343, "max": 457.6695556640625, "pos_frac": 0.6875, "sample": [100.48798370361328, -11.044706344604492, 184.60977172851562, 155.3506317138672, -11.384807586669922, -70.03118896484375, 185.14126586914062, 178.53416442871094, 170.90847778320312, 12.072542190551758, -78.94766998291016, -5.8256988525390625, 137.05271911621094, 238.67562866210938, 114.3878402709961, -70.15892028808594, 36.66643142700195, -26.091812133789062, -140.2211456298828, 226.5265655517578, 274.6365966796875, -115.79768371582031, 238.03326416015625, -64.02307891845703, 140.1597137451172, 47.237396240234375, 6.351448059082031, -13.445877075195312, 457.6695556640625, 60.359901428222656, 14.35002326965332, 66.5727310180664, -34.36767578125, 379.78155517578125, -2.8368301391601562, 67.21745300292969, 98.91631317138672, 159.2276611328125, 369.42340087890625, 301.2987060546875, 33.33599090576172, 209.63528442382812, -142.99940490722656, 205.21951293945312, 93.93260955810547, 16.508941650390625, -28.588823318481445, 18.659093856811523, 111.0027084350586, 112.23982238769531, -35.682212829589844, 32.23247528076172, 112.07630920410156, -64.55809783935547, 88.29989624023438, 78.97230529785156, -9.100252151489258, 0.6564083099365234, -95.72502899169922, 153.74752807617188, 30.621009826660156, 252.47064208984375, -43.81884765625, 112.49490356445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000160.npy"} +{"epoch": 0.23494860499265785, "step": 161, "batch_size": 64, "mean": 68.96272277832031, "std": 102.6685562133789, "min": -128.23831176757812, "p10": -47.91453552246094, "median": 48.46743392944336, "p90": 229.77516632080088, "max": 312.36700439453125, "pos_frac": 0.75, "sample": [76.08839416503906, 194.216796875, -49.13323974609375, 138.4630889892578, 296.2932434082031, -128.23831176757812, -49.979156494140625, -67.38082885742188, 94.43302917480469, -42.417747497558594, 46.4990234375, 98.92219543457031, 78.21806335449219, -69.74580383300781, -4.182781219482422, 119.07371520996094, 312.36700439453125, 79.21368408203125, 136.388916015625, 265.87762451171875, 15.676246643066406, 54.560848236083984, -4.82708740234375, -33.64506530761719, 31.710235595703125, -9.355976104736328, 122.3812026977539, 37.16522979736328, 279.1807556152344, 297.3194580078125, 117.35304260253906, -33.24000549316406, 2.197490692138672, -26.918224334716797, 238.89981079101562, 116.4853744506836, 32.045257568359375, -45.070892333984375, 17.781402587890625, 82.81593322753906, -13.034011840820312, 23.777198791503906, 9.316986083984375, 33.48310852050781, 17.754981994628906, 29.37933349609375, 205.171875, 70.66747283935547, 16.343162536621094, 140.5223388671875, 208.4843292236328, -103.53285217285156, 66.412353515625, 168.39990234375, 34.430809020996094, 39.518096923828125, 33.49393844604492, 122.12168884277344, 89.83714294433594, -85.76470184326172, 50.43584442138672, 255.59872436523438, 113.12522888183594, 70.1793441772461], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000161.npy"} +{"epoch": 0.23641703377386197, "step": 162, "batch_size": 64, "mean": 68.85668182373047, "std": 110.2022933959961, "min": -174.91551208496094, "p10": -40.50316467285156, "median": 59.39109802246094, "p90": 199.9821533203125, "max": 448.7046813964844, "pos_frac": 0.75, "sample": [1.0594253540039062, 52.79348373413086, 105.08004760742188, 246.68263244628906, 56.33625793457031, 220.32846069335938, 29.055784225463867, 135.8911590576172, 43.069068908691406, -9.408666610717773, -21.133148193359375, 195.64825439453125, 67.92282104492188, -105.75849151611328, -36.447723388671875, -18.502689361572266, 161.42825317382812, 165.10171508789062, -6.0762481689453125, 122.54179382324219, 74.87828826904297, 27.844703674316406, 49.42161560058594, 8.333908081054688, 20.8566837310791, 201.83953857421875, 131.63558959960938, -33.39387512207031, -174.91551208496094, 144.67840576171875, 100.62625885009766, -111.60150146484375, -15.571531295776367, 17.214277267456055, 148.36019897460938, 66.60610961914062, 226.58184814453125, 448.7046813964844, 63.71790313720703, 117.6795883178711, 85.41779327392578, 90.691162109375, 149.20790100097656, 158.7545623779297, 143.14480590820312, 5.5687103271484375, -42.2412109375, -0.9060745239257812, 96.04835510253906, 41.817222595214844, -85.60777282714844, 162.2737274169922, 3.2722625732421875, -91.99735260009766, 388.55584716796875, 24.949951171875, -113.91202545166016, 32.02342224121094, 75.78467559814453, 86.01464080810547, 62.44593811035156, -34.253875732421875, 39.753997802734375, 210.91162109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000162.npy"} +{"epoch": 0.23788546255506607, "step": 163, "batch_size": 64, "mean": 66.64288330078125, "std": 107.73143005371094, "min": -156.284423828125, "p10": -50.26417083740234, "median": 59.91253662109375, "p90": 249.68630676269535, "max": 350.09222412109375, "pos_frac": 0.671875, "sample": [-6.938240051269531, -9.244606018066406, 262.64813232421875, 350.09222412109375, -26.12059783935547, -119.74946594238281, 92.79283905029297, -9.20953369140625, -9.35662841796875, 75.7454833984375, 129.85519409179688, -33.84355926513672, 105.5017318725586, 314.396240234375, 103.7608413696289, 41.628692626953125, 98.35582733154297, -16.063827514648438, 16.833763122558594, -60.733680725097656, 53.657867431640625, 150.0352783203125, 106.23330688476562, -27.92237091064453, 96.48268127441406, -0.5515327453613281, 122.06086730957031, -156.284423828125, -67.82723236083984, 104.39940643310547, 260.40887451171875, 12.648456573486328, 134.6378173828125, 11.96351432800293, 68.14360046386719, -10.296878814697266, -52.30543518066406, 290.8199462890625, 124.08655548095703, 62.9788818359375, -72.28265380859375, 38.787437438964844, -45.501220703125, 240.7498779296875, 42.727874755859375, 97.62823486328125, -29.39629364013672, 284.939453125, 144.7406463623047, 29.314905166625977, 58.560943603515625, 107.38177490234375, 61.264129638671875, 180.01956176757812, 78.15003204345703, -24.279441833496094, 82.01719665527344, 10.179336547851562, -92.01004791259766, 0.44612693786621094, 253.51620483398438, 85.36276245117188, 159.38671875, -10.279367446899414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000163.npy"} +{"epoch": 0.2393538913362702, "step": 164, "batch_size": 64, "mean": 82.15338134765625, "std": 103.5638198852539, "min": -147.525634765625, "p10": -20.6383207321167, "median": 69.3184814453125, "p90": 207.42846984863286, "max": 417.7083740234375, "pos_frac": 0.84375, "sample": [-140.80445861816406, 166.08816528320312, 165.7470703125, 164.18167114257812, 219.31939697265625, 51.91791534423828, 78.45735168457031, 122.6304931640625, 70.22732543945312, 71.32450103759766, 244.14132690429688, 47.607383728027344, 19.111406326293945, 51.07591247558594, -67.70011901855469, 131.6824951171875, 63.30223846435547, 65.45531463623047, -20.490137100219727, 68.40963745117188, 1.7608623504638672, 126.54764556884766, 73.81710815429688, 196.9732666015625, 31.62431526184082, 152.18478393554688, -22.427387237548828, -87.76502227783203, 146.31173706054688, 15.99915885925293, 44.66477966308594, -19.529037475585938, 152.25146484375, 175.14779663085938, 134.5771484375, 113.16508483886719, 35.210853576660156, 99.2343978881836, 13.891853332519531, 164.52098083496094, 38.732177734375, 86.40611267089844, 68.29943084716797, 30.99786376953125, 44.10804748535156, 2.0480194091796875, 71.04216003417969, 32.102027893066406, -147.525634765625, 136.83438110351562, 75.25143432617188, 49.56956481933594, 335.2001647949219, 2.178497314453125, 344.07391357421875, 229.439208984375, 1.500396728515625, 211.90927124023438, 82.75811767578125, -52.426971435546875, -20.701828002929688, 102.59405517578125, 417.7083740234375, -4.1291351318359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000164.npy"} +{"epoch": 0.24082232011747431, "step": 165, "batch_size": 64, "mean": 52.59346008300781, "std": 99.6047134399414, "min": -150.33958435058594, "p10": -81.61388778686523, "median": 39.368804931640625, "p90": 199.76520690917977, "max": 277.62506103515625, "pos_frac": 0.703125, "sample": [88.9291763305664, 84.9374008178711, -86.54830932617188, 18.700416564941406, -99.42501831054688, -3.0272293090820312, 75.69561004638672, 114.07705688476562, -18.276947021484375, -82.00702667236328, 125.73062896728516, -148.82049560546875, -81.35334777832031, 36.93244934082031, 37.61009216308594, -95.90752410888672, -10.689411163330078, -48.19825744628906, 152.693603515625, 150.96368408203125, 82.25227355957031, 23.668601989746094, 15.789260864257812, 277.62506103515625, -150.33958435058594, 35.778656005859375, -19.0174617767334, -45.889747619628906, -5.14459228515625, 141.0664520263672, -81.72554779052734, 19.436513900756836, 214.3419189453125, 182.69940185546875, 47.40374755859375, -29.411659240722656, 79.712646484375, 60.68656921386719, 77.2723617553711, 4.750688552856445, 41.12751770019531, 58.398597717285156, 150.72767639160156, 76.21944427490234, 222.24810791015625, -21.282135009765625, 97.61865234375, 60.24977111816406, 6.230583190917969, -68.8607177734375, 30.005279541015625, 219.21328735351562, 30.367431640625, -38.349876403808594, 59.09577560424805, 263.05169677734375, 26.026222229003906, 263.12530517578125, 206.93780517578125, 124.14274597167969, 114.01066589355469, 12.309486389160156, 107.36691284179688, 183.02914428710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000165.npy"} +{"epoch": 0.2422907488986784, "step": 166, "batch_size": 64, "mean": 98.27557373046875, "std": 139.42352294921875, "min": -92.2435302734375, "p10": -34.65679473876952, "median": 64.8866138458252, "p90": 275.8610504150392, "max": 566.3701171875, "pos_frac": 0.8125, "sample": [448.509765625, 311.58251953125, 29.313922882080078, 490.4876708984375, -38.66413116455078, 5.019512176513672, 19.650577545166016, 69.48880004882812, 172.27145385742188, 44.436370849609375, 123.43666076660156, -9.991291046142578, 10.95170783996582, 159.50393676757812, -62.47607421875, 76.9910888671875, 158.88330078125, 17.801498413085938, 2.4459571838378906, 189.48931884765625, 3.048868179321289, 566.3701171875, 39.94882583618164, -49.072994232177734, -52.143089294433594, -52.23330307006836, 37.149559020996094, 141.75347900390625, -92.2435302734375, 243.82363891601562, 289.59136962890625, 468.9508361816406, 107.71634674072266, 67.3253173828125, 27.6390380859375, 361.2008056640625, 12.529975891113281, 199.60198974609375, 26.054656982421875, 150.599365234375, -22.416873931884766, 86.11788940429688, 18.998031616210938, 11.233345031738281, 177.35403442382812, 129.84254455566406, 214.03172302246094, -9.731342315673828, 76.34229278564453, 170.5189208984375, 31.65426254272461, 83.52533721923828, -61.85377502441406, 62.44791030883789, -5.909034729003906, -25.30634307861328, 115.65264129638672, 2.308380126953125, 114.6673583984375, 6.289703369140625, 89.56895446777344, 17.289838790893555, 203.62411499023438, 86.64289093017578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000166.npy"} +{"epoch": 0.24375917767988253, "step": 167, "batch_size": 64, "mean": 103.19129943847656, "std": 115.97958374023438, "min": -140.95623779296875, "p10": -18.148437499999996, "median": 86.8951416015625, "p90": 249.67114715576182, "max": 436.15704345703125, "pos_frac": 0.828125, "sample": [103.93629455566406, 64.26771545410156, 149.34719848632812, 217.19313049316406, 111.80438232421875, 124.2625732421875, 91.29949951171875, 67.64863586425781, 96.38710021972656, 47.69345474243164, 436.15704345703125, 189.16534423828125, 70.89759826660156, -15.762588500976562, 27.187225341796875, 60.99480056762695, 78.53099822998047, -56.834373474121094, 131.5762481689453, 183.46160888671875, 23.037004470825195, 41.11162567138672, 78.95323181152344, 58.268394470214844, 105.28288269042969, -140.95623779296875, 276.1710205078125, 206.226318359375, -23.18499755859375, 170.91107177734375, -12.557548522949219, 105.22149658203125, 76.19102478027344, 56.46105194091797, 368.01611328125, 157.7550048828125, 260.5696716308594, 82.49078369140625, 202.53480529785156, -125.69293212890625, 224.2412567138672, 262.3982849121094, 334.7462463378906, 147.0511474609375, 403.76690673828125, -15.381538391113281, 175.26553344726562, 143.26080322265625, 220.9287109375, 175.60519409179688, 102.55094909667969, 11.355056762695312, -0.9027156829833984, 29.838050842285156, 56.13600158691406, 131.49203491210938, 103.36540985107422, 56.28966522216797, 21.363571166992188, -97.97689056396484, 22.675479888916016, 14.74485969543457, -45.42295837402344, -19.170944213867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000167.npy"} +{"epoch": 0.24522760646108663, "step": 168, "batch_size": 64, "mean": 75.70437622070312, "std": 103.38095092773438, "min": -86.1849136352539, "p10": -50.684083175659175, "median": 53.44643211364746, "p90": 200.01539306640626, "max": 399.284423828125, "pos_frac": 0.796875, "sample": [16.28314208984375, 113.39205169677734, 26.362281799316406, 21.49120330810547, 6.375711441040039, 32.025665283203125, 133.66363525390625, -46.32265090942383, 20.219318389892578, 16.10253143310547, -81.86676025390625, 186.9267120361328, 399.284423828125, 90.76441955566406, -54.37471008300781, 196.11203002929688, 177.35040283203125, 242.6710662841797, 201.68826293945312, 47.184226989746094, 214.48190307617188, 270.7938232421875, 175.7098846435547, 165.60910034179688, -44.40125274658203, -52.55326843261719, -15.9111328125, 126.03857421875, 149.35728454589844, 132.37628173828125, 88.20953369140625, 33.00956726074219, -64.18276977539062, 97.42864990234375, 37.097808837890625, 119.0643310546875, -67.93504333496094, -10.520755767822266, -30.845218658447266, 27.23614501953125, 29.00261878967285, 88.43501281738281, 60.89520263671875, 94.50436401367188, 113.08198547363281, 66.72035217285156, 28.275182723999023, -61.41992950439453, 276.0365295410156, 56.613990783691406, 158.00074768066406, 22.135923385620117, 24.423730850219727, 27.650238037109375, 127.24488067626953, 54.945098876953125, 26.12666893005371, -86.1849136352539, 51.9477653503418, -32.376220703125, 374.1309509277344, 19.519813537597656, 94.76846313476562, 135.205322265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000168.npy"} +{"epoch": 0.24669603524229075, "step": 169, "batch_size": 64, "mean": 66.46955108642578, "std": 129.17422485351562, "min": -271.70806884765625, "p10": -66.80077896118162, "median": 43.99698448181152, "p90": 220.34434356689457, "max": 560.31982421875, "pos_frac": 0.75, "sample": [76.2109146118164, 87.16573333740234, 3.771617889404297, 19.073158264160156, 211.6922149658203, 111.17176055908203, 19.210006713867188, 157.0628204345703, 87.82257843017578, 34.18928909301758, -1.1896858215332031, 45.81877136230469, -271.70806884765625, 2.2320404052734375, 53.682308197021484, -24.439620971679688, 26.806705474853516, -83.89784240722656, 85.3841552734375, 167.71832275390625, 144.53106689453125, -52.43125915527344, 64.85565185546875, -81.86824035644531, 207.78500366210938, 224.05239868164062, 64.14849853515625, 3.06231689453125, -14.634033203125, 29.998268127441406, 207.7987060546875, -3.2576980590820312, 17.717309951782227, 67.1411361694336, 88.73193359375, 128.8036651611328, 253.37356567382812, 24.447057723999023, 67.98129272460938, 231.8995819091797, 6.09520149230957, 388.96673583984375, -124.7173080444336, 92.06207275390625, 4.209846496582031, 52.111083984375, 31.249435424804688, -72.95914459228516, 336.0099792480469, -14.600784301757812, 560.31982421875, -5.020233154296875, 93.2453384399414, 14.629920959472656, 284.9467468261719, -9.84918212890625, 81.87571716308594, 18.732589721679688, 42.17519760131836, -27.61583709716797, -121.86566162109375, 92.45796203613281, 195.7908172607422, -146.112548828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000169.npy"} +{"epoch": 0.24816446402349487, "step": 170, "batch_size": 64, "mean": 78.5929946899414, "std": 105.24567413330078, "min": -227.08193969726562, "p10": -31.82768859863281, "median": 66.83643341064453, "p90": 212.44104309082036, "max": 326.0834655761719, "pos_frac": 0.828125, "sample": [187.47747802734375, -132.84783935546875, 135.20028686523438, 83.15020751953125, 200.804931640625, 80.00796508789062, 123.59465026855469, 17.243728637695312, 143.6584930419922, 2.5821990966796875, 90.92594146728516, -1.2941627502441406, 72.601318359375, -35.54524230957031, 9.702592849731445, -50.12763977050781, 111.75501251220703, 236.1373291015625, 16.52142333984375, 294.7087097167969, 4.720300674438477, -18.50482749938965, 165.82321166992188, 47.9402961730957, 2.0860214233398438, 21.646507263183594, -227.08193969726562, -30.184532165527344, 75.22698974609375, 195.55718994140625, 76.87623596191406, 124.65945434570312, 53.6235466003418, 78.335205078125, 175.026123046875, -52.375274658203125, 307.9747314453125, 6.2653045654296875, 8.204526901245117, 326.0834655761719, 72.60693359375, -90.72816467285156, 57.06222915649414, 120.8131103515625, 197.2724609375, -32.531898498535156, 27.848360061645508, 5.531290054321289, 37.066139221191406, 61.07154846191406, 90.24542999267578, 48.787376403808594, 235.22500610351562, 41.76777267456055, 133.93246459960938, 58.444725036621094, 187.75599670410156, 217.42794799804688, -17.150409698486328, 194.76974487304688, 234.5889892578125, 12.76945686340332, 189.503662109375, 17.711322784423828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000170.npy"} +{"epoch": 0.24963289280469897, "step": 171, "batch_size": 64, "mean": 63.276702880859375, "std": 130.76609802246094, "min": -255.5075225830078, "p10": -90.3816551208496, "median": 58.221946716308594, "p90": 209.469775390625, "max": 574.9740600585938, "pos_frac": 0.703125, "sample": [-82.31124877929688, 217.5814971923828, 23.78008270263672, 91.110107421875, -97.6507797241211, -49.897247314453125, 8.848350524902344, 108.13836669921875, -77.58343505859375, 135.14215087890625, 11.847190856933594, 150.67990112304688, 89.52603149414062, 108.94798278808594, 49.566429138183594, -56.36253356933594, 90.91586303710938, -96.16792297363281, 109.09703063964844, -8.212394714355469, 201.99594116210938, 84.40469360351562, 51.052032470703125, 85.65522003173828, 327.21453857421875, 69.53575134277344, -93.84040069580078, 574.9740600585938, -95.35747528076172, 49.475372314453125, 71.63545227050781, -255.5075225830078, 419.456298828125, 132.81124877929688, 90.10713195800781, -117.8187255859375, 209.22714233398438, 101.30299377441406, 9.464035034179688, 290.25048828125, 7.22601318359375, 70.73638153076172, 209.57376098632812, -22.369489669799805, -94.403076171875, 11.26068115234375, 65.39186096191406, 82.93907165527344, -57.93879318237305, 102.91708374023438, -25.144920349121094, -55.301605224609375, 135.6165771484375, 40.685585021972656, 78.14959716796875, -10.01957893371582, 33.976959228515625, 157.21719360351562, -18.79358673095703, -6.372711181640625, 88.3777847290039, 16.656644821166992, 257.57977294921875, 48.71437454223633], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000171.npy"} +{"epoch": 0.2511013215859031, "step": 172, "batch_size": 64, "mean": 66.58623504638672, "std": 110.25837707519531, "min": -188.6351318359375, "p10": -69.86798782348632, "median": 74.27996444702148, "p90": 193.79675292968753, "max": 451.4119873046875, "pos_frac": 0.6875, "sample": [-34.3768310546875, 103.73171997070312, -111.34133911132812, 87.32744598388672, 35.21101379394531, -45.54723358154297, 145.082275390625, 99.75471496582031, 74.40091705322266, -74.48043060302734, -121.85789489746094, 55.42795944213867, 50.90247344970703, -78.72111511230469, 147.61886596679688, 86.66885375976562, 16.710674285888672, 68.95610046386719, -51.59780502319336, 68.13321685791016, -35.62129211425781, 92.11679077148438, 184.015869140625, 140.83863830566406, 20.48365020751953, 74.15901184082031, 228.53305053710938, 195.646484375, -76.96612548828125, 237.1708984375, -25.926231384277344, 109.24335479736328, 101.67249298095703, 91.95550537109375, 117.88018798828125, 301.2025451660156, 451.4119873046875, 46.701171875, 120.65099334716797, 30.129650115966797, -20.179771423339844, -44.534149169921875, 80.4306640625, 80.64425659179688, 189.480712890625, -24.976165771484375, -11.415407180786133, 27.537900924682617, -29.905319213867188, 170.15231323242188, 155.58009338378906, -4.5281829833984375, -59.105621337890625, 95.70353698730469, 105.0307846069336, -188.6351318359375, 71.29597473144531, 102.831787109375, 227.13690185546875, 94.58255004882812, -15.656883239746094, -81.08091735839844, 237.2763671875, 176.55081176757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000172.npy"} +{"epoch": 0.2525697503671072, "step": 173, "batch_size": 64, "mean": 67.0791015625, "std": 95.90992736816406, "min": -185.72247314453125, "p10": -24.512316513061517, "median": 49.90376091003418, "p90": 209.48938903808596, "max": 344.5565185546875, "pos_frac": 0.765625, "sample": [47.79362487792969, 80.2350845336914, 34.554046630859375, -5.232872009277344, -2.1371116638183594, 48.77415466308594, 1.6487922668457031, 136.41571044921875, 118.29762268066406, 9.443166732788086, -14.025115966796875, -41.04693603515625, 16.660463333129883, -2.4698867797851562, 79.96475219726562, 267.44366455078125, 72.21331024169922, 55.90407180786133, 69.67339324951172, 202.35269165039062, 115.31294250488281, 87.05532836914062, 84.96471405029297, -57.302268981933594, 50.660240173339844, 26.36029815673828, 344.5565185546875, 29.023502349853516, 159.41049194335938, 212.5479736328125, 65.15543365478516, 17.4885311126709, -58.0169677734375, 112.4795150756836, 103.80047607421875, -16.803512573242188, 118.17215728759766, 7.24884033203125, -2.7701034545898438, 231.0130615234375, 63.06674575805664, 187.82244873046875, -27.816089630126953, 36.457923889160156, -7.528450012207031, 13.831829071044922, 219.240966796875, 112.26727294921875, -41.728355407714844, -2.27423095703125, 93.5533676147461, 49.147281646728516, 131.88011169433594, -67.50186157226562, -185.72247314453125, 335.40045166015625, 36.7421875, 5.430757522583008, 90.05645751953125, 18.95743179321289, 266.8503723144531, 60.89576721191406, 51.73677444458008, 45.475921630859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000173.npy"} +{"epoch": 0.2540381791483113, "step": 174, "batch_size": 64, "mean": 79.28968048095703, "std": 118.29193878173828, "min": -157.06565856933594, "p10": -51.246144866943354, "median": 59.71384048461914, "p90": 257.02567291259777, "max": 453.7724304199219, "pos_frac": 0.765625, "sample": [23.644515991210938, -15.945465087890625, -52.84898376464844, -91.7467041015625, -12.613174438476562, 35.032142639160156, -13.152202606201172, 12.334844589233398, 112.48796844482422, 199.5895233154297, 222.3898468017578, -1.5279922485351562, 61.08783721923828, 18.509374618530273, 54.75311279296875, -42.80873107910156, 127.93601989746094, 161.7725067138672, 153.3106231689453, 271.8695983886719, 58.33984375, 33.87589645385742, 61.610862731933594, -15.858514785766602, 325.9761657714844, -85.4498291015625, 134.29794311523438, 299.69329833984375, 28.985977172851562, 92.06880187988281, 9.360090255737305, -13.982067108154297, 64.22344970703125, -80.885498046875, 140.10845947265625, -47.506187438964844, 164.15609741210938, 108.31317901611328, -126.53024291992188, 273.047119140625, 305.44305419921875, 328.5495910644531, 33.01139831542969, 121.99420166015625, 8.878183364868164, -157.06565856933594, 3.36181640625, 134.1031494140625, 122.18727111816406, 142.15017700195312, 80.86653137207031, 48.625946044921875, 24.41558074951172, 138.18145751953125, -97.93958282470703, 187.94236755371094, 149.03643798828125, 93.68624877929688, 46.55591583251953, 13.660533905029297, 453.7724304199219, 122.75117492675781, 88.93905639648438, 33.51258850097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000174.npy"} +{"epoch": 0.2555066079295154, "step": 175, "batch_size": 64, "mean": 108.45330810546875, "std": 108.2730712890625, "min": -88.28738403320312, "p10": -8.24448833465576, "median": 86.6563720703125, "p90": 242.66899414062502, "max": 428.94464111328125, "pos_frac": 0.828125, "sample": [21.91706657409668, 36.907127380371094, 56.43286895751953, -88.28738403320312, 86.56977844238281, 163.46859741210938, 232.88134765625, 85.45568084716797, 22.94220733642578, 151.3407440185547, 296.307861328125, -72.18975830078125, -10.516876220703125, 311.0262451171875, 105.92010498046875, 31.863250732421875, 218.40093994140625, 223.93540954589844, 231.32382202148438, 163.26028442382812, 178.658935546875, -56.014122009277344, 203.18707275390625, 428.94464111328125, 36.659423828125, 133.69549560546875, 320.74957275390625, 45.30610656738281, 132.84689331054688, 85.1355209350586, -5.355064392089844, 55.21868133544922, 301.4743957519531, 17.45052719116211, 5.298988342285156, 35.690086364746094, -1.6426887512207031, 263.9162292480469, 7.999019622802734, 83.70065307617188, 125.19122314453125, 120.13906860351562, 124.31761932373047, 212.683837890625, -1.3431110382080078, 217.12802124023438, 237.33187866210938, 188.46971130371094, -32.06058120727539, 100.56848907470703, 170.78570556640625, 95.18962860107422, 86.74296569824219, 154.01992797851562, 42.17702102661133, 69.12934875488281, 143.14944458007812, -9.482812881469727, 40.82312774658203, 244.95632934570312, -50.967735290527344, 66.4291000366211, -1.360382080078125, 55.11418151855469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000175.npy"} +{"epoch": 0.25697503671071953, "step": 176, "batch_size": 64, "mean": 91.49755859375, "std": 121.00588989257812, "min": -63.83177185058594, "p10": -33.48957977294922, "median": 62.287893295288086, "p90": 221.0479476928711, "max": 508.62054443359375, "pos_frac": 0.8125, "sample": [3.238189697265625, 26.375625610351562, 100.08401489257812, 215.4659881591797, 61.138545989990234, 25.037403106689453, 14.715896606445312, 329.1025085449219, 19.302223205566406, -25.386199951171875, -32.3533935546875, 482.4090270996094, 12.858736038208008, -63.83177185058594, 223.44021606445312, 70.18820190429688, 28.335205078125, 211.4492645263672, 177.953369140625, -5.4128570556640625, -19.633865356445312, 73.07108306884766, 144.15296936035156, -48.700401306152344, 227.48190307617188, 1.8192119598388672, 28.43028450012207, 197.73439025878906, 184.8323974609375, 14.874427795410156, 117.40890502929688, -63.5994873046875, 151.44558715820312, -49.665809631347656, 37.98439025878906, 168.88040161132812, -44.20869064331055, 81.02104949951172, 186.78836059570312, 136.04164123535156, 187.56411743164062, 0.32955169677734375, -42.55218505859375, 2.6951637268066406, 144.63751220703125, 135.84646606445312, 129.37689208984375, 66.37631225585938, 134.92904663085938, 294.94720458984375, 36.496795654296875, -33.97651672363281, 18.418228149414062, 15.99067497253418, 138.3938751220703, 63.43724060058594, -15.136051177978516, 18.495609283447266, 135.5623321533203, 3.1354331970214844, 163.27310180664062, 508.62054443359375, 19.97081756591797, 328.742919921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000176.npy"} +{"epoch": 0.25844346549192365, "step": 177, "batch_size": 64, "mean": 89.67583465576172, "std": 119.57540893554688, "min": -108.71656799316406, "p10": -23.93701953887939, "median": 60.34989547729492, "p90": 241.86781616210942, "max": 586.0198974609375, "pos_frac": 0.734375, "sample": [168.949462890625, 56.214080810546875, 230.15501403808594, 121.67339324951172, 63.311798095703125, 126.837646484375, -108.71656799316406, 93.31196594238281, 202.083251953125, 169.26806640625, 252.2129364013672, 42.20030975341797, 83.37554931640625, 257.7668151855469, 127.12033081054688, 80.30183410644531, 21.84803009033203, 367.7132263183594, 28.052024841308594, 176.44305419921875, 54.293785095214844, 151.9193878173828, -77.63533020019531, 149.71205139160156, -2.903757095336914, 45.188331604003906, 204.364013671875, -21.14093017578125, -6.340545654296875, 4.78759765625, 57.38799285888672, 290.10064697265625, 104.96360778808594, -19.47589111328125, 12.39849853515625, 79.365966796875, -16.771629333496094, 246.88758850097656, -0.41808128356933594, -28.669828414916992, -80.1270751953125, 155.693115234375, -5.279293060302734, 52.311607360839844, -90.2547607421875, 151.62411499023438, 132.33657836914062, 15.993404388427734, -8.983680725097656, -4.5784149169921875, 194.3650665283203, 13.309938430786133, 77.79032897949219, -13.147499084472656, 195.54098510742188, 33.09035110473633, 41.63435363769531, -25.135343551635742, 160.11196899414062, 42.50144958496094, 586.0198974609375, 282.4483642578125, 72.4594955444336, -28.607406616210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000177.npy"} +{"epoch": 0.2599118942731278, "step": 178, "batch_size": 64, "mean": 114.01716613769531, "std": 142.20709228515625, "min": -148.04067993164062, "p10": -30.468666839599603, "median": 101.61541748046875, "p90": 319.33523559570324, "max": 559.3980712890625, "pos_frac": 0.796875, "sample": [46.210269927978516, 23.861366271972656, 157.43502807617188, -23.497650146484375, 138.53399658203125, 92.5973129272461, 111.4464340209961, 43.23418426513672, 20.182369232177734, 294.82391357421875, 2.0660324096679688, 112.01165771484375, 279.5423583984375, 112.86376953125, 50.421417236328125, 158.37191772460938, 402.4897155761719, 112.40870666503906, 559.3980712890625, -52.71592712402344, 208.6377410888672, 59.939056396484375, 9.408416748046875, 55.630653381347656, -5.92119026184082, 178.74343872070312, 368.7800598144531, -46.30370330810547, 65.51943969726562, -5.39190673828125, -76.70075988769531, 107.06381225585938, 223.79669189453125, 22.822341918945312, 122.5319595336914, 190.81321716308594, 134.53085327148438, -94.40704345703125, 68.07276916503906, 266.933349609375, 31.830394744873047, 120.73513793945312, -148.04067993164062, 435.6546325683594, -8.432397842407227, 159.81898498535156, 117.37158203125, 140.729248046875, -33.45624542236328, 469.2853698730469, 37.745941162109375, 136.68966674804688, 333.3603820800781, 1.5527935028076172, 8.71282958984375, 220.67530822753906, 96.16702270507812, -18.203872680664062, 61.06285095214844, 220.35067749023438, 329.840087890625, -87.55856323242188, 184.74606323242188, -9.722885131835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000178.npy"} +{"epoch": 0.26138032305433184, "step": 179, "batch_size": 64, "mean": 81.7546157836914, "std": 139.3387451171875, "min": -276.90814208984375, "p10": -82.68174438476561, "median": 79.09677124023438, "p90": 301.4401031494141, "max": 378.0808410644531, "pos_frac": 0.6875, "sample": [117.49341583251953, 159.2561798095703, 175.60662841796875, 378.0808410644531, 69.07298278808594, -97.2564697265625, 184.13356018066406, 40.456050872802734, -11.969833374023438, 14.884237289428711, 107.12960815429688, 59.2451171875, 47.96484375, 86.8432388305664, -16.90088653564453, 99.33873748779297, -70.06838989257812, 319.83349609375, -36.95682907104492, -50.896453857421875, 76.83326721191406, -87.33580017089844, 352.0744323730469, 23.39710807800293, 221.53932189941406, 127.0008544921875, 156.53297424316406, 306.1080322265625, 169.4183807373047, 35.36833953857422, 80.31194305419922, 302.5817565917969, 342.80975341796875, 100.52835083007812, 79.61267852783203, 128.9009246826172, -36.32373809814453, -15.77023696899414, -1.6111526489257812, 85.23965454101562, 80.26819610595703, -35.43600845336914, 78.58086395263672, -98.55490112304688, 135.44937133789062, 340.5235290527344, -96.89867401123047, 254.0109100341797, 114.37718963623047, -122.91321563720703, -50.39056396484375, 230.0001983642578, 31.561233520507812, 241.28421020507812, 298.7762451171875, 220.6375274658203, -150.86032104492188, 163.9404296875, -61.659156799316406, -71.82228088378906, -276.90814208984375, 19.7913818359375, 24.59370994567871, -58.56298828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000179.npy"} +{"epoch": 0.26284875183553597, "step": 180, "batch_size": 64, "mean": 103.52677917480469, "std": 123.07260131835938, "min": -207.23655700683594, "p10": -27.119700050354005, "median": 88.14494323730469, "p90": 257.01291198730473, "max": 492.6019287109375, "pos_frac": 0.828125, "sample": [95.57996368408203, 166.3060302734375, -45.06721496582031, 3.0006980895996094, -63.500267028808594, 492.6019287109375, -25.412593841552734, 66.40692138671875, 104.04295349121094, 59.953704833984375, 5.9847259521484375, 233.02670288085938, -207.23655700683594, 149.60935974121094, 176.11207580566406, 130.25192260742188, 88.09917449951172, 84.22148132324219, 101.35514068603516, 63.45048522949219, 166.56210327148438, -134.42356872558594, 54.6420783996582, 290.22247314453125, 76.5103530883789, 226.67169189453125, 150.87887573242188, 7.44866943359375, 35.63335418701172, 56.426090240478516, 275.831787109375, 4.171224594116211, 246.56979370117188, 169.54666137695312, 81.2336654663086, -5.588842391967773, 99.68437957763672, 197.02923583984375, -27.153968811035156, 181.9256591796875, 82.95012664794922, 7.017585754394531, 251.91323852539062, 70.253662109375, -124.18565368652344, 86.71806335449219, 1.920684814453125, 259.198486328125, 230.62742614746094, 90.61005401611328, 137.5398712158203, 240.26992797851562, -27.03973960876465, 334.8411865234375, 88.19071197509766, -21.357925415039062, 195.23892211914062, 153.7153778076172, 29.557212829589844, -68.52454376220703, 264.0016784667969, 282.6883544921875, 30.19525909423828, 196.765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000180.npy"} +{"epoch": 0.2643171806167401, "step": 181, "batch_size": 64, "mean": 64.5262222290039, "std": 114.39883422851562, "min": -200.0332794189453, "p10": -55.17941818237305, "median": 45.94013595581055, "p90": 210.99144287109377, "max": 385.420654296875, "pos_frac": 0.765625, "sample": [129.3859100341797, -54.45880889892578, 15.186830520629883, 63.16606521606445, 110.47936248779297, -2.5374584197998047, 31.070758819580078, 41.579322814941406, 83.65018463134766, 121.06611633300781, -1.6680183410644531, 17.818735122680664, 4.985988616943359, 14.552986145019531, -152.86563110351562, 218.24935913085938, 114.13740539550781, -27.775917053222656, 76.36712646484375, -10.031352996826172, 44.42416000366211, -55.488250732421875, -16.951919555664062, 235.03367614746094, -77.81752014160156, 83.79573822021484, 121.37277221679688, 272.59918212890625, 66.90206146240234, 48.716468811035156, 123.03546905517578, 80.52591705322266, -200.0332794189453, 47.40092468261719, 23.386367797851562, 79.18618774414062, 13.022150039672852, 186.17564392089844, 139.0680694580078, -60.300933837890625, 21.45489501953125, -189.865478515625, 213.61221313476562, -16.835811614990234, 350.75982666015625, 159.80242919921875, -164.33811950683594, 59.160560607910156, -30.049972534179688, 129.68045043945312, 204.87631225585938, 385.420654296875, 143.78884887695312, 29.604000091552734, 132.205322265625, 5.583255767822266, 4.568351745605469, 1.4069747924804688, 289.63995361328125, 43.41579055786133, 29.514976501464844, 183.83969116210938, 151.54153442382812, 44.479347229003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000181.npy"} +{"epoch": 0.2657856093979442, "step": 182, "batch_size": 64, "mean": 111.48786926269531, "std": 132.6415557861328, "min": -231.09249877929688, "p10": -51.9391086578369, "median": 103.611572265625, "p90": 276.62159118652346, "max": 381.67913818359375, "pos_frac": 0.828125, "sample": [286.1573486328125, 81.75495910644531, 305.8132629394531, 194.29458618164062, 84.64657592773438, 222.96473693847656, 237.43740844726562, 277.561279296875, 348.8660888671875, 75.95851135253906, 133.1105499267578, 48.020198822021484, 27.851762771606445, -18.951824188232422, 256.71612548828125, -231.09249877929688, 221.48336791992188, 66.75335693359375, 4.990255355834961, 44.07472229003906, 381.67913818359375, -0.4015960693359375, 254.9219970703125, 112.90031433105469, 146.01318359375, -58.891326904296875, 241.9791259765625, 94.32283020019531, 21.001346588134766, 71.25712585449219, 148.9384765625, 257.20947265625, -102.9856185913086, -117.94011688232422, 161.15225219726562, 233.83047485351562, 43.401214599609375, 149.03616333007812, 210.43844604492188, 230.61439514160156, -65.92440032958984, 73.36376190185547, 32.38684844970703, 128.17124938964844, 151.4104461669922, 175.5416259765625, 181.61761474609375, 114.91185760498047, -123.45393371582031, 6.441219329833984, 369.4463195800781, 145.4595947265625, 50.00563049316406, 11.86778450012207, -30.798828125, 7.74517822265625, -126.6898422241211, 72.55006408691406, 274.4289855957031, 17.17339324951172, 348.78759765625, -35.71726608276367, 49.27113342285156, 160.33956909179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000182.npy"} +{"epoch": 0.26725403817914833, "step": 183, "batch_size": 64, "mean": 60.38490676879883, "std": 107.37228393554688, "min": -156.22642517089844, "p10": -73.0094612121582, "median": 57.64152908325195, "p90": 185.45636596679694, "max": 352.64239501953125, "pos_frac": 0.75, "sample": [26.279953002929688, 71.53716278076172, -3.331632614135742, 60.74897766113281, 13.14981460571289, 165.16424560546875, 149.4445037841797, 129.08914184570312, 48.745758056640625, 56.452980041503906, -73.36732482910156, 138.75613403320312, 60.88371658325195, 20.654993057250977, 29.311279296875, 82.08428955078125, 330.76904296875, 242.95220947265625, 140.77891540527344, 31.71368980407715, 65.96634674072266, 112.39863586425781, 287.61370849609375, 99.89974975585938, -12.130149841308594, 4.498983383178711, 70.38134765625, 54.48111343383789, -2.8968658447265625, -156.22642517089844, 168.7781982421875, 79.85186767578125, -55.11128234863281, 13.96314811706543, 172.25454711914062, 58.830078125, 42.288734436035156, 100.43624114990234, 135.66842651367188, 228.54440307617188, -122.29025268554688, -37.82427978515625, 352.64239501953125, -72.17444610595703, 34.6065673828125, -34.78047180175781, 78.52398681640625, -99.21484375, -112.60283660888672, -104.89212036132812, 161.29391479492188, 116.1629638671875, 193.6891326904297, 191.11428833007812, 77.85791015625, 56.34803009033203, 27.483421325683594, -64.95745086669922, 137.67562866210938, 83.69882202148438, -151.10745239257812, -56.456233978271484, 18.357955932617188, 0.1706085205078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000183.npy"} +{"epoch": 0.2687224669603524, "step": 184, "batch_size": 64, "mean": 63.327796936035156, "std": 109.94463348388672, "min": -143.05380249023438, "p10": -46.01745109558105, "median": 30.798585891723633, "p90": 207.55157623291018, "max": 336.789794921875, "pos_frac": 0.75, "sample": [29.730255126953125, -104.01485443115234, 63.323646545410156, 76.1811294555664, 198.13063049316406, 179.0877685546875, 10.74542236328125, 12.502883911132812, 159.91656494140625, -18.424095153808594, 307.3714599609375, 336.789794921875, 59.02796173095703, 210.34133911132812, -41.901039123535156, 187.05142211914062, 10.5506591796875, 214.611328125, 96.68682861328125, 266.48919677734375, -23.724510192871094, 302.26263427734375, -91.35403442382812, 16.897293090820312, 37.48024368286133, 35.52294158935547, 42.911521911621094, 11.445365905761719, 86.47834777832031, 3.680692672729492, 40.94844055175781, 95.19143676757812, 138.49302673339844, 197.6870574951172, -92.38427734375, 199.96533203125, 15.309890747070312, -25.237754821777344, -35.764034271240234, 25.479564666748047, 73.4583969116211, -20.26306915283203, 29.522499084472656, 9.564310073852539, 15.973182678222656, 201.04212951660156, 66.31122589111328, 55.74960708618164, -52.187652587890625, -127.60385131835938, -47.7816276550293, 25.89459800720215, -5.806125640869141, 4.5623931884765625, -143.05380249023438, 29.327545166015625, -40.54508972167969, 176.5757293701172, 31.86691665649414, 105.75846099853516, 6.721351623535156, -0.9719085693359375, 96.88392639160156, 326.4925842285156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000184.npy"} +{"epoch": 0.2701908957415565, "step": 185, "batch_size": 64, "mean": 50.67546081542969, "std": 120.80158233642578, "min": -196.05227661132812, "p10": -65.72291679382323, "median": 26.072877883911133, "p90": 198.2263122558594, "max": 425.5668640136719, "pos_frac": 0.625, "sample": [-161.49563598632812, -69.27690124511719, -18.829408645629883, 88.38473510742188, 71.50459289550781, 18.74321174621582, -5.858423233032227, 76.67626953125, 411.9520568847656, -4.998405456542969, 14.949779510498047, 112.70494842529297, 106.9072265625, -5.607654571533203, 253.58657836914062, 34.012725830078125, -9.227216720581055, 32.555564880371094, 138.7235870361328, -108.94839477539062, -9.715593338012695, 35.42025375366211, 25.962055206298828, 68.75840759277344, 74.31787109375, -5.485374450683594, -113.21879577636719, 19.069068908691406, 90.50098419189453, -134.46676635742188, 34.23797607421875, 24.546062469482422, 93.0136947631836, 98.57627868652344, 26.183700561523438, 356.17138671875, -50.955833435058594, 33.75102996826172, 189.62298583984375, -44.4088249206543, 33.143367767333984, 5.428899765014648, -19.780723571777344, -8.639022827148438, -21.39731788635254, 1.3965587615966797, 322.0910949707031, 139.391357421875, 145.97952270507812, -0.2043590545654297, 425.5668640136719, -30.03247833251953, 21.923160552978516, 64.80415344238281, -14.565147399902344, 201.9134521484375, 40.06901550292969, 127.1788330078125, -196.05227661132812, 257.94158935546875, -71.66830444335938, 97.2287368774414, -9.397109985351562, -57.4302864074707], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000185.npy"} +{"epoch": 0.27165932452276065, "step": 186, "batch_size": 64, "mean": 96.6115951538086, "std": 117.97352600097656, "min": -196.24459838867188, "p10": -22.207040405273435, "median": 74.91999816894531, "p90": 231.19376831054691, "max": 456.15716552734375, "pos_frac": 0.796875, "sample": [-196.24459838867188, 131.54002380371094, 311.1773986816406, 147.75645446777344, 144.13156127929688, -5.7744140625, 75.44625854492188, 48.33295822143555, 209.05130004882812, 156.36495971679688, 68.47994995117188, 218.53314208984375, 3.9484329223632812, 82.35641479492188, 37.04509353637695, -16.48685073852539, 97.04342651367188, -117.24493408203125, 40.2973518371582, 47.72749328613281, 304.012939453125, 43.375118255615234, -1.7076034545898438, 176.16140747070312, 37.659629821777344, 74.39373779296875, 98.53472137451172, 142.96224975585938, 324.06103515625, 102.25994873046875, 180.66543579101562, -23.62577247619629, 159.15878295898438, 66.88156127929688, 34.292076110839844, 127.18905639648438, 395.72650146484375, 236.6197509765625, 46.23013687133789, 5.855319976806641, 158.12396240234375, 30.14066505432129, 73.629638671875, 62.745452880859375, -62.578857421875, 398.23291015625, -19.475997924804688, 74.00260162353516, -23.377487182617188, -42.71644592285156, 456.15716552734375, 158.9173583984375, 68.7431640625, 144.9330596923828, -2.232177734375, 141.04705810546875, 94.75718688964844, -28.901756286621094, 2.4819564819335938, 116.56793212890625, 143.04397583007812, 131.481201171875, -4.273193359375, 97.50531005859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000186.npy"} +{"epoch": 0.27312775330396477, "step": 187, "batch_size": 64, "mean": 80.06629943847656, "std": 107.42435455322266, "min": -154.1185302734375, "p10": -36.58426933288574, "median": 62.09721565246582, "p90": 215.47777404785157, "max": 392.33245849609375, "pos_frac": 0.765625, "sample": [-154.1185302734375, 207.92361450195312, -23.014402389526367, 59.14955520629883, -79.2701416015625, 228.67092895507812, 84.28062438964844, 156.28732299804688, 54.29969024658203, 40.43685531616211, 70.63656616210938, 66.21946716308594, 336.2509765625, 78.4168930053711, 123.03450012207031, 207.31219482421875, 161.2300567626953, 62.37998962402344, 115.44136047363281, -42.88249588012695, 57.44728088378906, 35.462486267089844, 392.33245849609375, -34.906829833984375, 141.83004760742188, 57.16276550292969, 248.22140502929688, -36.656917572021484, 188.2895050048828, 39.664337158203125, -45.8331298828125, 82.08238983154297, 20.69286346435547, -61.828887939453125, -36.23120880126953, 159.05499267578125, -36.414756774902344, 5.838750839233398, -10.926490783691406, 214.06634521484375, 148.90049743652344, 350.00701904296875, 95.95763397216797, 43.15385437011719, 66.15655517578125, 34.66632843017578, 61.8144416809082, 45.607208251953125, -128.00479125976562, 38.08802032470703, 98.6147232055664, -2.6990280151367188, 154.75802612304688, 102.31008911132812, 75.91944885253906, 185.90818786621094, 86.75048828125, -0.49114227294921875, 59.825286865234375, 228.1158905029297, 216.08267211914062, 35.04627990722656, -20.876020431518555, 16.5991153717041], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000187.npy"} +{"epoch": 0.2745961820851689, "step": 188, "batch_size": 64, "mean": 98.17579650878906, "std": 121.02018737792969, "min": -131.13772583007812, "p10": -18.280582809448237, "median": 70.97017669677734, "p90": 289.22151489257817, "max": 494.649658203125, "pos_frac": 0.828125, "sample": [330.3936462402344, 123.80902862548828, 243.6399383544922, 69.72344970703125, 72.2121353149414, 0.061370849609375, -34.012176513671875, -115.54505157470703, 294.2806396484375, 81.97766876220703, 48.32228088378906, -27.588645935058594, 119.69171905517578, 78.85966491699219, 291.9649658203125, 253.3543243408203, 87.82592010498047, -32.284568786621094, 57.03125, 7.748077392578125, 3.6935577392578125, 28.324302673339844, 45.90423583984375, 143.01486206054688, 69.7047348022461, 31.046348571777344, 11.127578735351562, -4.248256683349609, 317.03656005859375, 46.40791320800781, 359.76513671875, -131.13772583007812, 282.82012939453125, 114.84944152832031, 149.5623779296875, 114.85903930664062, 66.17083740234375, 188.49884033203125, -10.863096237182617, 188.4698486328125, 165.96983337402344, 25.530166625976562, -88.99591064453125, -20.82729148864746, 192.2804718017578, 39.693939208984375, 219.9013671875, 42.02122116088867, 6.2198333740234375, 337.435546875, 87.39908599853516, 166.85888671875, 53.01558303833008, -11.567419052124023, 157.2728729248047, 81.52790832519531, 101.9923324584961, 494.649658203125, 94.94357299804688, 69.72821807861328, 74.48086547851562, -12.338262557983398, 22.19916534423828, 17.386972427368164], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000188.npy"} +{"epoch": 0.27606461086637296, "step": 189, "batch_size": 64, "mean": 79.58168029785156, "std": 134.24612426757812, "min": -180.94384765625, "p10": -68.00683135986327, "median": 60.598785400390625, "p90": 195.78351745605468, "max": 562.9107055664062, "pos_frac": 0.765625, "sample": [40.69878387451172, 171.99090576171875, -123.34855651855469, 356.2100830078125, -70.20286560058594, -53.29698944091797, 45.856842041015625, 9.914894104003906, -3.5176639556884766, 33.042236328125, 153.4758758544922, 149.10980224609375, 141.29534912109375, -8.07830810546875, 162.01205444335938, 44.208648681640625, 195.8895263671875, 43.20283889770508, -180.94384765625, 6.101541519165039, -83.64017486572266, 26.6498966217041, 80.48919677734375, 185.5062255859375, 147.3994598388672, 81.1781005859375, -48.890289306640625, 55.09982681274414, 34.57619857788086, 182.90380859375, 1.9068946838378906, 19.74953269958496, 95.63243103027344, 488.61199951171875, 57.997467041015625, -25.943588256835938, 23.227088928222656, 217.11952209472656, 76.08686828613281, 10.272499084472656, -142.41481018066406, 119.4594955444336, -62.88275146484375, 120.35289764404297, 63.978939056396484, 338.33660888671875, 181.5255584716797, 562.9107055664062, -19.474464416503906, 125.63345336914062, 169.95726013183594, 30.78662872314453, 81.16676330566406, 150.02590942382812, 105.91017150878906, 195.53616333007812, 250.75595092773438, 88.74606323242188, -2.1633453369140625, -174.4017333984375, 63.200103759765625, -82.55455780029297, 29.116424560546875, 160.1660919189453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000189.npy"} +{"epoch": 0.2775330396475771, "step": 190, "batch_size": 64, "mean": 95.53245544433594, "std": 142.71243286132812, "min": -218.02952575683594, "p10": -68.1217170715332, "median": 69.45353698730469, "p90": 309.6427673339844, "max": 432.3765563964844, "pos_frac": 0.796875, "sample": [-3.9917755126953125, 107.77558135986328, 191.14181518554688, 159.1076202392578, 14.684913635253906, -91.92427825927734, 247.88458251953125, 41.29845428466797, 13.444751739501953, 365.0323486328125, 72.07339477539062, 67.76349639892578, 134.8141326904297, 18.354324340820312, 374.1639404296875, 149.94143676757812, 106.5567626953125, 89.16999816894531, 195.63473510742188, 23.35308837890625, 3.3092269897460938, 265.5447082519531, -218.02952575683594, 71.1435775756836, 275.11846923828125, -23.90802001953125, 62.162139892578125, 334.036376953125, 156.64495849609375, 57.99998474121094, -14.829421997070312, 311.8975830078125, 20.283843994140625, -3.7573413848876953, 135.34625244140625, 72.63236999511719, -102.12032318115234, -68.57508850097656, -158.56195068359375, 15.749404907226562, 100.3402328491211, -67.06385040283203, 109.40360260009766, 44.59169006347656, -27.883302688598633, -95.02525329589844, 115.70326232910156, 304.38153076171875, 51.511749267578125, 55.13066864013672, -199.1580047607422, 398.810302734375, 126.04751586914062, 19.437204360961914, 204.8014678955078, 107.51819610595703, 64.11575317382812, 387.943115234375, 4.896757125854492, 432.3765563964844, 201.91744995117188, 225.9712371826172, 37.615840911865234, 42.32672119140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000190.npy"} +{"epoch": 0.2790014684287812, "step": 191, "batch_size": 64, "mean": 138.17645263671875, "std": 154.89373779296875, "min": -146.46664428710938, "p10": -6.250457382202148, "median": 112.46092987060547, "p90": 322.95994567871094, "max": 584.6844482421875, "pos_frac": 0.859375, "sample": [481.4455871582031, 107.3055419921875, 584.6844482421875, 30.77960205078125, 307.28436279296875, 61.58543395996094, 144.53195190429688, 262.2823486328125, 14.843358993530273, 376.92462158203125, 107.46499633789062, 159.6817626953125, -6.5591583251953125, 271.3479919433594, 287.2579345703125, 20.973426818847656, 38.04692840576172, 80.7034683227539, 25.198287963867188, 71.00633239746094, 264.15240478515625, 56.45721435546875, 42.64434814453125, 304.2054443359375, 123.92888641357422, 11.349533081054688, 8.838804244995117, 568.5394287109375, 188.4965362548828, -19.035255432128906, 231.49215698242188, -79.42181396484375, 17.12847137451172, -49.263240814208984, 145.33517456054688, 178.17071533203125, 185.8934326171875, 148.49087524414062, 325.84088134765625, 32.06535339355469, 9.188676834106445, 243.59078979492188, 227.69125366210938, -146.46664428710938, 68.78450012207031, 508.9999084472656, 41.993499755859375, -70.22267150878906, 120.18775939941406, 96.24967956542969, 190.42051696777344, 316.2377624511719, 20.61725616455078, -5.530155181884766, 126.09315490722656, 176.37115478515625, -76.12654876708984, 117.45686340332031, -4.1428375244140625, 125.47034454345703, 361.66845703125, 32.98986053466797, 83.4040756225586, 166.26806640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000191.npy"} +{"epoch": 0.28046989720998533, "step": 192, "batch_size": 64, "mean": 91.24540710449219, "std": 148.73919677734375, "min": -263.3235168457031, "p10": -53.35022964477539, "median": 66.03786659240723, "p90": 315.03936157226565, "max": 542.9531860351562, "pos_frac": 0.734375, "sample": [-44.33063507080078, -149.77041625976562, 42.01300811767578, 170.2860107421875, 133.8136749267578, -36.564659118652344, -54.33995819091797, -5.062063217163086, 37.471160888671875, -263.3235168457031, 23.029296875, 92.88152313232422, -33.643951416015625, 224.16880798339844, 18.605819702148438, 42.66139221191406, -71.8460693359375, 95.2376708984375, -51.040863037109375, 176.0959930419922, -120.68192291259766, 417.2904357910156, -46.3162727355957, 77.67304992675781, -19.375350952148438, 319.548095703125, 361.18414306640625, 383.2453918457031, 156.49667358398438, 358.95367431640625, 96.329833984375, 73.58369445800781, 542.9531860351562, 100.48666381835938, 179.8341827392578, 28.077585220336914, 115.41014099121094, 217.6685028076172, 274.6590881347656, 62.92808151245117, 47.86183166503906, 304.51898193359375, 7.946741104125977, 42.14353942871094, 128.470458984375, 50.937034606933594, 62.493202209472656, -176.63990783691406, 200.70492553710938, 69.14765167236328, 81.40384674072266, -8.8026123046875, -45.385169982910156, -58.30371856689453, 12.786458969116211, 232.80172729492188, 75.6220703125, -28.359329223632812, 41.147430419921875, 208.33560180664062, 145.28799438476562, 124.18838500976562, 59.06910705566406, 336.0389709472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000192.npy"} +{"epoch": 0.28193832599118945, "step": 193, "batch_size": 64, "mean": 53.04465103149414, "std": 141.73948669433594, "min": -424.73858642578125, "p10": -79.41808624267577, "median": 51.5461540222168, "p90": 221.7535079956055, "max": 371.7496337890625, "pos_frac": 0.671875, "sample": [112.43316650390625, -20.19493865966797, 25.823753356933594, 186.18466186523438, -424.73858642578125, 95.43633270263672, 71.90680694580078, 54.47020721435547, -423.5020751953125, 302.9881591796875, 65.97174072265625, -16.001379013061523, 184.99310302734375, -37.23275375366211, -25.462860107421875, 97.23776245117188, 110.15912628173828, 128.11676025390625, -0.42052459716796875, 226.1290283203125, 2.8729019165039062, -19.828697204589844, 12.649616241455078, 159.2628173828125, 28.91145133972168, 128.50387573242188, -56.00136947631836, -40.192298889160156, 8.411270141601562, -61.08625793457031, -22.419517517089844, 211.54396057128906, -83.60662841796875, -90.5306167602539, 250.29055786132812, -129.14608764648438, 178.70846557617188, -89.08113861083984, 13.196191787719727, 48.622100830078125, 10.1116943359375, 141.54135131835938, 266.1632995605469, 169.9881591796875, 116.1536865234375, -133.73330688476562, 318.85467529296875, 79.81333923339844, 123.6581802368164, 123.60895538330078, -65.39154815673828, 88.45547485351562, 17.585590362548828, -69.64482116699219, -39.41001892089844, 143.54327392578125, 371.7496337890625, 127.62269592285156, -60.048831939697266, 21.41155242919922, 283.32745361328125, 76.42688751220703, 15.353057861328125, 102.33917999267578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000193.npy"} +{"epoch": 0.2834067547723935, "step": 194, "batch_size": 64, "mean": 86.09469604492188, "std": 99.99124145507812, "min": -109.02793884277344, "p10": -51.782063293457014, "median": 71.0604019165039, "p90": 235.417431640625, "max": 294.2304992675781, "pos_frac": 0.8125, "sample": [142.46517944335938, 20.214820861816406, 33.404380798339844, 5.4705352783203125, 122.33161926269531, 271.43853759765625, 103.50717163085938, 166.39317321777344, -23.075775146484375, 19.45440673828125, 28.351213455200195, 247.20355224609375, -66.87322998046875, -64.93814086914062, 43.33625030517578, 61.69921112060547, 214.11471557617188, 53.333412170410156, 68.12171936035156, 131.61660766601562, 33.523502349853516, 80.10419464111328, 14.293338775634766, 6.601016998291016, 117.2780990600586, -83.1103515625, -2.531320571899414, 229.42074584960938, 66.24735260009766, 79.62589263916016, 242.9126434326172, 131.77206420898438, 235.5964813232422, 169.48190307617188, 29.135047912597656, 276.585693359375, 64.03263854980469, 29.28849220275879, 147.1497039794922, 48.09377670288086, 188.01280212402344, -75.27407836914062, 47.057373046875, -11.437248229980469, 103.2247314453125, 160.7125244140625, -3.2206153869628906, -109.02793884277344, 34.014183044433594, -57.46112060546875, 113.70939636230469, -62.792457580566406, 60.58634948730469, 234.99964904785156, 76.49288940429688, 294.2304992675781, 133.66189575195312, 73.99908447265625, 290.69384765625, 92.79360961914062, 174.36801147460938, 181.56292724609375, 114.61465454101562, -38.53092956542969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000194.npy"} +{"epoch": 0.28487518355359764, "step": 195, "batch_size": 64, "mean": 93.46810913085938, "std": 118.85218811035156, "min": -131.37991333007812, "p10": -52.70166511535645, "median": 76.18124389648438, "p90": 241.66904449462893, "max": 398.1551818847656, "pos_frac": 0.78125, "sample": [83.13178253173828, -21.19281005859375, 215.3645477294922, 52.48524856567383, 74.7281494140625, 398.1551818847656, 71.57465362548828, 337.81787109375, 204.5836181640625, -7.218915939331055, -7.87860107421875, 48.082550048828125, -3.3397903442382812, -101.38612365722656, 244.01858520507812, 114.48005676269531, 55.99317932128906, 236.18678283691406, -43.56652069091797, 351.46881103515625, -53.0106315612793, 108.73912048339844, -102.31790161132812, 36.0860595703125, 104.89869689941406, 108.11522674560547, 226.91995239257812, 301.21990966796875, 142.6515655517578, 121.85945129394531, 150.4920654296875, -83.40363311767578, 71.51058197021484, 180.85128784179688, 18.201019287109375, 142.9118194580078, -33.044960021972656, 8.749259948730469, 171.900146484375, 220.3597869873047, -131.37991333007812, 245.06765747070312, 47.52457046508789, 68.50247955322266, 385.0509948730469, 141.97610473632812, 80.57831573486328, 62.33882141113281, -59.909706115722656, 11.526487350463867, 164.12771606445312, -71.60025024414062, 221.72821044921875, 106.66729736328125, 31.188888549804688, 77.63433837890625, 30.695571899414062, 63.20360565185547, 18.7231502532959, 86.312255859375, -51.980743408203125, 49.58517837524414, 142.70034790039062, 114.52061462402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000195.npy"} +{"epoch": 0.28634361233480177, "step": 196, "batch_size": 64, "mean": 105.74667358398438, "std": 144.51861572265625, "min": -217.51046752929688, "p10": -57.22457275390625, "median": 80.21939468383789, "p90": 309.51323852539065, "max": 457.20098876953125, "pos_frac": 0.765625, "sample": [190.99072265625, 198.795654296875, 307.64202880859375, 217.5844268798828, -58.02997589111328, 291.6065673828125, 310.315185546875, 444.84796142578125, 226.31301879882812, 69.62086486816406, 4.050144195556641, 52.15034484863281, 286.2071533203125, 190.78500366210938, 48.965057373046875, 10.385286331176758, -26.476512908935547, 87.70463562011719, -6.9410400390625, 51.45011901855469, 439.91680908203125, -18.989681243896484, 62.24229431152344, 31.197711944580078, 127.16987609863281, 121.88878631591797, 123.12541198730469, 140.26632690429688, 45.454864501953125, -10.836837768554688, 112.45042419433594, 78.55672454833984, 439.9766845703125, 457.20098876953125, -20.448448181152344, 179.6512908935547, 83.82258605957031, 56.55125427246094, 111.87152099609375, 81.88206481933594, 65.5568618774414, 57.509429931640625, 222.83462524414062, -2.8761539459228516, 134.03436279296875, 32.719425201416016, 83.46504211425781, -151.9102325439453, 69.04319763183594, -217.51046752929688, -62.7781982421875, 151.95321655273438, 90.10646057128906, -63.468265533447266, 316.930908203125, -55.345298767089844, -136.4858856201172, 69.27742004394531, -3.1411285400390625, 127.96891021728516, 259.0711975097656, 311.0126953125, -76.93365478515625, 5.835441589355469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000196.npy"} +{"epoch": 0.2878120411160059, "step": 197, "batch_size": 64, "mean": 80.89613342285156, "std": 113.04435729980469, "min": -123.44822692871094, "p10": -35.94133148193359, "median": 53.378299713134766, "p90": 232.82198028564457, "max": 400.571044921875, "pos_frac": 0.796875, "sample": [57.39115905761719, 330.1175231933594, 139.1900634765625, -19.554473876953125, 164.53549194335938, 10.561019897460938, 16.870285034179688, -65.54133605957031, 66.10052490234375, 400.571044921875, 354.4127502441406, 225.08189392089844, 66.74774932861328, 122.7001953125, -102.37041473388672, -0.18047332763671875, 123.85791778564453, 275.17803955078125, 24.001365661621094, 41.9847526550293, 100.15231323242188, 210.6070098876953, 42.40727615356445, -1.17047119140625, 53.88129425048828, 7.470735549926758, -105.08324432373047, 131.20208740234375, 84.53754425048828, 95.11041259765625, 259.95361328125, 139.7333221435547, 52.87530517578125, 236.13916015625, 2.019927978515625, 191.7805938720703, -37.791717529296875, 2.9177703857421875, -77.75411987304688, -123.44822692871094, 32.442893981933594, 123.5970687866211, -0.8019237518310547, 7.602485656738281, 26.555448532104492, 77.28762817382812, 305.81158447265625, 200.99856567382812, 23.950546264648438, 189.3396453857422, -11.35649299621582, 49.005001068115234, 82.82758331298828, 27.45709991455078, 23.832359313964844, 103.38240814208984, 168.44126892089844, 75.36032104492188, 199.23782348632812, -31.623764038085938, -82.271240234375, 17.929542541503906, 20.292694091796875, 50.85845947265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000197.npy"} +{"epoch": 0.28928046989721, "step": 198, "batch_size": 64, "mean": 103.20319366455078, "std": 141.6293182373047, "min": -224.87091064453125, "p10": -28.538953590393064, "median": 86.55558395385742, "p90": 231.76798400878909, "max": 584.1566162109375, "pos_frac": 0.828125, "sample": [-120.5759506225586, 91.83274841308594, -36.399742126464844, 512.8724365234375, 123.81925964355469, 203.1912841796875, 321.99603271484375, 54.98476791381836, 146.11817932128906, 53.38854217529297, 125.42947387695312, 165.12246704101562, -147.47622680664062, 4.412055969238281, 2.14093017578125, 234.79612731933594, 142.0828857421875, 102.12704467773438, 80.89007568359375, 86.5596694946289, -29.36420249938965, -26.613372802734375, 34.86857604980469, 161.15626525878906, 11.553976058959961, 71.16146087646484, -71.10982513427734, 197.33819580078125, 86.55149841308594, 76.25767517089844, 90.78888702392578, 122.44953155517578, 127.590576171875, 76.68214416503906, 127.21989440917969, 13.919925689697266, 79.29566955566406, 159.14456176757812, 153.440185546875, 225.11685180664062, -224.87091064453125, 545.6815185546875, 98.5829086303711, 65.06745147705078, -0.00878143310546875, 6.2953948974609375, 234.61846923828125, -41.41400146484375, 3.658781051635742, 69.94960021972656, 32.8546257019043, 88.0877685546875, 86.18878173828125, 135.33511352539062, -2.661661148071289, 143.05462646484375, 147.0585479736328, 204.60549926757812, 13.888124465942383, 402.84967041015625, -5.9656982421875, 584.1566162109375, 46.55902099609375, 136.67257690429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000198.npy"} +{"epoch": 0.2907488986784141, "step": 199, "batch_size": 64, "mean": 102.95765686035156, "std": 139.86825561523438, "min": -168.65264892578125, "p10": -61.64832725524901, "median": 86.84391784667969, "p90": 280.63388671875003, "max": 454.39599609375, "pos_frac": 0.78125, "sample": [207.07516479492188, 173.09376525878906, -135.30117797851562, -9.007553100585938, 270.804443359375, 74.35324096679688, -68.33245849609375, 197.4844207763672, -164.93109130859375, 116.79900360107422, -2.6178054809570312, -79.60348510742188, -168.65264892578125, 214.99098205566406, 200.93972778320312, 70.80547332763672, 14.364585876464844, 137.08590698242188, 384.9705810546875, 41.59986114501953, 85.75202941894531, 168.09327697753906, 376.9091796875, 294.895263671875, -92.67242431640625, 40.81349182128906, -43.930946350097656, 122.88787078857422, 206.18753051757812, 24.42071533203125, 34.68223571777344, 252.2763671875, 454.39599609375, 81.79742431640625, 70.39264678955078, 199.46530151367188, -24.013671875, 113.05864715576172, 25.989105224609375, 90.58512878417969, 61.7333984375, 277.0953063964844, 133.92404174804688, 38.30745315551758, 282.1504211425781, 117.05638885498047, 69.92652893066406, -43.02685546875, -46.05202102661133, -159.7952423095703, 264.11285400390625, 80.2431411743164, 101.5732192993164, 91.24724578857422, 351.39617919921875, 179.6820526123047, 5.918703079223633, 88.94109344482422, 54.59117889404297, 87.93580627441406, 45.71702575683594, 143.84683227539062, 424.68817138671875, -19.829147338867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000199.npy"} +{"epoch": 0.2922173274596182, "step": 200, "batch_size": 64, "mean": 102.1077880859375, "std": 155.07470703125, "min": -309.08270263671875, "p10": -45.22786865234374, "median": 78.93537902832031, "p90": 323.9662536621094, "max": 537.7470703125, "pos_frac": 0.796875, "sample": [232.94952392578125, 106.01602172851562, 59.93587112426758, 403.5652770996094, 62.12794876098633, 153.434814453125, 17.35187530517578, 2.2430343627929688, 401.763427734375, -91.04010009765625, 316.8278503417969, 234.36700439453125, 100.41191864013672, 6.989536285400391, 73.78703308105469, 321.85723876953125, 208.67527770996094, 373.521240234375, 5.807769775390625, 324.8701171875, 68.431640625, 2.0669689178466797, 154.29774475097656, 215.62281799316406, -21.87081527709961, 74.01066589355469, 186.306396484375, -189.45126342773438, 537.7470703125, 87.15550994873047, 101.59793090820312, -27.69162368774414, -5.3050384521484375, 74.42782592773438, 51.88117980957031, -33.49090576171875, -309.08270263671875, 7.604705810546875, 99.91448974609375, 340.0821838378906, 5.50526237487793, 90.2611312866211, 57.89837646484375, -2.1691665649414062, 0.046146392822265625, -155.20877075195312, 83.44293212890625, -50.25799560546875, 291.2373352050781, -30.473485946655273, 4.185600280761719, 410.07940673828125, 46.54130554199219, 24.06275177001953, 98.80625915527344, 255.4284210205078, 158.11956787109375, 163.743408203125, 103.11872863769531, 169.10037231445312, -122.69386291503906, 144.47105407714844, -65.6595458984375, 125.5959701538086], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000200.npy"} +{"epoch": 0.2936857562408223, "step": 201, "batch_size": 64, "mean": 101.78836059570312, "std": 144.7853240966797, "min": -285.1991271972656, "p10": -42.69153900146483, "median": 80.66946411132812, "p90": 285.44678039550786, "max": 488.533203125, "pos_frac": 0.8125, "sample": [271.12371826171875, 18.071897506713867, 215.5498809814453, -227.4378662109375, 52.942588806152344, -92.52766418457031, 8.548725128173828, 14.470579147338867, -27.77618408203125, 85.00717163085938, 2.693634033203125, 27.308746337890625, 36.5526123046875, -28.385833740234375, 463.7787170410156, 106.72628021240234, 70.03189086914062, 105.5915298461914, 24.379608154296875, 235.63961791992188, 353.2997131347656, 127.51961517333984, 5.117767333984375, 93.42268371582031, 216.06253051757812, 264.5859069824219, -77.11985778808594, 220.0215606689453, 194.06045532226562, -13.51716423034668, 76.33175659179688, 122.614501953125, 121.40563201904297, 118.54637145996094, -34.1278076171875, -68.40798950195312, 176.34423828125, 71.0750961303711, 50.68329620361328, 488.533203125, 104.7509765625, -24.979515075683594, 160.72076416015625, 75.4656982421875, 46.99433898925781, 167.83912658691406, 145.18563842773438, 252.19398498535156, 388.6724548339844, 68.84805297851562, 86.6653823852539, -64.22894287109375, 143.68141174316406, 69.8940658569336, -285.1991271972656, 294.86614990234375, 412.2462463378906, 140.86419677734375, 12.805719375610352, -46.36170959472656, 291.5852355957031, 44.940025329589844, 12.860671997070312, 145.40298461914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000201.npy"} +{"epoch": 0.29515418502202645, "step": 202, "batch_size": 64, "mean": 71.75799560546875, "std": 142.43643188476562, "min": -213.11941528320312, "p10": -97.53666763305662, "median": 61.881126403808594, "p90": 256.46564025878905, "max": 526.05322265625, "pos_frac": 0.703125, "sample": [526.05322265625, -51.64067459106445, 93.32756805419922, 305.93524169921875, 2.0363407135009766, -49.02562713623047, 235.52850341796875, -46.72687530517578, 122.58370971679688, 93.53215026855469, 50.134342193603516, -192.13429260253906, 174.450927734375, 241.01885986328125, 211.52828979492188, 47.046661376953125, -202.46267700195312, 47.716522216796875, 105.07058715820312, 340.997802734375, 116.85680389404297, 282.05670166015625, -136.42649841308594, 186.93353271484375, 26.711212158203125, 29.908912658691406, 390.59912109375, -16.324766159057617, 105.82981872558594, 133.86814880371094, 101.37223815917969, 125.84597778320312, 255.50662231445312, -31.222396850585938, -38.759674072265625, -118.11332702636719, 81.89356994628906, 60.384613037109375, 2.2354965209960938, 231.84686279296875, 99.94755554199219, -8.198989868164062, 23.156383514404297, 265.2305603027344, 152.17117309570312, 114.00845336914062, 80.52568817138672, -213.11941528320312, 63.37763977050781, 99.4615478515625, 16.70828628540039, 41.55596160888672, -55.32872009277344, -67.02851104736328, 35.52197265625, -107.68321228027344, 82.48626708984375, -73.86139678955078, -50.13321304321289, 53.289146423339844, 256.87664794921875, -131.19595336914062, 89.9765625, -21.206707000732422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000202.npy"} +{"epoch": 0.2966226138032305, "step": 203, "batch_size": 64, "mean": 87.42697143554688, "std": 122.77902221679688, "min": -187.09182739257812, "p10": -65.85315475463867, "median": 71.69087982177734, "p90": 246.66162109375003, "max": 453.85174560546875, "pos_frac": 0.78125, "sample": [17.479084014892578, 132.01870727539062, 158.966796875, 63.333351135253906, -28.214168548583984, 280.5261535644531, -74.66671752929688, 134.28631591796875, 27.271718978881836, 113.94326782226562, 0.8704071044921875, 104.46146392822266, 104.33635711669922, 160.77218627929688, -106.54536437988281, 182.7406768798828, 123.81341552734375, -23.046524047851562, 181.8553009033203, 135.5670623779297, 271.969970703125, 414.781494140625, -71.12776184082031, 114.98870849609375, 128.48284912109375, 37.36846923828125, 453.85174560546875, 11.483955383300781, -91.07218933105469, 145.10745239257812, -3.0806045532226562, 168.59976196289062, 183.9542236328125, 265.4671630859375, 21.846435546875, 183.3017120361328, 80.54850006103516, 102.96720123291016, 73.35491943359375, 122.35472869873047, 28.3770751953125, 66.54853057861328, 40.414634704589844, 239.50418090820312, -53.545738220214844, -187.09182739257812, -52.23500061035156, 70.02684020996094, -30.768144607543945, 8.028156280517578, 162.5260009765625, 15.833503723144531, 19.770626068115234, 47.23042297363281, -75.181396484375, -77.20587158203125, 138.88967895507812, 54.27809143066406, 249.72909545898438, 336.4516296386719, -9.897058486938477, 238.9939727783203, 34.674598693847656, 25.056259155273438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000203.npy"} +{"epoch": 0.29809104258443464, "step": 204, "batch_size": 64, "mean": 81.47257995605469, "std": 144.63739013671875, "min": -233.6527099609375, "p10": -71.2285934448242, "median": 70.60073852539062, "p90": 309.64455566406264, "max": 473.2161865234375, "pos_frac": 0.703125, "sample": [207.6559295654297, 73.87043762207031, 44.07413864135742, 43.17179870605469, 120.20130157470703, -15.734634399414062, 221.08917236328125, -106.7759017944336, 86.53588104248047, 17.475854873657227, -39.00509262084961, 22.064733505249023, 13.67706298828125, 84.52071380615234, 56.00592041015625, -42.413917541503906, -233.6527099609375, 164.93759155273438, 50.94941711425781, 7.817678451538086, 280.32769775390625, -127.15821075439453, -25.121932983398438, -9.344432830810547, 77.8043212890625, -16.49081802368164, 72.75645446777344, 150.2292022705078, 153.56866455078125, -81.26924133300781, 396.5112609863281, -12.720527648925781, 75.89002990722656, 32.378929138183594, -47.8004150390625, 145.0404815673828, 55.03523254394531, 155.06185913085938, 431.0225830078125, -201.3225555419922, 322.20892333984375, 167.05247497558594, 164.17697143554688, 75.03558349609375, 92.22671508789062, -10.705469131469727, 43.22821044921875, 223.2478790283203, 327.7889099121094, -9.94171142578125, 85.0364990234375, 160.10723876953125, -121.18893432617188, 68.44502258300781, 473.2161865234375, 367.00225830078125, -37.81804656982422, 142.80892944335938, 23.990694046020508, 355.6167907714844, -125.3470458984375, 86.22834014892578, 108.43948364257812, -47.474403381347656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000204.npy"} +{"epoch": 0.29955947136563876, "step": 205, "batch_size": 64, "mean": 107.37113952636719, "std": 141.7747802734375, "min": -262.6477966308594, "p10": -74.80586776733398, "median": 110.17658996582031, "p90": 273.14307250976566, "max": 396.8501892089844, "pos_frac": 0.765625, "sample": [35.83800506591797, 138.63632202148438, 222.6278076171875, 365.7477111816406, 116.88506317138672, -1.690786361694336, -7.287073135375977, 24.103363037109375, 8.894096374511719, 234.69375610351562, 256.84490966796875, -94.3277587890625, -98.09886169433594, 94.22338104248047, -16.957618713378906, 73.43264770507812, 16.63750457763672, 371.0246276855469, 227.2021942138672, 27.553104400634766, 103.4681167602539, 97.6956787109375, -144.56356811523438, 172.65110778808594, 304.97467041015625, 242.68508911132812, 195.88375854492188, -4.582798004150391, -75.6966323852539, 212.98483276367188, -48.619754791259766, 387.8525085449219, 127.62445068359375, 84.06195831298828, 41.61664581298828, 123.73239135742188, 396.8501892089844, 267.52764892578125, 117.80912780761719, 133.1713104248047, -211.20840454101562, 144.69390869140625, 117.94393157958984, 239.4132843017578, 151.93045043945312, -72.7274169921875, 146.56942749023438, 90.12027740478516, 275.5496826171875, 78.4461898803711, 240.1511688232422, 191.73121643066406, 170.07203674316406, 68.77952575683594, 146.4585723876953, 366.2047424316406, 102.2500991821289, -11.571968078613281, 16.221450805664062, -262.6477966308594, -76.58341979980469, 215.41055297851562, -31.1246337890625, 42.56117248535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000205.npy"} +{"epoch": 0.3010279001468429, "step": 206, "batch_size": 64, "mean": 117.07075500488281, "std": 128.15878295898438, "min": -244.322265625, "p10": -37.4110345840454, "median": 104.99200820922852, "p90": 290.76638793945324, "max": 403.7200927734375, "pos_frac": 0.84375, "sample": [47.6864013671875, 192.3782958984375, 97.04057312011719, 123.73007202148438, 403.7200927734375, 301.888671875, 84.28556823730469, 50.707801818847656, 220.9821014404297, 248.5625457763672, 57.15812683105469, 131.25758361816406, 9.407964706420898, 104.88671112060547, -244.322265625, 88.61737060546875, 78.59809875488281, 166.39768981933594, 60.703575134277344, -29.135671615600586, 217.59390258789062, -40.957618713378906, 227.09024047851562, 356.3891906738281, 115.12245178222656, 193.28814697265625, 24.991653442382812, 204.2659454345703, -100.20640563964844, -110.4993667602539, 195.7232208251953, 305.3233642578125, 212.46502685546875, 223.05076599121094, 13.418581008911133, 244.44235229492188, 80.07279205322266, -74.34521484375, 155.41085815429688, -141.42027282714844, 48.058067321777344, 216.30410766601562, 12.97439956665039, 163.1947021484375, 80.41719818115234, -11.6385498046875, 116.29451751708984, 87.85832977294922, 196.5759735107422, 92.01690673828125, -64.00833892822266, 6.060935974121094, 102.45491790771484, 189.056884765625, 363.8929443359375, 264.81439208984375, -3.624208450317383, 121.25949096679688, 105.09730529785156, 307.510009765625, 64.47785186767578, 25.460466384887695, 315.31341552734375, 196.93527221679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000206.npy"} +{"epoch": 0.302496328928047, "step": 207, "batch_size": 64, "mean": 136.12110900878906, "std": 158.26390075683594, "min": -133.196533203125, "p10": -30.548121643066402, "median": 89.42457580566406, "p90": 314.4951293945313, "max": 596.5138549804688, "pos_frac": 0.84375, "sample": [25.076824188232422, -133.196533203125, 64.12458038330078, 98.76174926757812, 179.48233032226562, 46.5146484375, 319.71612548828125, -32.499046325683594, -13.552093505859375, 14.568344116210938, 82.04205322265625, 83.94928741455078, 302.31280517578125, -9.575729370117188, 129.5380859375, 170.22425842285156, 49.24919891357422, 90.49520874023438, 35.970706939697266, 34.46246337890625, 52.95255661010742, 175.903564453125, 31.87274932861328, 255.8198699951172, 28.279617309570312, -70.08810424804688, 300.7051086425781, 61.75733947753906, 34.661705017089844, -51.40916061401367, 281.0083923339844, 260.752197265625, -61.91729736328125, 0.27335357666015625, 28.43608856201172, 273.0008850097656, 35.01214599609375, 237.8317413330078, 155.41189575195312, 251.82777404785156, 265.8712158203125, 125.22665405273438, 105.7276611328125, 136.31610107421875, 267.4071044921875, 246.19784545898438, 80.36451721191406, 388.5614013671875, 518.6900634765625, 287.042236328125, 48.20018005371094, 448.28045654296875, -43.15746307373047, 43.428810119628906, 596.5138549804688, 42.013763427734375, 96.06954956054688, 97.13888549804688, 88.35394287109375, 534.4915771484375, -92.42047882080078, 191.20849609375, 446.4629821777344, -25.99596405029297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000207.npy"} +{"epoch": 0.3039647577092511, "step": 208, "batch_size": 64, "mean": 105.91764831542969, "std": 156.5018310546875, "min": -351.854736328125, "p10": -41.703514862060544, "median": 81.09241485595703, "p90": 310.95688476562503, "max": 444.2874755859375, "pos_frac": 0.78125, "sample": [-206.7952117919922, 144.9790802001953, -148.91407775878906, 291.79144287109375, 20.07177734375, -20.53079605102539, 371.997314453125, 33.55540466308594, 108.35921478271484, 249.55999755859375, 251.37025451660156, 61.544105529785156, 80.80319213867188, 174.24827575683594, 213.17123413085938, 57.95378875732422, 444.2874755859375, 15.993621826171875, 422.61676025390625, 126.90716552734375, -3.984344482421875, 27.89954376220703, 246.42739868164062, 170.551025390625, 81.38163757324219, 26.92113494873047, 52.24371337890625, 5.012908935546875, -129.2196502685547, 281.3805236816406, -135.4814910888672, -10.502490997314453, 49.440128326416016, 110.72417449951172, 138.32962036132812, 302.040771484375, 282.22705078125, 39.5037956237793, -25.732017517089844, 237.70700073242188, -42.596527099609375, 68.23512268066406, 25.43865203857422, 341.718994140625, 131.67230224609375, 314.778076171875, 102.99479675292969, -351.854736328125, -39.61981964111328, 153.30764770507812, 14.636810302734375, 125.92549133300781, 342.06634521484375, 103.99932861328125, 59.02601623535156, -135.29672241210938, 18.970474243164062, 383.9123840332031, 181.88436889648438, 290.3797607421875, 197.86257934570312, 78.77214050292969, -3.8595733642578125, -23.465179443359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000208.npy"} +{"epoch": 0.3054331864904552, "step": 209, "batch_size": 64, "mean": 93.52122497558594, "std": 151.7918243408203, "min": -326.0837707519531, "p10": -79.75263519287108, "median": 88.13810348510742, "p90": 313.1155426025391, "max": 440.80902099609375, "pos_frac": 0.71875, "sample": [339.05224609375, 78.77662658691406, 175.3345947265625, -45.43822479248047, 365.93414306640625, 408.91009521484375, 318.0581359863281, 116.74105834960938, 125.54444122314453, 99.81448364257812, -61.3139762878418, 95.3148422241211, 162.50341796875, 95.91057586669922, 162.0978546142578, 130.73648071289062, 162.68624877929688, -167.8165740966797, -81.8470458984375, 59.264564514160156, 250.65859985351562, 226.58935546875, -4.158517837524414, -94.609130859375, 44.818603515625, -46.12831115722656, -23.578027725219727, 9.186485290527344, 95.13751220703125, 156.23898315429688, 115.93634033203125, 81.1386947631836, -128.50408935546875, -20.780567169189453, 64.01457977294922, 71.20123291015625, -74.86567687988281, -37.747859954833984, 367.7143249511719, 301.58282470703125, 51.402099609375, 440.80902099609375, 9.841255187988281, -21.747825622558594, -27.600860595703125, 34.71430206298828, 41.52264404296875, 74.9681396484375, 247.0990447998047, 131.74574279785156, 135.21751403808594, -36.98644256591797, -137.93951416015625, -326.0837707519531, 179.48257446289062, 96.11257934570312, 36.43581008911133, 50.909446716308594, 136.53903198242188, -149.0877685546875, 344.26776123046875, 296.1293640136719, 211.76919555664062, 271.7290344238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000209.npy"} +{"epoch": 0.3069016152716593, "step": 210, "batch_size": 64, "mean": 107.38471984863281, "std": 147.74757385253906, "min": -244.6871337890625, "p10": -32.44748001098632, "median": 81.94678497314453, "p90": 310.7277893066407, "max": 503.7625732421875, "pos_frac": 0.75, "sample": [-7.098907470703125, 0.7410373687744141, 131.4471435546875, 68.07757568359375, -38.79005432128906, -85.98199462890625, 177.909423828125, -13.220983505249023, 134.82496643066406, -35.272117614746094, 194.49557495117188, 20.18988800048828, 34.37062454223633, 123.8178482055664, 272.72552490234375, 143.36563110351562, -6.192535400390625, 71.90609741210938, 204.9383544921875, 18.306827545166016, 282.31646728515625, 94.78675079345703, 30.245256423950195, 232.3291015625, 255.76193237304688, 4.625551223754883, 13.135833740234375, 317.31048583984375, 187.73056030273438, 370.66363525390625, 125.5974349975586, 246.1757049560547, 121.1238784790039, 295.3681640625, -119.82049560546875, -25.856658935546875, -89.81619262695312, -18.819320678710938, 31.32737922668457, 6.228572845458984, 139.50634765625, 166.78482055664062, 351.92083740234375, 100.88418579101562, -4.088598251342773, -14.207550048828125, 433.1382751464844, -13.109024047851562, 124.26219177246094, 156.62930297851562, 278.97454833984375, 503.7625732421875, 293.46246337890625, 48.221702575683594, -58.339866638183594, 91.98747253417969, -22.503990173339844, 4.935649871826172, 10.820619583129883, 348.9691162109375, 3.7842330932617188, 17.551963806152344, -244.6871337890625, 382.9874267578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000210.npy"} +{"epoch": 0.30837004405286345, "step": 211, "batch_size": 64, "mean": 92.20182800292969, "std": 163.1658477783203, "min": -214.251708984375, "p10": -61.749727630615226, "median": 59.897769927978516, "p90": 266.8033569335938, "max": 566.5140380859375, "pos_frac": 0.71875, "sample": [128.15647888183594, -52.693450927734375, 236.29156494140625, 103.62032318115234, 55.76565170288086, 161.11680603027344, -46.45977783203125, 112.88797760009766, 527.7872314453125, -214.251708984375, -41.80535125732422, 57.33330535888672, 26.573848724365234, 15.50250244140625, 65.08087158203125, 230.2621307373047, 147.0859375, 207.760498046875, 164.17422485351562, -10.607406616210938, -0.33484649658203125, -36.196807861328125, 203.18576049804688, 109.5970687866211, -65.63098907470703, 7.7254791259765625, -44.889434814453125, 522.816650390625, 41.68794631958008, 566.5140380859375, 459.88031005859375, -50.94347381591797, -134.6807861328125, 142.27586364746094, 222.54373168945312, 316.253173828125, -152.93292236328125, 143.24374389648438, 51.27374267578125, -7.326873779296875, 118.97679901123047, 397.42041015625, -27.10987091064453, 74.06084442138672, 9.806425094604492, 48.98905944824219, 71.43679809570312, -94.32582092285156, 15.82187271118164, 216.57485961914062, -157.6688690185547, 62.46223449707031, 195.18446350097656, 42.599830627441406, 151.1214599609375, 273.326416015625, 87.6716537475586, 251.5828857421875, 20.330413818359375, 128.00270080566406, 35.098392486572266, 15.847442626953125, -15.938949584960938, -187.9974365234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000211.npy"} +{"epoch": 0.30983847283406757, "step": 212, "batch_size": 64, "mean": 131.85598754882812, "std": 170.150634765625, "min": -241.6673126220703, "p10": -68.3579620361328, "median": 139.86361694335938, "p90": 314.61345825195315, "max": 593.81640625, "pos_frac": 0.75, "sample": [-241.6673126220703, -92.35271453857422, 314.7279357910156, 261.0855407714844, 57.11035919189453, 242.77487182617188, 110.12103271484375, -73.73216247558594, -30.853620529174805, -1.6504135131835938, 246.13795471191406, 173.60357666015625, -196.1531524658203, -40.246551513671875, -27.503742218017578, 8.689964294433594, 315.16253662109375, 210.218505859375, 424.85687255859375, 234.6907196044922, 105.35581970214844, -109.86200714111328, 29.553081512451172, 100.38743591308594, 149.45054626464844, 171.77682495117188, 225.93234252929688, -80.41940307617188, 298.0386657714844, -9.697893142700195, 129.63272094726562, 178.0751190185547, 593.81640625, 176.5470733642578, 54.65612030029297, -216.20066833496094, 303.2891845703125, -55.81816101074219, 139.07073974609375, 574.6016235351562, 289.2341613769531, 112.09963989257812, 49.49969482421875, 232.77239990234375, 14.850059509277344, 140.656494140625, -32.10812759399414, 54.466121673583984, 314.3463439941406, 47.73955154418945, 141.73324584960938, 365.2469482421875, 397.0732421875, 262.94537353515625, 307.5562744140625, -6.470355987548828, 235.64707946777344, 62.876094818115234, 151.52023315429688, 143.09146118164062, 68.06697845458984, 299.2960205078125, -38.181243896484375, 171.6195068359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000212.npy"} +{"epoch": 0.31130690161527164, "step": 213, "batch_size": 64, "mean": 125.99160766601562, "std": 153.23873901367188, "min": -200.09783935546875, "p10": -63.980883789062496, "median": 110.3159408569336, "p90": 338.30189208984376, "max": 446.0223693847656, "pos_frac": 0.796875, "sample": [287.1466064453125, -86.83132934570312, 41.53002166748047, -108.2071304321289, 226.6097869873047, 14.892684936523438, 51.178260803222656, 98.11820983886719, -200.09783935546875, -80.84298706054688, 10.24917221069336, 310.34033203125, 356.3276062011719, -31.21373176574707, 98.37950897216797, 177.33316040039062, 9.35325813293457, 62.17814636230469, 157.96791076660156, 324.43792724609375, 419.875244140625, 317.34149169921875, 50.19617462158203, 420.3418273925781, 171.57371520996094, 274.43951416015625, 365.8465881347656, -16.440296173095703, -3.7909469604492188, -12.162738800048828, 186.41497802734375, 124.28882598876953, 197.2703857421875, 122.25237274169922, -59.332916259765625, 18.5120849609375, 173.54568481445312, 340.0644836425781, 282.8717041015625, 151.93768310546875, -8.603893280029297, 195.4077911376953, 334.1891784667969, -65.97286987304688, 200.12025451660156, 60.586181640625, 166.85865783691406, 446.0223693847656, -136.96417236328125, 191.20074462890625, 249.56332397460938, 298.92388916015625, 370.260009765625, 52.0104866027832, 8.735977172851562, 3.148303985595703, -120.63542938232422, 80.6380615234375, 96.34577178955078, 67.00055694580078, 13.857536315917969, 13.304855346679688, 165.64219665527344, 137.92752075195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000213.npy"} +{"epoch": 0.31277533039647576, "step": 214, "batch_size": 64, "mean": 113.03147888183594, "std": 147.8744659423828, "min": -175.60574340820312, "p10": -54.35195693969726, "median": 84.23110580444336, "p90": 296.1325164794922, "max": 525.519775390625, "pos_frac": 0.796875, "sample": [62.05211639404297, 413.0527038574219, 15.042583465576172, 191.35777282714844, 4.095766067504883, -9.187765121459961, -84.01930236816406, -175.60574340820312, 268.6060791015625, -81.06674194335938, 170.82217407226562, 40.81611633300781, 109.3017807006836, -0.6740150451660156, 33.35807800292969, 107.8150634765625, 218.6571044921875, -18.915361404418945, 46.67387390136719, 184.86134338378906, 71.59615325927734, 61.335838317871094, 105.6054916381836, 267.5733642578125, 246.372314453125, 73.39183807373047, 11.880622863769531, 0.10316658020019531, 124.62579345703125, -46.75257110595703, 95.07037353515625, 65.04422760009766, 15.982200622558594, 340.68603515625, -82.67926788330078, 525.519775390625, 351.326416015625, 5.4781341552734375, 27.038543701171875, 401.6438293457031, 214.24513244628906, 204.51217651367188, 266.51092529296875, 298.623291015625, -142.76046752929688, 496.56610107421875, 22.995725631713867, 209.58004760742188, 216.8170166015625, 100.44823455810547, -49.19316864013672, 164.26881408691406, -45.960205078125, 117.65415954589844, -58.0999755859375, 240.16488647460938, 68.1973876953125, 290.3207092285156, 65.6295166015625, 145.8204345703125, 122.85043334960938, 129.79600524902344, -56.5628662109375, 53.7040901184082], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000214.npy"} +{"epoch": 0.3142437591776799, "step": 215, "batch_size": 64, "mean": 118.9841079711914, "std": 136.97496032714844, "min": -148.7521514892578, "p10": -62.17883911132812, "median": 104.1790657043457, "p90": 284.74669189453124, "max": 518.4662475585938, "pos_frac": 0.828125, "sample": [65.93231201171875, 337.9312438964844, 126.52665710449219, 271.9446105957031, -53.30224609375, -94.1912841796875, 51.433860778808594, 122.72553253173828, 140.78807067871094, 101.8367919921875, 126.32444763183594, 46.154876708984375, 5.40629768371582, -33.625343322753906, 413.67034912109375, 257.3918151855469, 244.95233154296875, 283.27508544921875, 518.4662475585938, 162.86746215820312, 101.35153198242188, 392.70599365234375, 102.4393081665039, -78.45564270019531, 73.05867004394531, 198.08148193359375, -70.35646057128906, 69.84879302978516, 197.28546142578125, 259.9056396484375, 32.18054962158203, 67.9691162109375, 58.181549072265625, -9.958150863647461, 158.61978149414062, 90.53658294677734, -122.2544937133789, 147.1148223876953, 194.88735961914062, 264.64697265625, 349.99371337890625, 109.73209381103516, 196.4800262451172, -5.104034423828125, 140.57830810546875, 50.51325988769531, 61.77080535888672, -65.98309326171875, 50.26120376586914, 203.81578063964844, -148.7521514892578, 157.04830932617188, 384.14434814453125, 33.41514205932617, 63.06596374511719, 105.9188232421875, 285.37738037109375, 6.6734466552734375, 207.16989135742188, 36.585655212402344, 107.77599334716797, 38.781890869140625, -89.08529663085938, 110.50728607177734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000215.npy"} +{"epoch": 0.315712187958884, "step": 216, "batch_size": 64, "mean": 116.20650482177734, "std": 168.2432098388672, "min": -297.67608642578125, "p10": -71.09631118774413, "median": 101.3721694946289, "p90": 364.2438110351563, "max": 566.099853515625, "pos_frac": 0.734375, "sample": [181.88998413085938, 397.01385498046875, -70.92273712158203, 144.56195068359375, 200.23626708984375, 40.94036865234375, -181.7615966796875, 156.79122924804688, 214.7274932861328, 6.236030578613281, 566.099853515625, 97.02784729003906, 105.71649169921875, -71.17070007324219, -14.60062026977539, -21.12237548828125, -89.8766098022461, 205.09120178222656, 36.93061447143555, -174.29495239257812, -6.1335906982421875, 302.1771240234375, -14.377372741699219, -8.837615966796875, 111.32168579101562, 151.2089385986328, 48.066619873046875, 183.51144409179688, 109.80255126953125, -2.726045608520508, 111.5474853515625, 208.02725219726562, 23.68341827392578, -55.00988006591797, 373.885986328125, 369.3604736328125, -72.39292907714844, 296.93963623046875, 42.46311950683594, 85.50865936279297, 64.99840545654297, 289.2521667480469, 4.198785781860352, -297.67608642578125, 56.540382385253906, 194.90283203125, 227.4705810546875, 400.39105224609375, 46.9688720703125, -84.07233428955078, 274.1953430175781, 42.22288513183594, 114.61946868896484, 175.10205078125, -59.847084045410156, 338.92578125, 207.38491821289062, 334.9000244140625, 402.085693359375, 383.91790771484375, 20.207229614257812, -65.53533172607422, 26.217330932617188, 352.304931640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000216.npy"} +{"epoch": 0.31718061674008813, "step": 217, "batch_size": 64, "mean": 137.11187744140625, "std": 155.8596954345703, "min": -177.6549835205078, "p10": -17.782174682617185, "median": 94.10555267333984, "p90": 371.1603149414063, "max": 528.1328125, "pos_frac": 0.84375, "sample": [137.6058349609375, 135.75563049316406, 94.78417205810547, 127.87317657470703, 63.01770782470703, 52.534095764160156, 66.383056640625, 237.18299865722656, 80.3468246459961, 92.03582763671875, 46.055450439453125, 375.046142578125, 144.56417846679688, 362.0933837890625, -116.3198013305664, -0.8191299438476562, 34.33708190917969, 8.988311767578125, 195.99639892578125, 528.1328125, -7.723812103271484, 63.19690704345703, 263.1900939941406, 434.3919372558594, 93.42693328857422, 210.12057495117188, -33.096168518066406, 67.45468139648438, -177.6549835205078, 104.3385009765625, 494.2815856933594, 194.4508056640625, 7.377042770385742, 68.46739196777344, 268.163330078125, 4.011810302734375, 227.60296630859375, 279.8141784667969, -17.339942932128906, 482.30291748046875, 428.11236572265625, -80.94893646240234, 361.1319580078125, 277.14434814453125, 138.502685546875, 462.1713562011719, 8.235885620117188, 95.9039306640625, 72.80756378173828, 65.25786590576172, 63.68119812011719, 155.61679077148438, 260.2296447753906, 3.942476272583008, 161.96990966796875, 76.16972351074219, -44.321372985839844, -17.971702575683594, 27.0955810546875, -32.65956115722656, 127.95765686035156, 210.5738983154297, 243.46266174316406, 18.723108291625977], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000217.npy"} +{"epoch": 0.3186490455212922, "step": 218, "batch_size": 64, "mean": 106.32524108886719, "std": 161.58865356445312, "min": -256.5267333984375, "p10": -76.16501464843749, "median": 83.36017990112305, "p90": 326.4840728759766, "max": 571.85791015625, "pos_frac": 0.734375, "sample": [93.24697875976562, 48.659400939941406, 173.6424560546875, 246.32786560058594, 144.80133056640625, 26.191577911376953, 330.3853454589844, 72.02809143066406, 6.801176071166992, 65.0412826538086, 155.5831298828125, 51.223602294921875, -18.44422149658203, 129.9713897705078, 446.72412109375, 169.3790740966797, 115.5692138671875, -29.161041259765625, 138.83792114257812, 193.44622802734375, 40.01202392578125, 63.237464904785156, -0.3558235168457031, 226.94723510742188, -59.71513366699219, -256.5267333984375, 317.381103515625, 96.6849365234375, 189.58084106445312, 201.5040283203125, 38.296630859375, 421.41204833984375, 423.41400146484375, 253.98529052734375, 199.8343505859375, 69.97196960449219, -24.06337547302246, -25.523569107055664, 88.70513153076172, -22.402103424072266, -38.257301330566406, -93.97601318359375, 144.30923461914062, -99.94415283203125, -74.31774139404297, -50.25971221923828, 571.85791015625, 333.71234130859375, 204.44313049316406, -76.95670318603516, -146.30801391601562, 234.31874084472656, 443.6822509765625, 11.13200569152832, 73.1258544921875, 78.01522827148438, 152.8314971923828, 108.18856048583984, 35.659889221191406, -153.31829833984375, 77.3488540649414, -153.8125762939453, 312.5782470703125, 108.12728881835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000218.npy"} +{"epoch": 0.3201174743024963, "step": 219, "batch_size": 64, "mean": 91.84480285644531, "std": 173.06005859375, "min": -410.1614685058594, "p10": -110.70987777709959, "median": 68.53975296020508, "p90": 346.35815124511726, "max": 480.4259033203125, "pos_frac": 0.75, "sample": [457.42327880859375, 169.88641357421875, 11.668142318725586, 326.855712890625, 3.515512466430664, 427.80670166015625, -410.1614685058594, 19.660789489746094, 37.8853759765625, 307.7984924316406, -11.175956726074219, 97.9596176147461, 207.453369140625, 448.88018798828125, 480.4259033203125, 169.4783935546875, 64.96900177001953, 258.46844482421875, 105.36776733398438, 109.14691162109375, 258.5706787109375, 38.12898254394531, -120.50682830810547, 4.614044189453125, -45.496849060058594, -56.73778533935547, 354.7163391113281, -4.531379699707031, 360.281982421875, 374.78192138671875, 41.61742401123047, 279.0205078125, -2.0500030517578125, 45.89070129394531, 16.487319946289062, 186.42794799804688, 109.66149139404297, 17.07184600830078, 104.19744873046875, 18.20819091796875, 297.4757385253906, 132.83534240722656, 97.8828125, -175.23641967773438, 59.14976501464844, 159.9532928466797, -176.83935546875, 6.90278434753418, 9.471607208251953, 233.4733428955078, 27.84124755859375, -126.8418960571289, -5.618370056152344, -59.21165084838867, -240.8579559326172, 79.5975570678711, 149.02886962890625, 87.47311401367188, -87.85032653808594, -30.049209594726562, 140.2393798828125, 72.11050415039062, 84.83969116210938, -121.36864471435547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000219.npy"} +{"epoch": 0.32158590308370044, "step": 220, "batch_size": 64, "mean": 77.24984741210938, "std": 126.11031341552734, "min": -254.4945068359375, "p10": -65.81688842773437, "median": 78.26408386230469, "p90": 199.88736114501955, "max": 422.3584289550781, "pos_frac": 0.75, "sample": [201.01553344726562, 49.611366271972656, 64.78952026367188, -29.75170135498047, 34.891014099121094, 327.65167236328125, 371.3300476074219, 4.873271942138672, 138.5681610107422, 146.1868133544922, 125.47855377197266, 265.91278076171875, 136.87115478515625, 142.986572265625, -225.9703369140625, 103.833984375, 171.22531127929688, 52.415618896484375, 135.38323974609375, 197.2549591064453, 73.88539123535156, 22.910518646240234, 4.1168060302734375, -254.4945068359375, -66.9185562133789, 193.7339324951172, -28.917795181274414, 51.81329345703125, 209.70144653320312, 65.46961975097656, -45.906585693359375, 160.58721923828125, 121.08195495605469, -18.510543823242188, 203.2058868408203, 116.9098129272461, 422.3584289550781, 163.0289764404297, -2.5494461059570312, 107.40166473388672, 17.62874984741211, 53.15781784057617, 4.367334365844727, 92.97431945800781, -96.06295013427734, 150.26126098632812, 61.864288330078125, 134.3290557861328, -139.25059509277344, 82.64277648925781, 72.25144958496094, -14.924949645996094, 143.23760986328125, -60.961151123046875, 49.398563385009766, 153.89671325683594, 180.8106689453125, -63.24633026123047, 176.65640258789062, 173.21484375, -18.115657806396484, 155.76443481445312, -143.87484741210938, -135.49432373046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000220.npy"} +{"epoch": 0.32305433186490456, "step": 221, "batch_size": 64, "mean": 169.42575073242188, "std": 160.10781860351562, "min": -175.195556640625, "p10": -21.388056182861316, "median": 171.666015625, "p90": 382.7446441650391, "max": 537.0731201171875, "pos_frac": 0.84375, "sample": [75.77404022216797, 112.07882690429688, 221.5959014892578, -7.110179901123047, 96.49252319335938, 537.0731201171875, 22.545692443847656, 391.0269775390625, 217.26290893554688, 274.05926513671875, 284.8736572265625, 23.930164337158203, 288.0059814453125, 248.46212768554688, 162.72183227539062, 276.0635986328125, -10.610137939453125, 457.1679382324219, 2.9527416229248047, 90.21085357666016, 164.373291015625, 294.4010009765625, 364.0237731933594, 475.5550537109375, 479.396728515625, 243.58047485351562, 230.9079132080078, -175.195556640625, 56.75657653808594, 68.8290023803711, 168.71853637695312, 194.40232849121094, -29.882553100585938, 50.43260955810547, 65.46151733398438, 174.61349487304688, 107.00293731689453, -127.37727355957031, 260.44293212890625, 386.105224609375, -26.007164001464844, 283.240966796875, 242.16220092773438, 325.59478759765625, 121.4217529296875, 228.38226318359375, 34.034149169921875, 374.9032897949219, 309.9325866699219, 228.93722534179688, -35.581722259521484, -106.94083404541016, -4.719856262207031, 245.2525634765625, 52.41481018066406, -67.814697265625, 18.578210830688477, 128.54095458984375, 45.54802322387695, 193.26785278320312, 300.44036865234375, 80.27931213378906, 464.999755859375, 189.2547149658203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000221.npy"} +{"epoch": 0.3245227606461087, "step": 222, "batch_size": 64, "mean": 126.48561096191406, "std": 159.88316345214844, "min": -224.42062377929688, "p10": -43.92554397583008, "median": 115.12967300415039, "p90": 356.7533508300782, "max": 539.1654663085938, "pos_frac": 0.796875, "sample": [-44.03253936767578, 442.8094787597656, 0.9259185791015625, -106.64602661132812, -13.794212341308594, -92.26589965820312, -15.108612060546875, 118.1082763671875, 4.535919189453125, 221.8070068359375, 107.51183319091797, 120.76795196533203, 418.93475341796875, 96.03800964355469, 122.87006378173828, 236.12088012695312, -224.42062377929688, 151.147705078125, 258.56890869140625, 176.6490936279297, 152.20187377929688, 104.39491271972656, -205.83404541015625, 324.6680603027344, -121.99872589111328, 278.01837158203125, 485.82293701171875, 94.260498046875, 31.351173400878906, 16.556861877441406, 153.64340209960938, 21.808547973632812, 160.21676635742188, -43.67588806152344, 197.7211151123047, 198.10154724121094, 44.551361083984375, 156.59092712402344, -23.646686553955078, 201.4569091796875, -27.93488311767578, 91.27702331542969, 84.5677261352539, 76.40480041503906, 331.23150634765625, 240.13641357421875, 113.51555633544922, 234.91201782226562, 149.62301635742188, 47.91542053222656, -36.10377502441406, 173.7422332763672, 367.6912841796875, 65.53402709960938, 403.0927429199219, 215.11849975585938, 116.74378967285156, -108.54827880859375, 220.27682495117188, 20.17070770263672, 539.1654663085938, 55.982177734375, 407.64599609375, 106.18144226074219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000222.npy"} +{"epoch": 0.32599118942731276, "step": 223, "batch_size": 64, "mean": 108.11112213134766, "std": 176.57351684570312, "min": -321.70465087890625, "p10": -107.57542266845702, "median": 109.78140640258789, "p90": 307.8881256103516, "max": 630.3362426757812, "pos_frac": 0.75, "sample": [66.46580505371094, -69.88190460205078, 206.37442016601562, 140.79107666015625, 141.03933715820312, 360.9715270996094, 193.42718505859375, 97.4629135131836, 204.0455322265625, 156.00531005859375, -148.3977813720703, 52.962127685546875, 90.61491394042969, 233.75411987304688, 300.67828369140625, 290.4259948730469, 138.64505004882812, 86.42503356933594, 533.1952514648438, -12.12430191040039, 146.88397216796875, -57.24413299560547, 86.04154968261719, -56.99100875854492, -55.993255615234375, 294.9594421386719, -140.6066436767578, 370.1899719238281, 104.39875030517578, -108.4780044555664, 1.1115264892578125, 20.970291137695312, 264.5784912109375, -321.70465087890625, 115.1640625, 148.54562377929688, -197.80418395996094, 194.84567260742188, 70.06427001953125, 630.3362426757812, 242.1010284423828, -105.46939849853516, 170.45606994628906, 259.4590759277344, -163.2891845703125, -57.04703140258789, 285.0249938964844, 327.37725830078125, 154.02772521972656, -17.982284545898438, 52.383697509765625, 42.07600402832031, 61.93218994140625, 423.89166259765625, 27.989707946777344, 24.442665100097656, -10.559179306030273, 10.895879745483398, 145.67852783203125, 137.61837768554688, 117.40570831298828, 163.91729736328125, -256.3447570800781, 310.9780578613281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000223.npy"} +{"epoch": 0.3274596182085169, "step": 224, "batch_size": 64, "mean": 123.00054168701172, "std": 184.1900177001953, "min": -331.42901611328125, "p10": -73.65997009277343, "median": 67.24323654174805, "p90": 364.91593933105474, "max": 662.2747192382812, "pos_frac": 0.8125, "sample": [246.63230895996094, 3.62603759765625, 127.01066589355469, 32.290184020996094, -75.87986755371094, 543.3571166992188, 237.7719268798828, 40.46234893798828, 69.25450897216797, -331.42901611328125, 46.19794464111328, 189.40953063964844, 139.3958740234375, 158.32073974609375, 437.92022705078125, 89.78790283203125, 57.438743591308594, 224.4812774658203, 191.25738525390625, 662.2747192382812, 287.1253662109375, 110.03985595703125, 351.2599792480469, 248.98785400390625, -136.6374969482422, -83.5498046875, 264.6682434082031, 62.65128707885742, 228.91943359375, -17.483596801757812, 6.3634796142578125, 34.41900634765625, 31.33085060119629, 640.787841796875, 281.8861389160156, 243.97735595703125, 118.75446319580078, -76.1294937133789, 19.505903244018555, 26.724102020263672, 248.5235595703125, 13.14373779296875, 47.26435852050781, -21.866798400878906, 107.45405578613281, -76.57671356201172, -68.48020935058594, 3.3914337158203125, -107.37217712402344, 9.84035873413086, 174.54522705078125, 133.3582763671875, 370.76849365234375, 10.638643264770508, 7.840370178222656, 65.23196411132812, 5.4806060791015625, 310.7666320800781, 37.468162536621094, -64.32183837890625, 87.69429016113281, 435.40643310546875, 427.9051818847656, -19.250797271728516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000224.npy"} +{"epoch": 0.328928046989721, "step": 225, "batch_size": 64, "mean": 144.70822143554688, "std": 182.31053161621094, "min": -242.634033203125, "p10": -90.03026199340819, "median": 146.2316131591797, "p90": 400.06481018066415, "max": 709.7904052734375, "pos_frac": 0.71875, "sample": [233.36151123046875, 163.72689819335938, 417.8214111328125, 354.59100341796875, 33.87830352783203, 110.5674819946289, 38.496002197265625, -98.44085693359375, -144.5419921875, 437.47314453125, 364.13958740234375, 282.30694580078125, -144.0321502685547, -70.4055404663086, 280.82208251953125, -36.800594329833984, -109.8977279663086, 17.47047996520996, -19.31142807006836, 60.69007873535156, 111.46615600585938, 120.03738403320312, 226.98297119140625, 175.72048950195312, -9.98591423034668, 148.25131225585938, 21.973983764648438, 169.69082641601562, -10.35384750366211, 247.18408203125, 312.13140869140625, 170.2938690185547, 224.74073791503906, 102.91243743896484, -33.70277786254883, 462.16754150390625, 444.75970458984375, -6.965522766113281, 258.2379455566406, 408.3313903808594, 99.07003784179688, 337.61669921875, 187.720947265625, -42.31603240966797, 161.81199645996094, 144.2119140625, 709.7904052734375, 284.7452087402344, 170.98501586914062, 77.19609069824219, 411.5115966796875, -2.408660888671875, 220.75106811523438, -103.13470458984375, -101.42073059082031, 380.776123046875, 295.99652099609375, 177.50497436523438, 177.96734619140625, -242.634033203125, 120.1275405883789, 141.46279907226562, -54.21361541748047, -7.580780029296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000225.npy"} +{"epoch": 0.3303964757709251, "step": 226, "batch_size": 64, "mean": 115.01837921142578, "std": 170.88479614257812, "min": -220.91390991210938, "p10": -39.51744155883788, "median": 77.23161315917969, "p90": 331.7192749023438, "max": 592.187744140625, "pos_frac": 0.75, "sample": [592.187744140625, -154.896484375, -44.515621185302734, 18.8863525390625, -12.015701293945312, 167.3683624267578, -9.431304931640625, 58.942996978759766, 176.036865234375, -27.855022430419922, 122.9085922241211, 13.689849853515625, -18.143508911132812, 32.393768310546875, -182.8189697265625, 93.06929016113281, -104.84654235839844, 317.697998046875, 310.4482421875, 38.870872497558594, 337.7283935546875, 353.6121520996094, 74.91948699951172, 128.65902709960938, 189.6487274169922, 295.14971923828125, 24.097007751464844, -24.55877685546875, 131.464111328125, 431.8475646972656, 293.567138671875, -71.9444808959961, 182.42828369140625, 79.54373931884766, 46.15312194824219, 547.5573120117188, 236.17721557617188, 519.2514038085938, 298.0694274902344, -7.772407531738281, -4.387298583984375, 141.71109008789062, -220.91390991210938, 157.366943359375, 45.82228469848633, 112.78779602050781, -0.7314071655273438, 2.4044036865234375, 19.59417152404785, 84.95661163330078, -10.427886962890625, 259.25482177734375, 110.0560302734375, 200.78192138671875, 39.786582946777344, 121.15336608886719, -101.75079345703125, 6.340044021606445, 489.5035400390625, 60.58319854736328, 35.677162170410156, 141.5442352294922, 9.634124755859375, 206.85354614257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000226.npy"} +{"epoch": 0.33186490455212925, "step": 227, "batch_size": 64, "mean": 144.60397338867188, "std": 174.37486267089844, "min": -362.8921203613281, "p10": -19.355463790893552, "median": 114.8946533203125, "p90": 363.80503845214844, "max": 643.6366577148438, "pos_frac": 0.859375, "sample": [311.219970703125, 101.30561828613281, 145.37832641601562, 381.39166259765625, 229.49703979492188, 39.34007263183594, 54.99861145019531, 107.88182830810547, 309.99395751953125, 10.31866455078125, 104.5632553100586, 114.31500244140625, 178.23599243164062, 24.080312728881836, -19.888736724853516, 26.66461181640625, 166.53399658203125, 89.64134216308594, 198.2292938232422, 643.6366577148438, 171.8036346435547, 86.80206298828125, 325.0457458496094, 222.88111877441406, 41.610923767089844, 289.91375732421875, 169.18011474609375, 247.8548583984375, 111.2874755859375, 95.360107421875, 175.38446044921875, 56.39329528808594, 333.66485595703125, 5.740684509277344, -83.562255859375, -99.39384460449219, 191.892333984375, 218.76724243164062, 503.09210205078125, 115.47430419921875, 188.23980712890625, 477.8398742675781, -108.52171325683594, 582.03076171875, 5.151044845581055, 20.37006378173828, 189.96084594726562, 34.964210510253906, 221.81753540039062, 36.17540740966797, 131.6300506591797, 363.3486328125, 117.7650146484375, -18.111160278320312, 6.140459060668945, -51.427223205566406, 364.0006408691406, -362.8921203613281, -7.849615097045898, 85.75389099121094, -58.203819274902344, 22.179086685180664, 134.7720184326172, 482.9903259277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000227.npy"} +{"epoch": 0.3333333333333333, "step": 228, "batch_size": 64, "mean": 102.26102447509766, "std": 165.03652954101562, "min": -272.5915832519531, "p10": -102.06206588745115, "median": 103.093017578125, "p90": 313.5241882324219, "max": 492.72515869140625, "pos_frac": 0.734375, "sample": [-179.33950805664062, -233.90972900390625, -272.5915832519531, 211.96841430664062, 137.78573608398438, 323.51483154296875, -190.69952392578125, 166.7933349609375, 197.52871704101562, -155.38380432128906, 25.9862060546875, 293.2867736816406, 492.72515869140625, 321.61260986328125, 450.62799072265625, 315.88714599609375, 355.1817626953125, 174.2430877685547, 117.30018615722656, -33.692657470703125, -40.90081787109375, 296.65728759765625, 108.63336944580078, 308.0106201171875, 178.05474853515625, 156.2818145751953, 124.77227783203125, 17.187602996826172, -82.1101303100586, 115.39561462402344, 33.31709289550781, 202.9947052001953, 5.450340270996094, 204.59942626953125, 221.45126342773438, 80.58912658691406, 119.26374053955078, 77.09909057617188, 35.32630920410156, 29.5390625, 89.24883270263672, 241.845947265625, -87.46054077148438, -13.883895874023438, 95.04508972167969, 97.55266571044922, 453.6795959472656, 19.317703247070312, 82.51713562011719, 56.616355895996094, 265.21697998046875, -0.023151397705078125, -24.86615753173828, 191.28848266601562, 234.76133728027344, 164.31301879882812, 203.16517639160156, 110.2424545288086, -108.31986236572266, -25.139739990234375, -19.456382751464844, 31.003158569335938, -170.99249267578125, -51.40376281738281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000228.npy"} +{"epoch": 0.33480176211453744, "step": 229, "batch_size": 64, "mean": 109.92990112304688, "std": 164.21502685546875, "min": -409.8575439453125, "p10": -57.41001205444335, "median": 92.88893127441406, "p90": 342.3139190673828, "max": 567.6272583007812, "pos_frac": 0.75, "sample": [-26.446182250976562, 14.00429916381836, -116.68603515625, 412.17132568359375, 53.80149459838867, -186.71917724609375, 104.61966705322266, 185.02774047851562, 73.9691162109375, 218.8052978515625, 255.24224853515625, 340.08251953125, 96.00398254394531, 351.3622131347656, 89.77388000488281, 97.0441665649414, 252.84158325195312, 106.42646026611328, 35.84097671508789, 351.0220031738281, -90.38148498535156, 63.56494140625, -4.232051849365234, 215.4320831298828, 136.58555603027344, 42.62596893310547, 567.6272583007812, -150.74017333984375, 50.94459533691406, 241.41558837890625, 175.8643798828125, 250.445556640625, 187.30084228515625, 363.4146728515625, 343.2702331542969, 45.396278381347656, 327.7879638671875, 37.42688751220703, 82.41668701171875, -61.928375244140625, 52.556861877441406, 119.47206115722656, 18.250083923339844, 208.73834228515625, -37.423866271972656, 165.1696014404297, 190.0419921875, 225.57154846191406, -33.16943359375, -128.7744140625, 126.4371566772461, 437.59722900390625, 29.89353370666504, 81.91700744628906, 80.61162567138672, 124.89607238769531, -11.764518737792969, 181.3802490234375, -14.998947143554688, 165.5751495361328, -409.8575439453125, -0.7602157592773438, -21.403793334960938, -46.867164611816406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000229.npy"} +{"epoch": 0.33627019089574156, "step": 230, "batch_size": 64, "mean": 165.46487426757812, "std": 161.83778381347656, "min": -272.88641357421875, "p10": -28.05664730072021, "median": 157.3803253173828, "p90": 404.85068969726564, "max": 495.750244140625, "pos_frac": 0.84375, "sample": [80.48379516601562, 367.0692138671875, -15.607694625854492, 297.7232666015625, 216.58444213867188, 78.55870056152344, -29.275606155395508, 252.7642364501953, 98.11361694335938, 129.92572021484375, 213.3223419189453, 177.93563842773438, 21.49773406982422, 246.48858642578125, 9.977127075195312, 112.35055541992188, -52.16053009033203, 80.51699829101562, 98.52059936523438, 127.49461364746094, 26.9697265625, -97.3505859375, -25.21240997314453, 205.89437866210938, 139.34091186523438, 170.61587524414062, 224.00367736816406, 353.7427978515625, 327.6338806152344, 157.49566650390625, 79.60831451416016, 15.296195983886719, 335.716796875, 453.1526794433594, 170.8915557861328, -105.7057113647461, 156.636474609375, 174.9552001953125, 237.1592559814453, 450.89532470703125, -34.8765983581543, -10.98590087890625, -272.88641357421875, -56.160865783691406, 45.20896911621094, 49.84431457519531, 408.0469970703125, 134.408935546875, 376.6272888183594, 454.3701171875, 168.15621948242188, 107.05096435546875, 115.93324279785156, 23.9307804107666, 414.2451171875, 315.7606201171875, 470.4193420410156, 215.6398468017578, 397.39263916015625, 207.82102966308594, 217.73252868652344, 157.26498413085938, 225.03367614746094, 495.750244140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000230.npy"} +{"epoch": 0.3377386196769457, "step": 231, "batch_size": 64, "mean": 115.23086547851562, "std": 154.40179443359375, "min": -136.15185546875, "p10": -42.93104019165039, "median": 90.8141975402832, "p90": 342.79926452636727, "max": 652.5733642578125, "pos_frac": 0.796875, "sample": [460.7806396484375, 84.30445861816406, -42.58086395263672, 268.50164794921875, 13.531913757324219, 80.50186920166016, 350.9131164550781, -14.415283203125, 66.90554809570312, 107.95462799072266, 102.07162475585938, 358.0809326171875, -69.3526611328125, 88.16224670410156, 478.8570861816406, 159.8236541748047, 210.7355194091797, 87.908203125, 144.81451416015625, 213.20297241210938, 400.4708557128906, 94.20967102050781, 115.86710357666016, 14.969894409179688, 45.629638671875, -82.31043243408203, 323.866943359375, -54.92485427856445, 203.08749389648438, 652.5733642578125, 462.3946228027344, 238.68218994140625, 134.54747009277344, 36.377105712890625, -136.15185546875, 73.68389892578125, 101.34820556640625, 165.05245971679688, 92.43875122070312, -43.08111572265625, -9.94793701171875, 30.792268753051758, 223.73739624023438, 68.55899047851562, -34.061485290527344, 125.9549789428711, -36.157623291015625, -100.9006576538086, 52.39701461791992, 120.30093383789062, 287.51507568359375, 156.72500610351562, 89.18964385986328, 1.1389427185058594, -27.266437530517578, 118.95439147949219, -98.27354431152344, 100.61271667480469, 27.15576171875, 7.142333984375, 11.691268920898438, 140.2918701171875, 97.87435913085938, 31.917037963867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000231.npy"} +{"epoch": 0.3392070484581498, "step": 232, "batch_size": 64, "mean": 96.52035522460938, "std": 164.16481018066406, "min": -166.6179962158203, "p10": -75.64318771362305, "median": 57.91950798034668, "p90": 327.5772888183594, "max": 631.88671875, "pos_frac": 0.734375, "sample": [183.71475219726562, 93.8177719116211, 12.864139556884766, 39.25128936767578, 17.241470336914062, -147.2773895263672, 334.5607604980469, 254.96566772460938, -28.144729614257812, 250.43609619140625, 47.46190643310547, 452.82769775390625, 80.99774932861328, -71.7704849243164, -24.861663818359375, 70.6149673461914, -32.31683349609375, 196.0487060546875, -107.1068115234375, 77.6779556274414, 149.9492645263672, 579.6434936523438, 71.63803100585938, 89.23883056640625, 128.30267333984375, -36.0123405456543, 631.88671875, -8.064895629882812, 35.223724365234375, 260.9204406738281, 139.066162109375, 205.05929565429688, -57.4381103515625, 76.69921875, -90.48443603515625, 139.73988342285156, -105.05086517333984, 26.07433319091797, -77.30291748046875, 437.82647705078125, 30.571380615234375, -63.279869079589844, 11.751419067382812, -104.34828186035156, 44.69777297973633, 73.3098373413086, 50.495941162109375, -166.6179962158203, 277.132568359375, 136.0036163330078, 349.98529052734375, 27.202526092529297, 0.22088623046875, 329.57037353515625, 225.2569580078125, -33.46263122558594, 118.44831085205078, 322.9267578125, -24.025251388549805, 50.272560119628906, 59.89023208618164, 37.616641998291016, 69.81683349609375, 55.94878387451172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000232.npy"} +{"epoch": 0.3406754772393539, "step": 233, "batch_size": 64, "mean": 146.88909912109375, "std": 210.91017150878906, "min": -228.91180419921875, "p10": -69.30752716064453, "median": 92.56890869140625, "p90": 458.0004516601563, "max": 665.9832763671875, "pos_frac": 0.734375, "sample": [14.742769241333008, 9.62942886352539, 484.21160888671875, -33.28443908691406, 193.64102172851562, 196.28640747070312, -164.9335479736328, -115.66504669189453, 0.6105194091796875, -168.54425048828125, 115.39112854003906, 36.810699462890625, 165.04026794433594, 271.8822021484375, 280.7849426269531, 665.9832763671875, 126.08563995361328, 74.40963745117188, 516.6041259765625, 333.4521484375, 46.68782043457031, 150.75404357910156, 79.27790832519531, 558.1654052734375, 17.579063415527344, 303.02490234375, 1.0062103271484375, -71.79141235351562, -192.875, -59.922489166259766, 99.12205505371094, 458.2084045410156, 209.87911987304688, 159.77587890625, 289.06427001953125, 457.5152282714844, 40.873451232910156, 429.78887939453125, -63.51179504394531, 44.65394592285156, 129.11859130859375, 422.23834228515625, -27.230709075927734, 340.2275085449219, 72.03366088867188, 361.329345703125, -51.90940475463867, -228.91180419921875, -136.4980010986328, -27.84295654296875, 45.50562286376953, -24.26581573486328, 335.88385009765625, 85.72611999511719, -30.170440673828125, 222.08421325683594, -22.234691619873047, 473.1295166015625, -41.108524322509766, 611.9312744140625, 361.3551940917969, 86.01576232910156, 282.87774658203125, 201.2041015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000233.npy"} +{"epoch": 0.342143906020558, "step": 234, "batch_size": 64, "mean": 151.15582275390625, "std": 175.16961669921875, "min": -153.66893005371094, "p10": -54.30388183593748, "median": 136.221923828125, "p90": 370.6693298339844, "max": 664.4743041992188, "pos_frac": 0.78125, "sample": [-79.1217041015625, 280.95819091796875, 295.72711181640625, 221.898193359375, 349.9168395996094, 73.10918426513672, 150.8636474609375, 191.13330078125, 431.6614074707031, -37.35453796386719, 261.5061340332031, 149.4317169189453, -153.66893005371094, -1.1686058044433594, 82.01914978027344, 255.86865234375, 66.45889282226562, -102.5635757446289, 75.22402954101562, 290.78826904296875, 664.4743041992188, 142.25457763671875, -10.273614883422852, -25.17211151123047, 57.229331970214844, -22.192581176757812, 175.160888671875, 353.9813537597656, 5.6920318603515625, 334.7762451171875, -8.364641189575195, 163.09791564941406, 436.35040283203125, 109.66414642333984, 511.44390869140625, 130.18927001953125, 30.375144958496094, 156.46951293945312, 67.88310241699219, 267.92620849609375, 278.6670227050781, 153.34080505371094, 279.2081604003906, 64.21215057373047, 17.080039978027344, 86.9898681640625, -93.998046875, -8.448997497558594, 246.47662353515625, 73.24131774902344, 78.92444610595703, 451.70098876953125, 602.6033935546875, -99.76038360595703, 277.5317687988281, 165.31103515625, -61.56788635253906, 6.742696762084961, 182.28089904785156, 47.93168640136719, 123.89312744140625, -100.33065795898438, 377.8213195800781, 180.46835327148438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000234.npy"} +{"epoch": 0.3436123348017621, "step": 235, "batch_size": 64, "mean": 97.06028747558594, "std": 173.10166931152344, "min": -429.21685791015625, "p10": -69.84274444580078, "median": 109.66214752197266, "p90": 300.5439849853517, "max": 550.0272216796875, "pos_frac": 0.75, "sample": [32.39302062988281, 221.06448364257812, -429.21685791015625, 29.848236083984375, 161.67108154296875, 492.40826416015625, 162.451904296875, 318.4369812011719, -42.052738189697266, 199.1664581298828, -152.85360717773438, -20.7060546875, 155.64321899414062, 273.73944091796875, 46.319305419921875, 31.368911743164062, -61.80589294433594, 32.80583953857422, 108.60639953613281, -302.8197021484375, 0.18134689331054688, 20.710906982421875, -2.9579620361328125, 59.19959259033203, 355.1390075683594, 218.5701141357422, -36.090187072753906, -57.50543212890625, 166.943115234375, 208.94004821777344, 121.67828369140625, 121.84141540527344, 312.0316467285156, -72.73321533203125, 122.17732238769531, 198.63214111328125, 1.1854076385498047, 230.85589599609375, 25.886276245117188, 175.8502197265625, 37.28208541870117, 27.350860595703125, 550.0272216796875, 234.55953979492188, 141.80352783203125, 160.0301055908203, 168.24652099609375, -40.438499450683594, -201.2022705078125, 137.64144897460938, 497.3404541015625, 265.04876708984375, -145.3917236328125, 6.7751922607421875, 110.7178955078125, -13.108953475952148, -75.26947784423828, 186.0337371826172, 88.68173217773438, 16.654220581054688, 182.9866943359375, 144.9435272216797, 367.2394714355469, -63.09831237792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000235.npy"} +{"epoch": 0.34508076358296624, "step": 236, "batch_size": 64, "mean": 155.12411499023438, "std": 140.9580078125, "min": -116.2854995727539, "p10": -16.265092849731445, "median": 131.43243408203125, "p90": 348.24792785644536, "max": 463.7901306152344, "pos_frac": 0.875, "sample": [366.3043518066406, 76.88054656982422, 18.316970825195312, 145.3653564453125, 194.58204650878906, 136.36216735839844, 434.4786071777344, 18.431732177734375, 70.2791748046875, 130.39669799804688, 204.3623046875, 338.5730895996094, 98.60628509521484, 200.02691650390625, 310.71563720703125, 337.1770324707031, 79.28657531738281, 255.5199432373047, -15.633583068847656, 352.394287109375, 141.612060546875, -41.16081237792969, 79.98922729492188, 288.3225402832031, 205.52230834960938, 20.340587615966797, 132.46817016601562, 332.8818359375, 255.07443237304688, 51.061378479003906, 463.7901306152344, 112.42720794677734, -34.31718444824219, 250.2266387939453, 5.907390594482422, 249.04937744140625, 137.71405029296875, 448.44366455078125, 95.5072250366211, -16.53573989868164, 121.9354019165039, 114.73145294189453, 30.65570640563965, 50.899574279785156, 7.997703552246094, 195.15760803222656, 108.58171081542969, 108.29566192626953, -70.65924072265625, -116.2854995727539, 161.93179321289062, 455.2524719238281, 353.12017822265625, 115.69131469726562, 216.43826293945312, 113.01187133789062, 314.3359069824219, 96.9717025756836, 4.154014587402344, 197.4384307861328, -64.63041687011719, 336.01873779296875, -25.293476104736328, 171.4425506591797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000236.npy"} +{"epoch": 0.3465491923641703, "step": 237, "batch_size": 64, "mean": 120.29085540771484, "std": 172.5857391357422, "min": -333.42486572265625, "p10": -66.97137527465819, "median": 114.28156661987305, "p90": 356.88437500000026, "max": 678.9408569335938, "pos_frac": 0.765625, "sample": [182.07969665527344, 194.9283447265625, 209.61578369140625, 154.05197143554688, -0.4024848937988281, 113.43570709228516, 105.5893783569336, 142.57281494140625, 28.101268768310547, 162.55764770507812, 54.91279602050781, 31.978485107421875, 411.12945556640625, 128.59976196289062, 207.29107666015625, -45.02207946777344, -107.32722473144531, 150.02590942382812, -70.9963150024414, 97.1489486694336, 133.69134521484375, -107.67494201660156, 383.36962890625, 678.9408569335938, -57.57984924316406, 398.2938232421875, 294.858154296875, 17.374786376953125, 12.661605834960938, 13.84377670288086, 480.0618896484375, 115.12742614746094, 201.32691955566406, 230.39939880371094, 26.22321319580078, 201.19117736816406, 33.50171661376953, 123.12097930908203, 295.08544921875, -39.830711364746094, 218.83889770507812, 98.09066009521484, 188.97039794921875, 256.1301574707031, 407.2777099609375, 45.629486083984375, -0.010162353515625, 26.220298767089844, -333.42486572265625, 273.92095947265625, -31.68170738220215, 78.29499816894531, -168.8955078125, -42.97200012207031, 187.57440185546875, 278.0091552734375, -74.33135986328125, 462.5086669921875, 7.342536926269531, -125.72897338867188, -46.858917236328125, 119.59606170654297, 70.65679931640625, 219.1993408203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000237.npy"} +{"epoch": 0.34801762114537443, "step": 238, "batch_size": 64, "mean": 118.46604919433594, "std": 172.0164031982422, "min": -166.58364868164062, "p10": -45.28942718505858, "median": 79.00665283203125, "p90": 284.3832458496094, "max": 781.1458129882812, "pos_frac": 0.765625, "sample": [-6.2221832275390625, -28.781524658203125, 4.49053955078125, 135.3350067138672, 20.911930084228516, -5.666740417480469, 130.8827362060547, 273.1304931640625, -104.85067749023438, 428.83782958984375, 74.18378448486328, 159.93252563476562, 69.02415466308594, 64.11944580078125, -22.286874771118164, 198.96385192871094, 288.6617431640625, -60.456695556640625, 46.266929626464844, 46.90510940551758, 2.807058334350586, 638.44677734375, 32.9326171875, 151.8203582763672, -52.36424255371094, 513.299560546875, 104.61592102050781, 14.830177307128906, 134.23313903808594, -98.97431945800781, 136.89199829101562, 119.47442626953125, 157.41134643554688, 117.25252532958984, 358.8226318359375, 21.459850311279297, 57.107357025146484, 126.91291809082031, 781.1458129882812, 139.390380859375, -15.134281158447266, 13.0079345703125, 53.46733474731445, -113.39958953857422, -16.327957153320312, 239.25289916992188, 212.96429443359375, 183.67111206054688, 65.21479797363281, 454.9225769042969, 203.47862243652344, 18.558639526367188, 172.56100463867188, 223.70120239257812, -166.58364868164062, 274.40008544921875, 268.3064270019531, 37.383819580078125, -19.78496551513672, -19.605880737304688, 166.5897216796875, 159.25711059570312, -68.80096435546875, 83.82952117919922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000238.npy"} +{"epoch": 0.34948604992657856, "step": 239, "batch_size": 64, "mean": 130.94488525390625, "std": 164.232666015625, "min": -269.0634460449219, "p10": -46.35030937194824, "median": 120.25017166137695, "p90": 352.55278930664065, "max": 502.6390075683594, "pos_frac": 0.78125, "sample": [381.73876953125, 171.75413513183594, -154.73318481445312, 355.635498046875, -91.44029235839844, 188.0789794921875, 64.47164916992188, 161.10675048828125, -47.474788665771484, -145.81796264648438, 46.57497024536133, 42.545623779296875, 74.28132629394531, 259.78741455078125, 73.83317565917969, 6.778053283691406, 502.6390075683594, 117.13041687011719, 174.55706787109375, 127.43669891357422, 219.1327362060547, 177.3876190185547, 252.3537139892578, -61.67171859741211, -12.979560852050781, 40.46241760253906, 177.83041381835938, 373.3576354980469, 214.32896423339844, 108.20878601074219, -6.686178207397461, 22.817169189453125, 321.3807373046875, 414.57269287109375, -180.8488311767578, 86.66300201416016, -36.948265075683594, 345.35980224609375, -19.241928100585938, 335.6672058105469, 24.78201675415039, 340.8192138671875, 315.17413330078125, 151.82699584960938, -14.274154663085938, 41.921714782714844, -269.0634460449219, 136.15818786621094, -33.65365219116211, 150.33119201660156, 138.6648406982422, 276.00933837890625, 305.078857421875, 64.58047485351562, 479.87213134765625, 210.6678009033203, 385.9906005859375, 79.80690002441406, 105.50132751464844, 123.36992645263672, 51.030914306640625, -43.726524353027344, 261.40350341796875, 18.170766830444336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000239.npy"} +{"epoch": 0.3509544787077827, "step": 240, "batch_size": 64, "mean": 109.36317443847656, "std": 183.02085876464844, "min": -239.240234375, "p10": -173.282275390625, "median": 121.3446159362793, "p90": 356.85751342773443, "max": 539.915283203125, "pos_frac": 0.765625, "sample": [137.50247192382812, 143.4476776123047, 152.5899658203125, 98.71183013916016, 138.16114807128906, 347.964599609375, 143.7794952392578, 412.04852294921875, -211.2548828125, -52.32499694824219, 189.76055908203125, 157.0896453857422, -239.240234375, 166.3980712890625, 360.66876220703125, 76.72814178466797, -181.84600830078125, 67.25567626953125, 36.146217346191406, 239.6959228515625, 188.70962524414062, 404.56884765625, 154.8074951171875, 156.42697143554688, 67.00370788574219, 6.375881195068359, -221.72967529296875, 121.2240982055664, 78.23245239257812, 506.873046875, 155.3924102783203, 118.31962585449219, 85.15202331542969, 28.228111267089844, -201.92242431640625, 317.9942321777344, 239.6692352294922, 174.8298797607422, -208.02838134765625, 69.89598846435547, -63.91463088989258, 275.9406433105469, -153.30023193359375, 35.28791427612305, 539.915283203125, -119.21353149414062, 278.4532470703125, -27.5972843170166, 259.3970947265625, 182.1095428466797, 121.46513366699219, 424.1380615234375, -46.174224853515625, 93.12065887451172, -109.09541320800781, 141.30039978027344, 95.73656463623047, -63.63861083984375, 111.87655639648438, 176.07249450683594, 48.87992858886719, 132.78013610839844, 470.1935119628906, -229.79566955566406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000240.npy"} +{"epoch": 0.3524229074889868, "step": 241, "batch_size": 64, "mean": 170.4112548828125, "std": 201.1011505126953, "min": -230.14488220214844, "p10": -34.05300331115723, "median": 139.50295639038086, "p90": 418.9946441650391, "max": 837.898193359375, "pos_frac": 0.828125, "sample": [96.22814178466797, 85.73231506347656, -214.85134887695312, -21.5361270904541, 45.07501220703125, 241.40066528320312, 235.81167602539062, 452.05621337890625, 77.64004516601562, 181.81387329101562, 115.64031982421875, -52.684814453125, -15.830005645751953, 499.77178955078125, 1.5157155990600586, 165.69427490234375, -35.250484466552734, 345.94049072265625, 162.66302490234375, 153.0823516845703, 359.68353271484375, 122.75051879882812, 100.94632720947266, 119.78849792480469, 837.898193359375, 403.5307922363281, 214.59146118164062, 125.9235610961914, 570.3349609375, 41.515380859375, 775.4867553710938, 425.62200927734375, 308.224609375, 66.40941619873047, 90.70945739746094, 156.1827850341797, 229.81192016601562, 95.64087677001953, -31.258880615234375, 163.49635314941406, 113.28961944580078, 49.58794403076172, 102.24652099609375, -11.2208251953125, 218.43753051757812, 63.95703125, 295.3316955566406, -57.31001663208008, 214.34535217285156, 184.65380859375, 69.97074890136719, 34.533775329589844, 252.2874755859375, 483.0439758300781, 285.42669677734375, 171.49331665039062, -190.02825927734375, 279.18157958984375, 264.8397521972656, 259.72393798828125, -230.14488220214844, 380.1669006347656, 90.46884155273438, -115.16342163085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000241.npy"} +{"epoch": 0.35389133627019087, "step": 242, "batch_size": 64, "mean": 123.42953491210938, "std": 179.42074584960938, "min": -274.43609619140625, "p10": -48.05109252929686, "median": 84.22797775268555, "p90": 316.7211334228516, "max": 704.0394897460938, "pos_frac": 0.8125, "sample": [-30.23047637939453, 1.6023826599121094, 148.60562133789062, 254.06141662597656, 35.633846282958984, -274.43609619140625, 9.127593994140625, -9.902015686035156, 319.68865966796875, 143.385009765625, 29.699010848999023, 264.67633056640625, 651.8072509765625, 10.573209762573242, 13.792854309082031, 73.26961517333984, 113.07073211669922, 120.3460693359375, 49.4783935546875, 8.56574821472168, 219.09500122070312, 65.63922119140625, 245.9420166015625, 227.01773071289062, 259.1728515625, -67.49950408935547, 84.25887298583984, 45.360801696777344, 120.38080596923828, -117.17776489257812, 156.1302490234375, 206.72122192382812, 93.54869842529297, -138.2017822265625, 257.4004821777344, 106.2078628540039, 309.7969055175781, 458.16754150390625, 7.185554504394531, 82.40132141113281, 75.17181396484375, 207.96739196777344, -28.628646850585938, 704.0394897460938, 36.27277374267578, 72.03510284423828, 477.15338134765625, 84.19708251953125, 20.516983032226562, -7.53901481628418, -116.45732879638672, 474.5418701171875, 152.66285705566406, -24.94361114501953, 61.47319030761719, 116.050537109375, 147.80299377441406, -139.20716857910156, 248.16636657714844, 232.44287109375, 1.8144550323486328, 457.7498474121094, 147.53225708007812, -55.688499450683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000242.npy"} +{"epoch": 0.355359765051395, "step": 243, "batch_size": 64, "mean": 151.84548950195312, "std": 185.93345642089844, "min": -315.9930725097656, "p10": -64.8328269958496, "median": 135.6540985107422, "p90": 386.5651733398438, "max": 702.1101684570312, "pos_frac": 0.828125, "sample": [390.646484375, 30.356029510498047, 80.74124908447266, 453.7283935546875, 181.744384765625, 64.58187103271484, 131.16551208496094, 168.8026123046875, -98.02363586425781, -105.33147430419922, -70.85836791992188, 93.86856079101562, 227.12619018554688, 22.212594985961914, -140.1192169189453, 12.064727783203125, -143.45071411132812, 339.84393310546875, 148.9220733642578, 171.0294189453125, -315.9930725097656, 48.25147247314453, 174.52566528320312, 20.237770080566406, 26.394296646118164, 331.785888671875, 52.056243896484375, 198.61410522460938, 85.90938568115234, 406.1422119140625, 292.5263977050781, 267.7518310546875, -50.773231506347656, 152.34976196289062, -109.40695190429688, 340.0814208984375, 262.7274475097656, 568.9251708984375, 364.607666015625, 261.2310791015625, 299.3892822265625, 134.53115844726562, 377.0421142578125, 702.1101684570312, 455.1817626953125, 42.7244873046875, 132.15545654296875, 43.7520637512207, 371.3472900390625, -11.796249389648438, 155.03515625, 215.98245239257812, 24.30218505859375, 142.8004150390625, 496.5339050292969, 26.60022735595703, 136.77703857421875, 75.50660705566406, -6.729576110839844, 242.644287109375, -4.7256927490234375, 61.54579162597656, 59.17359924316406, 209.23187255859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000243.npy"} +{"epoch": 0.3568281938325991, "step": 244, "batch_size": 64, "mean": 159.75762939453125, "std": 171.19309997558594, "min": -165.942626953125, "p10": -33.00220394134521, "median": 137.2669448852539, "p90": 425.1889862060549, "max": 638.5193481445312, "pos_frac": 0.828125, "sample": [64.27830505371094, 32.73474884033203, 114.22038269042969, 180.67330932617188, 28.165035247802734, 54.88274002075195, 461.1360778808594, 139.13995361328125, -57.4036865234375, -39.91180419921875, 171.08750915527344, 246.33612060546875, 14.75885009765625, 315.54351806640625, 40.163116455078125, 135.39393615722656, -1.0893592834472656, 448.8059387207031, 22.36069107055664, 125.5273208618164, -165.942626953125, 118.61225891113281, 311.062744140625, -64.63131713867188, 178.07568359375, 31.501684188842773, 167.38327026367188, 226.9217529296875, 144.6389923095703, 38.71783447265625, 260.94989013671875, 128.58084106445312, 341.9237365722656, 351.8547668457031, 498.69622802734375, 548.0985717773438, 159.4373321533203, 448.0484313964844, 55.89987564086914, 638.5193481445312, 239.33929443359375, -12.953620910644531, 371.85028076171875, 180.00146484375, 146.15235900878906, -88.86498260498047, 130.2507781982422, -26.400056838989258, -35.831695556640625, 258.08441162109375, 22.173416137695312, 0.48630523681640625, 148.2867889404297, 82.52452850341797, -26.318511962890625, 112.56452941894531, 362.5175476074219, 252.10479736328125, -69.62858581542969, 151.93710327148438, 84.3370361328125, 486.37493896484375, 229.27476501464844, 311.0728454589844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000244.npy"} +{"epoch": 0.35829662261380324, "step": 245, "batch_size": 64, "mean": 123.15798950195312, "std": 183.0565643310547, "min": -432.4191589355469, "p10": -28.937536239624016, "median": 92.18741989135742, "p90": 329.82048950195326, "max": 639.23876953125, "pos_frac": 0.78125, "sample": [117.65863800048828, 425.1253662109375, 164.740478515625, 144.8321990966797, 192.98641967773438, 145.0490264892578, 59.08708190917969, -18.20257568359375, 214.31817626953125, 134.83377075195312, 87.44973754882812, 101.68243408203125, 155.72657775878906, 13.423160552978516, 136.09951782226562, 166.67599487304688, 214.1640625, 290.6279296875, 86.81997680664062, 28.804035186767578, 557.7926025390625, 131.8410186767578, -16.208097457885742, 349.97705078125, 65.53155517578125, -5.7851104736328125, 216.7460479736328, -21.167598724365234, -208.2283172607422, -32.26750946044922, 293.76318359375, 150.09561157226562, 639.23876953125, -178.52525329589844, 183.11807250976562, -3.7873306274414062, 3.2672042846679688, 87.58683776855469, 292.0934753417969, 86.83230590820312, -15.96976089477539, 226.65814208984375, 169.79624938964844, -88.66262817382812, 85.12938690185547, -432.4191589355469, 290.7850341796875, 289.6677551269531, 39.06562042236328, 221.04234313964844, 345.27362060546875, 22.555130004882812, 592.8584594726562, -71.87347412109375, -0.08988571166992188, 515.8294677734375, 79.82347106933594, 96.78800201416016, 23.507888793945312, -136.61932373046875, 50.330196380615234, 53.60023498535156, 62.61553192138672, 8.602622985839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000245.npy"} +{"epoch": 0.35976505139500736, "step": 246, "batch_size": 64, "mean": 126.73939514160156, "std": 187.04763793945312, "min": -256.18084716796875, "p10": -82.37894210815429, "median": 78.96263885498047, "p90": 369.1800720214844, "max": 721.2029418945312, "pos_frac": 0.796875, "sample": [221.66732788085938, 361.795166015625, 147.66574096679688, 87.27603912353516, 389.88751220703125, 218.4527130126953, 721.2029418945312, -182.29617309570312, 28.872615814208984, -256.18084716796875, 50.36675262451172, 74.63347625732422, 83.1014404296875, 0.8324604034423828, 166.3389434814453, 373.57000732421875, 293.5887451171875, 64.9463882446289, 45.99688720703125, 48.38983154296875, 49.7889404296875, 372.34503173828125, 143.5487823486328, -87.3871078491211, 109.06956481933594, -116.6612548828125, 229.04661560058594, -63.21807861328125, 342.5193786621094, -110.53870391845703, 4.8347320556640625, -24.543113708496094, 103.44734191894531, 114.56434631347656, 318.0731201171875, 32.61687469482422, 25.783987045288086, -18.809398651123047, -3.9720458984375, 231.73382568359375, 395.7715148925781, 121.8711929321289, 44.56598663330078, 226.5117645263672, 74.1995849609375, 185.6536865234375, 74.82383728027344, -70.69322204589844, 3.6531639099121094, 227.0698699951172, -98.53762817382812, 91.74685668945312, 319.1868591308594, 202.28163146972656, 25.912139892578125, 40.939422607421875, 8.510679244995117, 573.4638061523438, 315.16656494140625, 632.7523803710938, -45.37458038330078, 261.9660949707031, -96.10771179199219, 3.6369667053222656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000246.npy"} +{"epoch": 0.36123348017621143, "step": 247, "batch_size": 64, "mean": 177.74020385742188, "std": 202.03518676757812, "min": -304.3945617675781, "p10": -31.061357879638642, "median": 156.26173400878906, "p90": 450.514569091797, "max": 681.200439453125, "pos_frac": 0.875, "sample": [-231.46311950683594, 81.02452087402344, -117.10267639160156, 372.6690673828125, 103.67027282714844, 468.34539794921875, 146.11203002929688, 424.49737548828125, -67.4732437133789, 348.4113464355469, 387.03326416015625, 495.5346374511719, 203.86270141601562, 294.80316162109375, 63.97564697265625, 314.32305908203125, 492.7130126953125, 49.260902404785156, 342.1850891113281, 300.1370849609375, 166.41143798828125, 40.021575927734375, -0.0989990234375, 108.38433074951172, 23.846343994140625, 255.85423278808594, 29.497535705566406, 79.46647644042969, 84.6397476196289, 106.12495422363281, 241.24777221679688, 352.37713623046875, 50.95972442626953, 170.16722106933594, 57.11335754394531, 413.2218017578125, 553.700439453125, 355.733154296875, 111.62126159667969, -153.41085815429688, 3.115337371826172, 210.6221466064453, 4.31585693359375, 183.27134704589844, 44.293601989746094, 9.4229736328125, 170.2990264892578, 416.2763671875, 637.9405517578125, 92.16388702392578, 34.10785675048828, -147.599365234375, 79.69117736816406, 681.200439453125, 232.72198486328125, -44.33094024658203, 177.59536743164062, 300.0087890625, 199.3346405029297, -304.3945617675781, 461.664794921875, 27.866073608398438, 141.4564971923828, 244.93011474609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000247.npy"} +{"epoch": 0.36270190895741555, "step": 248, "batch_size": 64, "mean": 152.0039825439453, "std": 141.36956787109375, "min": -198.24229431152344, "p10": -5.183817672729489, "median": 146.79666137695312, "p90": 351.2314056396485, "max": 433.3505859375, "pos_frac": 0.875, "sample": [336.7567138671875, 389.0773010253906, 105.35183715820312, 183.58108520507812, 182.19869995117188, 0.6936531066894531, 163.21705627441406, 339.7463684082031, 138.6881561279297, -63.91998291015625, 3.401641845703125, 262.6323547363281, 214.3822021484375, 33.0714111328125, 224.1013946533203, 56.29322814941406, 18.829805374145508, 69.70301818847656, 155.16233825683594, -6.218341827392578, 154.74954223632812, 273.8818664550781, 251.884521484375, 424.6744689941406, 9.2840576171875, -27.584945678710938, 114.44062042236328, 365.86907958984375, 125.77162170410156, 194.34719848632812, 165.45057678222656, -198.24229431152344, 138.84378051757812, 294.83966064453125, 155.476806640625, 175.23025512695312, 69.11042785644531, 24.010635375976562, -31.510498046875, 47.44010543823242, 203.63201904296875, 232.72178649902344, 181.56179809570312, 411.77813720703125, 99.94798278808594, 95.57707214355469, 433.3505859375, 271.7124938964844, 26.651153564453125, 426.927734375, 2.0472335815429688, 128.92897033691406, 327.72607421875, 283.550048828125, 232.15988159179688, 356.153564453125, -2.769927978515625, 44.57756042480469, 110.16368103027344, 291.39483642578125, 127.04029083251953, 5.064289093017578, -6.558742523193359, -89.80105590820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000248.npy"} +{"epoch": 0.3641703377386197, "step": 249, "batch_size": 64, "mean": 79.55818939208984, "std": 177.6361541748047, "min": -316.85430908203125, "p10": -142.95180511474607, "median": 67.83724594116211, "p90": 284.6143310546875, "max": 600.3974609375, "pos_frac": 0.65625, "sample": [-34.92710876464844, 287.74810791015625, -50.01747131347656, -100.11552429199219, 112.2773208618164, -120.39739227294922, -5.2201995849609375, 139.4564971923828, 59.317535400390625, 350.8115539550781, 32.989990234375, 49.4979248046875, 78.33290100097656, 66.3015365600586, 220.58677673339844, -316.85430908203125, -66.36189270019531, 358.7130126953125, 169.90533447265625, -86.10600280761719, 156.05276489257812, 205.4206085205078, -180.06243896484375, 137.7081298828125, -36.68470764160156, 343.0525207519531, -159.71531677246094, 277.30218505859375, 129.89505004882812, 55.77775955200195, -25.54891586303711, -11.73208236694336, 207.5032958984375, 50.27345275878906, 126.78824615478516, -24.430465698242188, 180.90948486328125, -315.59979248046875, 109.786376953125, 261.8013610839844, 151.16635131835938, -22.201065063476562, 600.3974609375, -99.83309936523438, -225.19757080078125, -152.3633575439453, 21.359045028686523, 10.26211166381836, 91.9361572265625, 211.21893310546875, 107.22147369384766, 205.583740234375, -212.410400390625, 226.24612426757812, 270.52288818359375, 265.3255615234375, 67.85554504394531, -120.99151611328125, -0.05513763427734375, 192.1979522705078, 67.8189468383789, 409.1368713378906, 379.1109619140625, 12.979881286621094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000249.npy"} +{"epoch": 0.3656387665198238, "step": 250, "batch_size": 64, "mean": 150.18911743164062, "std": 155.456298828125, "min": -180.5838623046875, "p10": -35.56586246490478, "median": 141.55087280273438, "p90": 374.32860717773457, "max": 518.1981201171875, "pos_frac": 0.796875, "sample": [267.2301330566406, 37.025909423828125, 38.43315124511719, -16.359264373779297, -43.180694580078125, 194.37574768066406, 97.4283218383789, 431.375732421875, 152.85484313964844, -114.18275451660156, 45.237327575683594, 518.1981201171875, 256.90777587890625, 125.70222473144531, 241.62261962890625, 163.50379943847656, 29.60601043701172, 125.01760864257812, 322.7181396484375, 327.29962158203125, 140.20965576171875, -83.67656707763672, -37.90277862548828, 77.3724365234375, 84.7055892944336, 192.364990234375, 71.01778411865234, -30.11305809020996, 61.354217529296875, -5.6572265625, -18.238046646118164, 37.42478942871094, 174.7821502685547, 288.41070556640625, -54.46490478515625, 483.6640625, 142.89208984375, 158.10714721679688, -94.6751480102539, -5.175712585449219, 211.59776306152344, 394.48388671875, 255.37754821777344, 124.910400390625, 150.6446533203125, 57.528297424316406, 250.71585083007812, 123.3349380493164, 284.9794921875, 125.97013854980469, 69.74281311035156, -0.5352783203125, 198.2034912109375, 217.37686157226562, -180.5838623046875, 203.3070068359375, 396.6014404296875, 210.56918334960938, 170.91653442382812, 414.9328308105469, 319.55419921875, 40.38262176513672, 490.5218811035156, 298.3533935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000250.npy"} +{"epoch": 0.3671071953010279, "step": 251, "batch_size": 64, "mean": 101.44243621826172, "std": 181.41836547851562, "min": -373.406982421875, "p10": -59.88611373901366, "median": 79.30688858032227, "p90": 327.623959350586, "max": 671.151123046875, "pos_frac": 0.703125, "sample": [382.8231201171875, 12.321380615234375, 317.3006896972656, -19.87828826904297, 209.35211181640625, -266.9097900390625, -373.406982421875, 671.151123046875, -28.091758728027344, 174.0836944580078, 186.42425537109375, -152.28494262695312, 349.41046142578125, 58.096214294433594, 105.263427734375, -38.225982666015625, -250.94415283203125, 474.8141174316406, -7.106664657592773, 172.1980743408203, 16.79379653930664, 248.97763061523438, -14.203086853027344, -44.896873474121094, 5.08795166015625, 136.19712829589844, -125.16868591308594, -66.31007385253906, 84.39877319335938, 223.93344116210938, 65.14117431640625, -118.45879364013672, 287.75274658203125, -40.30706024169922, 51.545005798339844, 198.31716918945312, 69.09436798095703, 116.35274505615234, 154.95126342773438, 141.75177001953125, 10.24481201171875, 219.1123809814453, -23.43064308166504, 290.2877197265625, -4.479156494140625, 348.7198181152344, 205.5858917236328, 181.85923767089844, 74.21500396728516, 137.60089111328125, -39.003944396972656, 21.247329711914062, 563.4791870117188, 70.94255065917969, 332.0482177734375, 157.40603637695312, -31.86141014099121, 190.0782470703125, 103.06787109375, -16.075946807861328, 29.979576110839844, 133.45913696289062, 38.05518341064453, 132.43739318847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000251.npy"} +{"epoch": 0.368575624082232, "step": 252, "batch_size": 64, "mean": 142.23912048339844, "std": 146.38037109375, "min": -459.35223388671875, "p10": -13.54698467254638, "median": 148.0460968017578, "p90": 320.26939392089844, "max": 491.2230224609375, "pos_frac": 0.859375, "sample": [294.687255859375, 344.4160461425781, 46.58473587036133, 215.81068420410156, 248.1120147705078, 66.2020263671875, 148.517578125, 320.6166076660156, 71.27975463867188, 128.95590209960938, 219.13856506347656, 9.522125244140625, 170.12600708007812, 55.99275207519531, 111.07954406738281, 157.5541229248047, 56.32846450805664, 138.4088592529297, 165.85684204101562, -46.767845153808594, 247.6035614013672, 175.9010772705078, 46.82114791870117, 408.33966064453125, 252.47634887695312, -131.6114959716797, -20.559682846069336, 107.08026123046875, -16.9407958984375, 147.57461547851562, -25.47031021118164, 186.04400634765625, 30.873430252075195, 149.94204711914062, 64.17837524414062, 114.01531982421875, 134.36862182617188, 419.0415954589844, 357.43096923828125, 306.7021484375, 44.52208709716797, 168.220703125, 161.4969940185547, 40.701019287109375, 221.1298828125, 319.459228515625, 277.87872314453125, 258.2944641113281, 194.5418701171875, 141.4529266357422, -0.0436859130859375, -459.35223388671875, 333.2509765625, 249.2002410888672, 123.770751953125, 105.97010803222656, -5.628091812133789, 162.02301025390625, 491.2230224609375, -57.57070541381836, 63.26981735229492, 32.65099334716797, 171.64193725585938, 188.96661376953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000252.npy"} +{"epoch": 0.3700440528634361, "step": 253, "batch_size": 64, "mean": 133.81402587890625, "std": 169.00259399414062, "min": -391.6184997558594, "p10": -64.36259231567382, "median": 117.86876678466797, "p90": 366.36648254394544, "max": 556.162841796875, "pos_frac": 0.8125, "sample": [-62.007713317871094, 25.22557830810547, 272.95806884765625, 49.75419616699219, 17.556884765625, 130.63433837890625, 218.481689453125, 51.46080780029297, 178.2941436767578, 88.17509460449219, 100.61013793945312, 556.162841796875, 246.48849487304688, 65.26348114013672, 179.75506591796875, 106.49447631835938, 397.60858154296875, 91.50543212890625, -73.17341613769531, 102.46192932128906, -25.58094596862793, -391.6184997558594, 233.90093994140625, 380.64044189453125, 333.0605773925781, 9.773126602172852, 428.0255126953125, 13.936500549316406, -19.791717529296875, 35.10206985473633, 18.238059997558594, -101.93190002441406, 114.0738525390625, 254.91030883789062, 289.64306640625, 219.8475341796875, 260.795166015625, -124.0226821899414, 253.69810485839844, 112.51029205322266, 242.1747283935547, 327.58148193359375, 151.56829833984375, 271.1899719238281, 390.4998779296875, 96.87586975097656, 121.66368103027344, 395.0836486816406, 124.10668182373047, -219.98358154296875, 312.1522216796875, 173.9764404296875, -88.23794555664062, 59.214393615722656, -65.371826171875, 89.21070098876953, 153.08778381347656, 215.41673278808594, 204.15501403808594, -35.492523193359375, -45.410221099853516, 54.581268310546875, 435.2214050292969, 161.9139404296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000253.npy"} +{"epoch": 0.37151248164464024, "step": 254, "batch_size": 64, "mean": 117.6561279296875, "std": 198.08157348632812, "min": -260.4185485839844, "p10": -109.58972625732422, "median": 91.55871963500977, "p90": 391.4845123291017, "max": 805.4329833984375, "pos_frac": 0.734375, "sample": [120.970703125, -22.570114135742188, 141.57440185546875, 91.78518676757812, 99.37693786621094, 113.08673095703125, 19.786376953125, -88.93453216552734, 135.43966674804688, 32.43647766113281, 132.884033203125, 142.74461364746094, 255.80447387695312, 34.634185791015625, -103.66981506347656, -260.4185485839844, 178.37310791015625, -40.74470138549805, 89.06695556640625, 203.421630859375, 3.8107032775878906, 236.3358612060547, 510.2555236816406, 326.504150390625, 366.9327087402344, -172.53732299804688, 402.0067138671875, 308.7757263183594, -10.660408020019531, 14.034189224243164, 91.3322525024414, 197.250244140625, 66.3857192993164, -192.72647094726562, 21.22787094116211, 181.50918579101562, -72.2375717163086, 421.0008544921875, -112.1268310546875, 60.62826919555664, -29.75469207763672, -203.5771484375, 59.483360290527344, 468.17572021484375, -0.32208824157714844, 210.33584594726562, 5.36534309387207, 335.7857666015625, 61.792518615722656, 176.61062622070312, 805.4329833984375, 147.8241729736328, 84.42178344726562, -18.430641174316406, -161.98812866210938, 554.3615112304688, 136.502685546875, 233.76504516601562, 158.81851196289062, 55.21067810058594, -116.45858764648438, 514.278564453125, 151.3648681640625, -21.755523681640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000254.npy"} +{"epoch": 0.37298091042584436, "step": 255, "batch_size": 64, "mean": 125.03014373779297, "std": 176.63027954101562, "min": -420.80224609375, "p10": -51.005148315429686, "median": 99.80470275878906, "p90": 331.5616577148438, "max": 581.118896484375, "pos_frac": 0.765625, "sample": [581.118896484375, -92.90435791015625, 19.8765869140625, 336.56787109375, 157.63455200195312, -42.980316162109375, -46.95691680908203, -36.69123840332031, -420.80224609375, 57.903377532958984, 280.48004150390625, 84.88603210449219, -9.14501953125, 22.014995574951172, 217.53048706054688, 195.9097900390625, -147.4005126953125, -210.53228759765625, 170.6682891845703, 276.4939880371094, 248.69296264648438, 35.74867248535156, 95.58252716064453, 20.14975357055664, 94.89163208007812, -52.74010467529297, -95.76195526123047, 372.8148193359375, -5.218669891357422, 173.89389038085938, 200.60606384277344, 224.3284912109375, 62.30678176879883, 319.8804931640625, 361.5740051269531, 319.6819152832031, 256.1285095214844, 346.24456787109375, 315.35040283203125, 541.6181640625, 52.92328643798828, -4.213521957397461, 272.2919921875, 136.9984130859375, 25.801708221435547, 170.8505401611328, 45.07810974121094, 225.89743041992188, -111.04478454589844, 319.00128173828125, 406.077392578125, 50.40177917480469, 124.0669937133789, 47.00969696044922, -39.72423553466797, 125.36514282226562, 25.937782287597656, 104.0268783569336, 28.574485778808594, 144.16685485839844, -42.171653747558594, 72.45889282226562, 318.7265625, 273.9832763671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000255.npy"} +{"epoch": 0.3744493392070485, "step": 256, "batch_size": 64, "mean": 132.7100067138672, "std": 170.6522216796875, "min": -289.64593505859375, "p10": -66.04427719116211, "median": 113.23696899414062, "p90": 343.82243957519535, "max": 578.8585205078125, "pos_frac": 0.765625, "sample": [97.6912841796875, 514.4669799804688, 83.03165435791016, 242.06150817871094, 193.01815795898438, 245.73416137695312, 40.37928771972656, 174.32489013671875, -26.213287353515625, 22.066497802734375, 301.4258728027344, 225.637939453125, 207.72994995117188, 345.6507568359375, 258.72064208984375, -131.05453491210938, 58.408931732177734, 174.3168487548828, -289.64593505859375, 31.14572525024414, 278.58587646484375, 116.80796813964844, -122.00347900390625, 271.13604736328125, 63.1202278137207, 261.6910705566406, -62.21062469482422, -71.06803894042969, -67.68727111816406, 181.78761291503906, 360.3160400390625, -2.841522216796875, 210.44552612304688, 94.21890258789062, 55.26411437988281, 122.08992767333984, 339.5563659667969, -68.39111328125, 47.45384216308594, 42.45002746582031, -195.67088317871094, 36.314361572265625, 20.739906311035156, 3.27099609375, 178.39300537109375, -28.213077545166016, 191.4898681640625, 109.66596984863281, -25.4854736328125, 260.98406982421875, -51.4112548828125, 101.43980407714844, -10.320014953613281, 193.00201416015625, 349.31158447265625, 383.24212646484375, 141.38381958007812, 66.95635986328125, 578.8585205078125, 519.415771484375, 221.3887939453125, 300.1774597167969, -5.495718002319336, 334.3837585449219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000256.npy"} +{"epoch": 0.37591776798825255, "step": 257, "batch_size": 64, "mean": 141.07919311523438, "std": 180.25241088867188, "min": -256.56219482421875, "p10": -46.79545249938964, "median": 106.59395980834961, "p90": 415.37260437011724, "max": 642.541748046875, "pos_frac": 0.78125, "sample": [446.89239501953125, -39.297542572021484, 8.720413208007812, 110.25357055664062, 363.8237609863281, -14.493545532226562, 203.31817626953125, 422.7191162109375, 237.03558349609375, 146.36300659179688, -2.6893138885498047, -50.00884246826172, 96.65084838867188, 294.87225341796875, 154.48223876953125, 23.42974853515625, 218.53964233398438, 201.4907684326172, 27.00368881225586, 427.3916320800781, 34.89430236816406, 398.2307434082031, 261.4036865234375, -146.12576293945312, 84.92247009277344, 116.78524780273438, -71.06675720214844, 13.274932861328125, -29.70760726928711, 451.0910339355469, 190.26708984375, 226.3928985595703, 254.23233032226562, 642.541748046875, 80.88272094726562, -54.6051025390625, 102.9343490600586, 9.853694915771484, 111.80892181396484, -256.56219482421875, 85.87659454345703, 395.0321350097656, 38.634952545166016, 239.488525390625, 185.58348083496094, 212.30145263671875, 73.22693634033203, 3.5531692504882812, 507.38555908203125, 243.39385986328125, 226.838623046875, 283.17340087890625, -30.11035919189453, 97.03966522216797, 49.922821044921875, 547.2301025390625, -92.85154724121094, -11.911605834960938, 163.44415283203125, -146.4692840576172, 40.34046936035156, 29.283599853515625, -18.17364501953125, 208.89016723632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000257.npy"} +{"epoch": 0.37738619676945667, "step": 258, "batch_size": 64, "mean": 135.947265625, "std": 191.71681213378906, "min": -493.3393249511719, "p10": -53.54913406372069, "median": 128.76442337036133, "p90": 355.68637390136723, "max": 648.8013305664062, "pos_frac": 0.765625, "sample": [280.5433044433594, 30.614744186401367, -95.652587890625, 249.228271484375, 46.43603515625, 134.96978759765625, 255.28314208984375, 230.15176391601562, 218.72659301757812, -6.23065185546875, -34.49358367919922, -24.93250274658203, -57.723854064941406, 35.9300537109375, 90.53738403320312, -29.587661743164062, -24.94147491455078, 101.56918334960938, 152.97900390625, -10.571968078613281, 47.53326416015625, 255.25375366210938, 512.34228515625, 204.0016326904297, 486.6824645996094, 244.74163818359375, 200.09820556640625, 27.782516479492188, 155.64340209960938, 338.1463623046875, 187.86317443847656, 278.5513916015625, 312.01849365234375, 167.2371368408203, -205.89773559570312, 95.2667236328125, 265.3620910644531, -341.8861083984375, 24.477394104003906, 113.6497573852539, 386.92596435546875, -10.42586898803711, -86.37205505371094, 363.2035217285156, 30.216617584228516, -493.3393249511719, 648.8013305664062, 110.37531280517578, -127.6708984375, 115.56627655029297, 232.41143798828125, -43.80812072753906, 289.811279296875, 334.26214599609375, 378.2645568847656, 36.253318786621094, 281.48175048828125, 402.20037841796875, 152.5262451171875, 255.50921630859375, 91.30403137207031, 122.5590591430664, 242.1392822265625, 76.7265396118164], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000258.npy"} +{"epoch": 0.3788546255506608, "step": 259, "batch_size": 64, "mean": 115.67851257324219, "std": 178.36978149414062, "min": -246.0644073486328, "p10": -96.26671905517577, "median": 86.70748901367188, "p90": 359.23869323730474, "max": 530.7528076171875, "pos_frac": 0.75, "sample": [210.74766540527344, -16.785619735717773, 154.67120361328125, 197.9558868408203, 17.244823455810547, -98.89163208007812, 313.01947021484375, 392.1914367675781, 420.896728515625, 180.5048065185547, 49.48461151123047, -39.47637176513672, 104.16810607910156, -66.75162506103516, 7.038360595703125, -101.65045166015625, 343.0863342285156, -52.94073486328125, 451.210693359375, 154.31552124023438, 108.09956359863281, 114.38134765625, 83.82904052734375, 54.41611099243164, 291.2735595703125, 18.645706176757812, 53.27772521972656, 47.32569885253906, 530.7528076171875, -190.98915100097656, 61.329071044921875, 29.709985733032227, 486.003173828125, 158.0654754638672, 13.487174987792969, -166.3542938232422, 123.65959930419922, 332.8436279296875, -137.77850341796875, 317.3255615234375, -159.22560119628906, 19.726463317871094, 330.98736572265625, -90.14192199707031, 89.5859375, 48.84779357910156, 366.1611328125, 304.4259338378906, -246.0644073486328, 152.1597442626953, -27.612546920776367, 297.34466552734375, 270.0945739746094, -21.432708740234375, 387.51092529296875, -80.2789535522461, 125.86264038085938, 320.83984375, 132.4329833984375, -67.45304107666016, 60.259334564208984, 35.68314743041992, 139.80776977539062, 64.5613784790039], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000259.npy"} +{"epoch": 0.3803230543318649, "step": 260, "batch_size": 64, "mean": 157.31895446777344, "std": 175.4031982421875, "min": -175.98851013183594, "p10": -17.702655792236325, "median": 126.90476989746094, "p90": 390.98991699218755, "max": 913.0970458984375, "pos_frac": 0.875, "sample": [214.73025512695312, -77.46114349365234, 43.88721466064453, -106.0298080444336, 19.48659324645996, 161.76242065429688, 102.5741195678711, 150.8784637451172, 395.04888916015625, 47.235015869140625, 56.279850006103516, 381.51898193359375, 119.35658264160156, 96.21571350097656, 140.9129180908203, 913.0970458984375, 147.71234130859375, 235.89382934570312, 65.0498046875, 461.449951171875, -175.98851013183594, 159.38021850585938, -25.145292282104492, 161.63604736328125, -109.49713897705078, 4.227897644042969, 146.45787048339844, -18.700088500976562, 234.87844848632812, 112.92694091796875, 107.57040405273438, 348.3974609375, -15.375312805175781, 241.55799865722656, 134.4529571533203, 327.649169921875, 342.3625183105469, 160.8382568359375, 115.41484069824219, 97.30438232421875, 94.79698181152344, 58.52830505371094, 340.04791259765625, 141.0206298828125, 146.018798828125, 49.60996627807617, 29.912673950195312, 92.48834228515625, 180.65106201171875, 190.01670837402344, 208.535400390625, 160.2567138671875, 117.98584747314453, 403.176513671875, 39.401100158691406, -47.79425811767578, 81.26327514648438, 437.25665283203125, 30.64227294921875, 571.7263793945312, 88.98302459716797, 409.5088806152344, 43.4849853515625, 280.9462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000260.npy"} +{"epoch": 0.38179148311306904, "step": 261, "batch_size": 64, "mean": 147.43002319335938, "std": 203.79600524902344, "min": -327.4521789550781, "p10": -69.56644935607909, "median": 115.84900283813477, "p90": 387.3689727783203, "max": 918.1975708007812, "pos_frac": 0.765625, "sample": [380.9388122558594, 348.3477478027344, 54.771583557128906, -230.12779235839844, 115.78053283691406, 918.1975708007812, 36.79875183105469, 355.72393798828125, -23.15090560913086, 207.72512817382812, -1.5984973907470703, 85.07808685302734, -94.26531982421875, 420.28106689453125, 99.00660705566406, 212.29177856445312, 222.92526245117188, 324.23455810546875, 357.2962341308594, -21.30791473388672, 81.55503845214844, 77.855224609375, 115.91747283935547, 190.8851318359375, 181.6472625732422, 321.44873046875, 504.22698974609375, 20.719818115234375, 288.0435791015625, 217.3784637451172, 57.5784912109375, 27.884620666503906, -0.01654815673828125, 72.20906066894531, 235.18878173828125, 58.27130889892578, 259.44427490234375, 357.340087890625, 272.7066650390625, 147.04220581054688, 227.89920043945312, -63.95321273803711, 474.5182800292969, 416.7254638671875, -5.912729263305664, 390.124755859375, -179.5943603515625, 187.78073120117188, 7.030342102050781, 220.57037353515625, 105.19953155517578, 464.690673828125, -43.56024932861328, -327.4521789550781, -182.8145294189453, 107.47266387939453, -152.46356201171875, -17.432458877563477, 180.34097290039062, 186.49830627441406, 57.18219757080078, 68.22069549560547, 130.149169921875, -71.97212219238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000261.npy"} +{"epoch": 0.3832599118942731, "step": 262, "batch_size": 64, "mean": 120.58787536621094, "std": 160.24339294433594, "min": -237.8165740966797, "p10": -71.37945175170896, "median": 122.001220703125, "p90": 291.6870025634767, "max": 713.6873779296875, "pos_frac": 0.796875, "sample": [46.21343994140625, 398.1966247558594, 110.48160552978516, 200.99261474609375, -237.8165740966797, 329.0727233886719, 467.66693115234375, 101.79609680175781, 147.9425048828125, 255.06365966796875, -133.6666259765625, -231.23806762695312, 129.76145935058594, 713.6873779296875, 146.28884887695312, -162.45703125, 149.3999786376953, 242.38665771484375, -6.563055038452148, 181.65914916992188, 151.52455139160156, 78.12621307373047, -83.5943603515625, 307.3827209472656, 138.4698944091797, -11.581680297851562, 31.130451202392578, 362.4285888671875, 118.77255249023438, 243.91455078125, 48.484954833984375, 85.00926971435547, 101.33935546875, -42.79049301147461, 86.81227111816406, 59.70557403564453, 180.32037353515625, -15.978179931640625, 162.86105346679688, 167.69122314453125, 125.22988891601562, 130.04551696777344, 1.1747055053710938, 239.10467529296875, 228.46104431152344, 77.55284881591797, 238.8162841796875, 102.99510955810547, -102.80789947509766, -42.87799835205078, 65.12303161621094, 40.22004699707031, 49.46482849121094, 311.3273010253906, -136.54852294921875, -42.089927673339844, 115.7078857421875, 175.9344482421875, 243.10081481933594, 228.33160400390625, 170.21214294433594, 22.553024291992188, 251.3123321533203, 206.38357543945312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000262.npy"} +{"epoch": 0.38472834067547723, "step": 263, "batch_size": 64, "mean": 172.56646728515625, "std": 208.0592498779297, "min": -331.70025634765625, "p10": -70.11700592041015, "median": 136.90958404541016, "p90": 440.61325683593765, "max": 719.70556640625, "pos_frac": 0.8125, "sample": [318.9903869628906, -331.70025634765625, 135.43202209472656, 124.05223083496094, 369.37799072265625, 49.53343963623047, 316.38067626953125, 58.11981201171875, 50.05430603027344, 110.37789154052734, -269.96624755859375, 152.91937255859375, 78.6976547241211, 298.755859375, 293.1785583496094, 175.00416564941406, 19.350141525268555, 389.94952392578125, 100.24588775634766, 159.70713806152344, -23.994577407836914, -91.82180786132812, 34.233314514160156, 300.6237487792969, -29.61636734008789, -74.11691284179688, 330.60919189453125, 138.38714599609375, -30.485252380371094, 83.70614624023438, 39.26420593261719, 19.467771530151367, 348.9866638183594, 60.22386169433594, -60.78388977050781, 495.24383544921875, 272.28863525390625, 101.84260559082031, 373.4999694824219, 522.766845703125, 117.11759185791016, 454.04876708984375, 107.37921142578125, 253.18087768554688, 313.3009338378906, 151.17575073242188, 474.6904296875, -83.55055236816406, 224.71600341796875, 258.6698303222656, 391.36322021484375, 409.26373291015625, 671.61962890625, -153.6625213623047, 2.4603118896484375, 546.716796875, -44.728248596191406, -82.9701919555664, 202.8461456298828, 211.5775146484375, 126.03245544433594, 719.70556640625, 132.30194091796875, 232.21292114257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000263.npy"} +{"epoch": 0.38619676945668135, "step": 264, "batch_size": 64, "mean": 145.2810821533203, "std": 207.505126953125, "min": -256.8216247558594, "p10": -77.58253021240233, "median": 120.32405853271484, "p90": 440.3517608642578, "max": 618.5989379882812, "pos_frac": 0.78125, "sample": [212.47320556640625, 296.7690734863281, 48.073795318603516, 614.5197143554688, 80.78538513183594, -252.0602264404297, -166.4322509765625, 407.0829772949219, 71.39105224609375, 470.2510986328125, -69.8214340209961, 35.29723358154297, 120.31343078613281, -153.3834228515625, 152.55731201171875, 161.4384765625, 278.4801330566406, 437.55511474609375, 373.3212585449219, 295.5103759765625, 38.81207275390625, 226.87844848632812, 260.84515380859375, 184.89781188964844, 50.45616149902344, 19.627975463867188, -54.57611846923828, -13.601791381835938, -80.9087142944336, 202.6470489501953, -61.644622802734375, 19.869117736816406, 46.746070861816406, 618.5989379882812, 3.2264404296875, 441.5503234863281, 169.40365600585938, 221.41799926757812, 29.818771362304688, 243.5989227294922, 54.550933837890625, 234.8946990966797, 120.33468627929688, 109.38861083984375, -12.402153015136719, 602.9356079101562, 387.4884033203125, -238.42816162109375, 189.169189453125, -172.45242309570312, 201.3249053955078, 13.86136245727539, 232.89569091796875, 482.70367431640625, -17.324020385742188, 148.98117065429688, 55.90015411376953, 41.008697509765625, -256.8216247558594, 530.73681640625, 310.8588562011719, 245.1224822998047, -4.945743560791016, 56.42132568359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000264.npy"} +{"epoch": 0.3876651982378855, "step": 265, "batch_size": 64, "mean": 142.77999877929688, "std": 233.81333923339844, "min": -433.81591796875, "p10": -105.16548461914061, "median": 111.35327529907227, "p90": 409.7297973632813, "max": 831.9027099609375, "pos_frac": 0.75, "sample": [68.32732391357422, 399.7268981933594, 320.0338439941406, 221.32176208496094, 47.42272186279297, 153.22702026367188, 359.23028564453125, 22.022193908691406, 672.08740234375, 80.783447265625, 253.20370483398438, -271.3466796875, 26.251285552978516, -30.34231948852539, -127.13641357421875, 24.371326446533203, 371.9036560058594, -96.88069152832031, 97.40705108642578, -43.861175537109375, 230.25479125976562, 831.9027099609375, -433.81591796875, 537.9511108398438, -134.82754516601562, 178.98757934570312, 202.93484497070312, 466.2265319824219, -66.40972900390625, 703.0846557617188, -227.3003387451172, 294.7897033691406, 66.48846435546875, -54.62582015991211, 87.15603637695312, -95.93368530273438, 7.4482421875, -71.58717346191406, 379.067626953125, -108.71611022949219, 414.0167541503906, 103.91448974609375, 74.22610473632812, 375.74188232421875, -36.55919647216797, 202.0403289794922, 20.962478637695312, 158.21612548828125, -30.475540161132812, 270.4077453613281, 89.99262237548828, 144.57061767578125, 156.60977172851562, 364.4299621582031, 123.07133483886719, 139.7793731689453, 222.9051055908203, 118.79206085205078, 54.832942962646484, -275.1116638183594, 439.53729248046875, 92.374755859375, 338.56768798828125, 234.2482452392578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000265.npy"} +{"epoch": 0.3891336270190896, "step": 266, "batch_size": 64, "mean": 113.11186218261719, "std": 193.6034698486328, "min": -264.2123107910156, "p10": -88.4216468811035, "median": 76.62957382202148, "p90": 400.6787872314453, "max": 673.8787841796875, "pos_frac": 0.71875, "sample": [-38.09442138671875, 524.9966430664062, 51.230369567871094, 123.02564239501953, 230.3359832763672, 118.91868591308594, -54.52943420410156, 274.7876892089844, 22.358795166015625, 426.45123291015625, 124.25614929199219, 277.4827880859375, -74.57896423339844, 256.2646179199219, -76.94804382324219, 48.27519989013672, 251.60159301757812, 78.89199829101562, 183.97804260253906, 74.36714935302734, 141.7378692626953, -263.80499267578125, -93.33890533447266, 382.0263671875, 131.8872528076172, 398.6001281738281, 197.8722686767578, 25.37194061279297, 35.91315460205078, 401.56964111328125, 443.9792175292969, 272.10601806640625, 673.8787841796875, 472.217041015625, 209.1808624267578, 112.966064453125, 44.16898727416992, 260.75164794921875, 31.01679229736328, 48.143943786621094, 71.29373168945312, 151.76177978515625, 58.622413635253906, 33.70343780517578, 156.6363983154297, -2.339824676513672, -264.2123107910156, -225.57867431640625, 271.09130859375, 4.728998184204102, -169.40673828125, -73.0488052368164, -30.364273071289062, -102.5185546875, -36.17827606201172, -46.82672882080078, -19.1904239654541, -36.03020477294922, -189.84600830078125, 423.4201354980469, 181.144775390625, 236.0634765625, 13.735038757324219, 83.18285369873047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000266.npy"} +{"epoch": 0.39060205580029367, "step": 267, "batch_size": 64, "mean": 134.9067840576172, "std": 204.47283935546875, "min": -485.0990295410156, "p10": -69.78702926635742, "median": 109.8705940246582, "p90": 343.85541992187507, "max": 718.05322265625, "pos_frac": 0.796875, "sample": [90.5652847290039, 204.92425537109375, 352.1053466796875, -4.463459014892578, 85.33807373046875, -15.22120475769043, 277.81927490234375, 168.1767578125, 18.9481201171875, 50.5809211730957, -93.1339111328125, -86.38967895507812, 11.92486572265625, 324.6055908203125, 149.0611572265625, 120.29634857177734, -72.18636322021484, 212.69882202148438, 486.67572021484375, 718.05322265625, 95.56803894042969, 92.0768814086914, 127.64217376708984, 5.2046966552734375, -141.6077423095703, 94.90657806396484, 77.61004638671875, 77.6037368774414, 318.314453125, 210.9810028076172, 565.817138671875, 406.1628723144531, 665.9906005859375, 40.446380615234375, -13.82950210571289, 251.9872283935547, 4.344554901123047, -87.4034194946289, 4.671733856201172, 139.55551147460938, 287.3945617675781, 19.035781860351562, -6.419893264770508, 560.0572509765625, -485.0990295410156, -31.74919891357422, -361.806640625, 191.28692626953125, 198.87066650390625, 118.84231567382812, 200.05226135253906, 211.08908081054688, 98.53001403808594, 273.4106140136719, 56.47455596923828, 147.58468627929688, -64.18858337402344, 294.0720520019531, 64.73011016845703, 118.68899536132812, 292.7445373535156, 139.40744018554688, 273.5516662597656, 101.05219268798828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000267.npy"} +{"epoch": 0.3920704845814978, "step": 268, "batch_size": 64, "mean": 142.48526000976562, "std": 207.6282196044922, "min": -342.66015625, "p10": -44.1714458465576, "median": 126.06000518798828, "p90": 359.5283081054688, "max": 761.3951416015625, "pos_frac": 0.8125, "sample": [18.21765899658203, 479.973876953125, 69.64456939697266, -18.219261169433594, 64.08549499511719, 60.62323760986328, 159.29019165039062, 87.31082153320312, 363.0863037109375, 106.17477416992188, 171.56161499023438, -310.3543701171875, 146.31729125976562, 62.62425231933594, 262.29364013671875, -232.87521362304688, 89.70952606201172, 47.60235595703125, -294.6347961425781, 137.6925811767578, 165.2232666015625, 761.3951416015625, 98.75868225097656, 150.20562744140625, 74.16737365722656, 287.15179443359375, 498.9560546875, 534.2427978515625, -29.155872344970703, 332.5841064453125, -71.79706573486328, 67.64625549316406, -69.41796875, 5.409511566162109, 636.2422485351562, 154.50221252441406, 202.06695556640625, 145.51148986816406, 248.57232666015625, 222.18455505371094, 131.63174438476562, 264.00634765625, 18.622520446777344, -50.21302795410156, 308.889404296875, 12.82379150390625, 180.12303161621094, 316.009765625, 4.9869384765625, 257.0393371582031, 177.86663818359375, 120.48826599121094, 663.7533569335938, 189.79229736328125, 64.48092651367188, 100.72431945800781, 110.76313018798828, -5.079460144042969, 351.226318359375, -15.240583419799805, 232.9541015625, -342.66015625, -30.074420928955078, 171.5684051513672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000268.npy"} +{"epoch": 0.3935389133627019, "step": 269, "batch_size": 64, "mean": 173.07493591308594, "std": 224.96292114257812, "min": -403.6544494628906, "p10": -79.86783905029297, "median": 171.82573699951172, "p90": 451.83419799804693, "max": 705.7399291992188, "pos_frac": 0.75, "sample": [135.76829528808594, 328.0625915527344, 150.906494140625, 276.78497314453125, -91.59209442138672, 197.3472900390625, 296.56182861328125, 153.51156616210938, -80.31503295898438, -42.35425567626953, 294.8567810058594, -3.4139404296875, 455.2379455566406, 11.08404541015625, 306.2744140625, 284.49176025390625, -12.938575744628906, 190.13990783691406, -61.09715270996094, -403.6544494628906, 291.9317626953125, 17.554279327392578, 211.8466796875, -227.75048828125, -78.82438659667969, 383.51812744140625, 15.948097229003906, 631.5399169921875, 499.08880615234375, 211.79437255859375, 152.035888671875, 57.03070068359375, 521.4175415039062, 301.6248779296875, -16.721878051757812, 325.0858154296875, 441.9279479980469, 99.86552429199219, 345.00714111328125, 2.009521484375, 1.3404617309570312, 145.3451385498047, -194.75682067871094, 28.633453369140625, 123.02510070800781, 589.0582275390625, 302.076904296875, 513.9859008789062, 272.2279357910156, -24.955825805664062, 320.3723449707031, 263.464599609375, -8.49310302734375, 705.7399291992188, -126.65951538085938, 443.8921203613281, 3.78778076171875, 44.22191619873047, -39.8879508972168, 300.42266845703125, 426.9295654296875, 404.3490905761719, -179.2135467529297, 190.29705810546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000269.npy"} +{"epoch": 0.39500734214390604, "step": 270, "batch_size": 64, "mean": 125.37371826171875, "std": 195.267578125, "min": -424.5915222167969, "p10": -75.3889404296875, "median": 116.11680221557617, "p90": 351.4475830078125, "max": 710.8721923828125, "pos_frac": 0.8125, "sample": [213.49050903320312, 324.4560546875, 433.050048828125, -270.1706237792969, -424.5915222167969, -2.4582977294921875, 33.747093200683594, 251.37808227539062, 109.73345184326172, 301.5246887207031, 346.467041015625, -46.357757568359375, 128.18167114257812, 185.21763610839844, 2.598724365234375, 62.35668182373047, 92.81551361083984, 131.7134552001953, -148.51736450195312, 220.39639282226562, 287.56103515625, 152.34478759765625, 7.183174133300781, 226.25131225585938, 435.13671875, 279.1471252441406, 16.0687255859375, 103.25562286376953, 208.38418579101562, 116.54984283447266, -284.4937744140625, 366.3900146484375, 710.8721923828125, -78.69618225097656, -93.5965576171875, 250.34417724609375, 349.0474853515625, 31.66167640686035, 110.95048522949219, 14.49648666381836, 552.08203125, -214.019287109375, 155.14202880859375, 21.512447357177734, 150.26736450195312, 115.68376159667969, 29.921005249023438, 232.8250732421875, -67.67204284667969, -18.245342254638672, 210.64404296875, 287.8501281738281, 54.15723419189453, 18.979536056518555, 23.018753051757812, 118.53512573242188, 6.858795166015625, 56.10227966308594, -20.51593780517578, 352.4761962890625, 160.82582092285156, 140.12368774414062, 62.31107711791992, 441.16412353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000270.npy"} +{"epoch": 0.3964757709251101, "step": 271, "batch_size": 64, "mean": 154.854248046875, "std": 176.26657104492188, "min": -130.7291259765625, "p10": -23.587857246398926, "median": 107.76160430908203, "p90": 415.420281982422, "max": 675.0081176757812, "pos_frac": 0.796875, "sample": [675.0081176757812, 357.60205078125, 104.12223052978516, -15.608837127685547, 215.2474822998047, -28.359912872314453, 356.2643127441406, 373.6837463378906, 34.68073272705078, -42.77631378173828, 284.71368408203125, -130.7291259765625, 429.923828125, -107.09333038330078, 86.51018524169922, -23.96851348876953, 120.31210327148438, -10.504112243652344, 107.43443298339844, -3.117645263671875, -82.66830444335938, 14.318267822265625, 455.20318603515625, 337.8095703125, 487.6854248046875, 381.57867431640625, 79.35958099365234, -22.69965934753418, 84.39755249023438, 127.60295867919922, -8.095195770263672, 187.80638122558594, 99.607666015625, 8.186759948730469, -83.55722045898438, 124.05462646484375, 62.34405517578125, 2.8976669311523438, 371.9088439941406, 74.34553527832031, 7.939216613769531, 519.7882690429688, 44.48902130126953, -14.30345344543457, 129.03709411621094, 40.934017181396484, 108.08877563476562, 144.23680114746094, 325.5187683105469, 195.17544555664062, 455.082275390625, 248.18905639648438, 104.40279388427734, 38.72621154785156, 441.29534912109375, 6.403358459472656, 231.80706787109375, 339.5733337402344, 65.37518310546875, 228.5391082763672, 159.1190185546875, 275.25445556640625, 139.83697509765625, 190.7328338623047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000271.npy"} +{"epoch": 0.39794419970631423, "step": 272, "batch_size": 64, "mean": 188.56448364257812, "std": 181.40406799316406, "min": -185.11183166503906, "p10": -59.958739471435536, "median": 192.63511657714844, "p90": 416.173486328125, "max": 618.6817626953125, "pos_frac": 0.828125, "sample": [294.76263427734375, 234.24520874023438, -13.40165901184082, -105.617919921875, -26.178146362304688, 325.1820068359375, 226.0143280029297, 417.9644775390625, 422.44244384765625, 304.9771728515625, 321.5752868652344, 383.8498229980469, -69.83806610107422, 157.05914306640625, -164.62213134765625, 66.14385986328125, 89.56023406982422, 192.1554718017578, 179.34030151367188, 154.4393310546875, -65.27957153320312, 284.50701904296875, 310.6584777832031, 207.2618408203125, 324.2593994140625, 121.3205337524414, 0.7661628723144531, 278.8727111816406, 113.37650299072266, 600.9951171875, 39.82518005371094, 186.1311798095703, 193.11476135253906, 154.12594604492188, 42.26127624511719, 376.5932922363281, 442.2773742675781, -185.11183166503906, 198.47845458984375, 228.08868408203125, 618.6817626953125, 17.333831787109375, 503.8885498046875, 143.9298553466797, 305.0562744140625, -78.80884552001953, 290.36328125, 36.9872932434082, 226.68431091308594, 379.0953063964844, -38.92066955566406, 208.36392211914062, 5.69561767578125, -98.77317810058594, -47.54346466064453, 285.28314208984375, 251.68408203125, 69.14067840576172, 400.126708984375, 147.94482421875, 174.119140625, 411.9945068359375, 446.9162292480469, 166.30767822265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000272.npy"} +{"epoch": 0.39941262848751835, "step": 273, "batch_size": 64, "mean": 185.6915283203125, "std": 198.50830078125, "min": -267.08868408203125, "p10": -71.54261245727538, "median": 170.5712661743164, "p90": 437.0334411621095, "max": 666.9568481445312, "pos_frac": 0.8125, "sample": [243.0594482421875, 303.793212890625, 65.52462768554688, 171.56478881835938, 570.9861450195312, 479.0562744140625, 154.61080932617188, -72.64087677001953, 598.6105346679688, -68.97999572753906, 409.9940185546875, 193.9339599609375, -32.481597900390625, 88.38262176513672, 8.242341995239258, 146.38629150390625, 2.7190208435058594, 199.184326171875, 195.15194702148438, 347.4300537109375, 217.3047637939453, 42.38676452636719, 66.658935546875, 169.57774353027344, 88.28874206542969, 71.23511505126953, 346.2392578125, 154.79708862304688, 167.20986938476562, 185.77096557617188, 314.34588623046875, -18.381431579589844, 370.7090148925781, 350.25384521484375, -22.953035354614258, 603.4659423828125, 538.0652465820312, 240.52879333496094, 176.68711853027344, 282.47344970703125, -163.37347412109375, -115.90032958984375, 276.4932861328125, -75.1389389038086, 257.4095458984375, 318.1490478515625, 357.4544677734375, 52.5849609375, 123.67601013183594, -267.08868408203125, 666.9568481445312, 132.36599731445312, 102.71464538574219, -80.10113525390625, -7.560821533203125, 338.8234558105469, 15.642044067382812, 99.16375732421875, 289.15325927734375, -73.56520080566406, 334.6429443359375, 115.54122161865234, 388.4008483886719, 448.62176513671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000273.npy"} +{"epoch": 0.4008810572687225, "step": 274, "batch_size": 64, "mean": 137.7598876953125, "std": 183.34228515625, "min": -219.91419982910156, "p10": -91.91508026123046, "median": 121.70694732666016, "p90": 415.1996582031252, "max": 512.55810546875, "pos_frac": 0.75, "sample": [56.073829650878906, 75.38705444335938, 36.75303649902344, -154.2250518798828, 222.56488037109375, 168.4761962890625, 224.73008728027344, -27.20258331298828, 117.23455047607422, -31.784698486328125, -125.10250854492188, 457.2338562011719, 315.91741943359375, 219.40509033203125, 233.03701782226562, 149.4137725830078, 20.687889099121094, 453.3565673828125, -219.91419982910156, 289.6108093261719, -86.30928039550781, 115.38624572753906, -62.534454345703125, 67.19436645507812, 61.55718231201172, 259.197998046875, 494.1496887207031, -94.31756591796875, 119.10091400146484, 512.1041259765625, -44.29933547973633, 119.18313598632812, 339.7173767089844, 147.3948974609375, 20.51873207092285, 148.42970275878906, 433.889892578125, -66.66073608398438, 162.10792541503906, 94.99742126464844, 282.1814880371094, -23.768692016601562, 118.59156799316406, 255.3621063232422, 368.74676513671875, -74.4572982788086, 124.23075866699219, 29.730947494506836, 169.90167236328125, 512.55810546875, 277.2360534667969, 116.39759826660156, 3.2948074340820312, 157.713623046875, -163.65869140625, 313.30322265625, -141.017822265625, -111.39535522460938, 506.2310791015625, 199.24777221679688, 124.969482421875, 371.589111328125, -12.210418701171875, 189.39413452148438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000274.npy"} +{"epoch": 0.4023494860499266, "step": 275, "batch_size": 64, "mean": 148.71762084960938, "std": 182.68162536621094, "min": -224.30813598632812, "p10": -69.08231048583983, "median": 146.82706451416016, "p90": 394.8623260498047, "max": 550.0008544921875, "pos_frac": 0.796875, "sample": [180.8595733642578, 62.78117370605469, 278.3246154785156, 8.906253814697266, 10.976568222045898, -78.91230010986328, 361.0611877441406, 134.6954803466797, 480.5027770996094, -82.46404266357422, 264.9060974121094, 460.7295227050781, 509.36474609375, 81.54816436767578, 158.53036499023438, 223.3612823486328, -32.612552642822266, 3.49493408203125, 132.8076171875, 189.7936553955078, -134.96673583984375, 227.59458923339844, 400.3073425292969, -16.92858123779297, 51.346229553222656, -191.21646118164062, 284.47027587890625, 165.28369140625, 176.34072875976562, 77.09581756591797, -94.55846405029297, 26.402297973632812, 332.9693603515625, 246.36312866210938, 5.381589889526367, 20.690444946289062, -195.15786743164062, -41.610504150390625, -39.41761016845703, 34.93913269042969, 75.93008422851562, 280.30377197265625, -224.30813598632812, 168.33033752441406, 248.24082946777344, 382.15728759765625, 207.084716796875, 550.0008544921875, 179.4649200439453, 461.6917724609375, 436.22149658203125, -22.933549880981445, 243.96884155273438, 365.3583984375, 61.578468322753906, 297.5687561035156, 135.12376403808594, 343.0783996582031, -46.145668029785156, 204.41018676757812, 35.630638122558594, 313.825439453125, 82.385498046875, 54.977630615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000275.npy"} +{"epoch": 0.40381791483113066, "step": 276, "batch_size": 64, "mean": 148.5008544921875, "std": 199.32806396484375, "min": -240.4735107421875, "p10": -91.3485458374023, "median": 154.88316345214844, "p90": 401.8749908447267, "max": 798.1087036132812, "pos_frac": 0.765625, "sample": [110.11258697509766, 798.1087036132812, 152.4369659423828, 416.10321044921875, 491.05255126953125, -3.3592166900634766, -5.4404296875, 512.327880859375, 14.285078048706055, 152.65284729003906, 157.1134796142578, 29.93506622314453, -26.13861083984375, 234.59878540039062, 273.60699462890625, 240.868408203125, 194.25009155273438, -228.29946899414062, 151.9206085205078, 15.549747467041016, 34.78080368041992, 302.7705078125, 179.6354217529297, 289.51837158203125, -108.75321960449219, -240.4735107421875, -31.767902374267578, 87.13804626464844, 217.3872833251953, 338.6398010253906, -50.737640380859375, 184.4895477294922, 58.93163299560547, 192.88998413085938, 332.01312255859375, 281.4653015136719, -143.16726684570312, -221.6210479736328, 1.725320816040039, -18.403568267822266, -126.37509155273438, -30.46820068359375, 57.85944747924805, 368.6758117675781, 75.3163833618164, 164.64642333984375, 187.0540008544922, 217.7423858642578, 267.1458740234375, 59.67255401611328, 438.02105712890625, -36.45066833496094, 165.6609649658203, 276.7261047363281, 76.0560302734375, 239.0835723876953, 222.8949432373047, 178.03839111328125, 361.67327880859375, 77.90145874023438, -147.16482543945312, 589.164794921875, 36.45501708984375, 416.5782470703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000276.npy"} +{"epoch": 0.4052863436123348, "step": 277, "batch_size": 64, "mean": 156.3212890625, "std": 202.14923095703125, "min": -280.46435546875, "p10": -106.0804618835449, "median": 141.84002685546875, "p90": 459.7262786865235, "max": 606.288818359375, "pos_frac": 0.765625, "sample": [-0.16156387329101562, 371.94525146484375, -153.56715393066406, 139.07012939453125, 53.787776947021484, 248.94268798828125, 351.29632568359375, 144.60992431640625, -77.04188537597656, 187.26708984375, 249.4940185546875, -42.728248596191406, 335.6636962890625, -43.566383361816406, 240.3054962158203, 355.947021484375, 70.01085662841797, 144.72317504882812, 198.5430450439453, 307.87103271484375, -118.52556610107422, 357.3395080566406, 95.62901306152344, 81.1429214477539, -186.00497436523438, -26.552955627441406, 492.05841064453125, 10.026153564453125, -19.600921630859375, 94.82061767578125, -280.46435546875, 496.22528076171875, 164.66741943359375, 300.0782165527344, -245.7745361328125, -163.30477905273438, 606.288818359375, 276.660888671875, 146.59463500976562, -6.49932861328125, 231.51580810546875, 15.806838989257812, 499.90234375, 55.15272903442383, 11.77142333984375, 117.35230255126953, 217.69992065429688, 382.5531005859375, 449.82611083984375, 475.45867919921875, -152.5955810546875, 277.1455993652344, 77.9637222290039, 75.61945343017578, 11.692062377929688, -0.7522087097167969, 255.6286163330078, 118.75177764892578, 463.9692077636719, 474.025634765625, 117.49179077148438, 176.3357391357422, 99.58954620361328, 395.4415588378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000277.npy"} +{"epoch": 0.4067547723935389, "step": 278, "batch_size": 64, "mean": 178.59414672851562, "std": 198.81155395507812, "min": -138.64137268066406, "p10": -49.43115043640135, "median": 138.73369598388672, "p90": 472.77458190917974, "max": 608.4790649414062, "pos_frac": 0.765625, "sample": [-117.75872802734375, 398.9120788574219, -82.77291107177734, 49.12741470336914, 285.5068054199219, 335.02313232421875, 203.69937133789062, 60.5097770690918, 153.7920684814453, 107.95276641845703, 237.47396850585938, 201.20957946777344, 23.91400718688965, -24.451080322265625, 114.2203369140625, 284.51104736328125, -25.427993774414062, 326.03228759765625, 460.5507507324219, 47.22264862060547, -10.72227668762207, 574.4598999023438, 401.74066162109375, 205.888427734375, 241.52151489257812, 327.83001708984375, 104.49613189697266, -11.337882995605469, 151.67013549804688, 531.5180053710938, 441.2208557128906, -67.77760314941406, 113.91848754882812, -25.831298828125, 567.2404174804688, 478.01336669921875, 17.559959411621094, -20.48175048828125, 142.90863037109375, 228.05056762695312, 103.00501251220703, 350.76019287109375, 191.37432861328125, 68.58221435546875, 40.358245849609375, 531.065673828125, 123.2339859008789, 255.87709045410156, -54.52011489868164, 580.3096313476562, 184.11708068847656, 134.5587615966797, 47.227783203125, 28.72173309326172, 340.1633605957031, 299.043212890625, -37.55690002441406, -18.454885482788086, 608.4790649414062, -138.64137268066406, 424.0128479003906, 132.88394165039062, -125.80441284179688, -69.90455627441406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000278.npy"} +{"epoch": 0.40822320117474303, "step": 279, "batch_size": 64, "mean": 172.298583984375, "std": 230.1483154296875, "min": -350.374755859375, "p10": -63.445133972167966, "median": 148.60889434814453, "p90": 472.3817047119141, "max": 849.2435302734375, "pos_frac": 0.765625, "sample": [67.76362609863281, 74.95360565185547, 30.135242462158203, 43.777374267578125, -0.896820068359375, -60.413352966308594, 407.0142822265625, 475.3942565917969, -3.7512893676757812, -53.382564544677734, 3.9839401245117188, 238.2412872314453, 171.78192138671875, 36.27268981933594, 493.3965148925781, 590.1666259765625, 66.14442443847656, 179.245361328125, 277.21478271484375, 275.0309143066406, 656.4932861328125, 77.21825408935547, 13.748266220092773, -246.57485961914062, 62.20872497558594, 59.07404327392578, -20.393310546875, 166.079833984375, 86.96456909179688, 539.8953857421875, 108.95269775390625, 41.966331481933594, 218.6536865234375, -64.74446868896484, 267.8547668457031, 465.3524169921875, 244.35891723632812, 849.2435302734375, 496.724365234375, 304.5892333984375, -43.29637145996094, -43.8328857421875, -154.31411743164062, -350.374755859375, 156.41586303710938, 430.5660705566406, 94.60565185546875, 226.6034698486328, 378.18255615234375, 140.8019256591797, 397.6475830078125, 334.00115966796875, 70.3415298461914, -92.09683227539062, 464.1505432128906, 217.89068603515625, 458.13177490234375, 245.56285095214844, -118.046630859375, -229.05006408691406, 357.55072021484375, 208.07559204101562, -19.402912139892578, 257.2574462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000279.npy"} +{"epoch": 0.40969162995594716, "step": 280, "batch_size": 64, "mean": 185.492431640625, "std": 194.63612365722656, "min": -308.223876953125, "p10": -72.43897247314453, "median": 174.5429916381836, "p90": 417.4059112548828, "max": 604.3931274414062, "pos_frac": 0.765625, "sample": [118.63014221191406, 305.63275146484375, 160.64700317382812, 235.6936492919922, -12.717697143554688, -44.97248077392578, 398.3603210449219, 503.082275390625, 498.22747802734375, 51.29278564453125, 233.38856506347656, -15.259635925292969, -60.14686965942383, 417.9215087890625, 416.2028503417969, 393.048583984375, 135.2284698486328, 281.742919921875, 159.54420471191406, 303.5212097167969, 108.21922302246094, 512.1751708984375, 402.62890625, 220.0813751220703, 231.12576293945312, 91.76763916015625, 371.16424560546875, 325.71435546875, -3.95037841796875, 235.08001708984375, 155.00527954101562, 153.312744140625, 523.2154541015625, 128.55453491210938, -65.65713500976562, 170.85594177246094, -142.772216796875, 387.8056945800781, 258.59930419921875, 60.684608459472656, 265.2461242675781, 122.33264923095703, -85.05125427246094, 604.3931274414062, -92.49519348144531, -308.223876953125, 124.9289321899414, 477.9390563964844, 178.23004150390625, 134.55645751953125, 190.9436492919922, 408.7120361328125, -75.34547424316406, 179.3138885498047, -33.552337646484375, -90.65935516357422, 201.3316650390625, 159.75511169433594, 82.36740112304688, 347.0078125, 353.44146728515625, -149.99917602539062, -28.523117065429688, 302.18768310546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000280.npy"} +{"epoch": 0.4111600587371512, "step": 281, "batch_size": 64, "mean": 167.29629516601562, "std": 194.29412841796875, "min": -629.353515625, "p10": -28.54589118957517, "median": 161.98432159423828, "p90": 409.01147460937506, "max": 592.7565307617188, "pos_frac": 0.875, "sample": [125.77974700927734, 292.70343017578125, 137.98345947265625, 242.09486389160156, 58.08340835571289, 224.92501831054688, 24.588912963867188, 186.66796875, 174.1100616455078, 52.51628875732422, 256.19085693359375, 385.32244873046875, 267.64166259765625, 69.04297637939453, 245.5022735595703, -83.04705810546875, 186.02548217773438, 537.2951049804688, 66.30070495605469, 149.85858154296875, 34.186553955078125, 391.8079833984375, 284.5826416015625, 592.7565307617188, 234.46920776367188, 291.2095642089844, 181.41004943847656, 326.05999755859375, -39.38411331176758, 584.6779174804688, 300.3790588378906, 87.83978271484375, 204.10757446289062, 416.3843994140625, 188.77728271484375, -629.353515625, 91.34663391113281, 59.13887023925781, 42.09966278076172, 77.45037841796875, 494.80670166015625, 19.094772338867188, 8.884353637695312, 270.07952880859375, 176.733642578125, 252.34686279296875, 48.47803497314453, 105.78977966308594, 496.4046630859375, 87.28524017333984, -91.88285064697266, 135.74334716796875, -143.2393341064453, 90.10919189453125, -50.03070068359375, -3.2567062377929688, -58.462646484375, 380.2032470703125, 233.63185119628906, 81.97969818115234, 145.062255859375, 280.3582763671875, 7.378416061401367, 419.9326477050781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000281.npy"} +{"epoch": 0.41262848751835535, "step": 282, "batch_size": 64, "mean": 129.69415283203125, "std": 220.09547424316406, "min": -235.2471923828125, "p10": -110.71448211669922, "median": 114.64042282104492, "p90": 364.25322570800785, "max": 1235.080810546875, "pos_frac": 0.765625, "sample": [459.23974609375, 156.32591247558594, 66.79872131347656, -206.27703857421875, 183.8607635498047, 241.634033203125, 99.785888671875, 22.133865356445312, 128.552978515625, 372.37701416015625, 261.7898254394531, 219.27899169921875, 364.8630676269531, 100.61710357666016, -35.76756286621094, 226.462158203125, 1235.080810546875, 170.59103393554688, 177.72805786132812, 116.86187744140625, 147.27734375, 224.56887817382812, 14.319927215576172, -151.60720825195312, 511.37890625, 390.36334228515625, 66.66830444335938, 162.27696228027344, 154.55300903320312, 509.34466552734375, 283.71612548828125, 198.72122192382812, 279.8090515136719, 7.8235321044921875, -30.248096466064453, 151.02963256835938, -97.92832946777344, -104.9188461303711, 66.74755859375, -143.0244140625, -23.3079833984375, 37.09783935546875, 112.4189682006836, 137.84207153320312, 258.0120849609375, 7.279884338378906, -130.80276489257812, 344.9171142578125, 216.57867431640625, 123.62161254882812, -82.5706558227539, -68.06724548339844, 25.554885864257812, 52.39822769165039, 90.89976501464844, -171.10833740234375, 279.40850830078125, 24.112897872924805, -113.19832611083984, 362.83026123046875, -235.2471923828125, 40.75852584838867, -31.7314395904541, 39.91947937011719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000282.npy"} +{"epoch": 0.41409691629955947, "step": 283, "batch_size": 64, "mean": 200.64541625976562, "std": 209.287353515625, "min": -440.1202392578125, "p10": -47.55516815185546, "median": 190.51596069335938, "p90": 488.37937622070314, "max": 636.6910400390625, "pos_frac": 0.828125, "sample": [-63.71481704711914, -75.8042221069336, 224.1058807373047, 26.919340133666992, 378.6461181640625, 333.47235107421875, -57.42376708984375, 240.3588104248047, 401.9041442871094, 38.785911560058594, 63.139915466308594, 142.63206481933594, -24.872737884521484, 85.49452209472656, 104.36893463134766, 224.0353546142578, -440.1202392578125, 62.1422233581543, 418.45538330078125, -57.86803436279297, 502.0285949707031, 455.16278076171875, 191.0099334716797, 329.1278076171875, 47.3942985534668, 164.17398071289062, -50.47911071777344, 489.579833984375, 350.22735595703125, 508.91741943359375, 47.326629638671875, 290.2384338378906, -155.2294158935547, 205.80142211914062, 477.19366455078125, 77.07432556152344, 84.67062377929688, -8.969532012939453, 83.4926528930664, -40.732635498046875, 540.6580200195312, 190.02198791503906, 327.6020202636719, 213.023681640625, 284.1288757324219, 178.79779052734375, 92.29033660888672, 223.27841186523438, 427.84088134765625, 322.4447326660156, 580.5191650390625, 16.583396911621094, 375.35223388671875, 176.249267578125, 66.38797760009766, 293.7470397949219, 548.047607421875, 201.87123107910156, 407.06817626953125, 636.6910400390625, 485.57830810546875, 113.18860626220703, 101.7398452758789, -34.47096252441406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000283.npy"} +{"epoch": 0.4155653450807636, "step": 284, "batch_size": 64, "mean": 183.906494140625, "std": 199.7742156982422, "min": -204.06301879882812, "p10": -38.084731292724605, "median": 165.7076187133789, "p90": 452.7003204345704, "max": 753.590576171875, "pos_frac": 0.875, "sample": [-88.16464233398438, 107.01260375976562, 318.18798828125, 39.531494140625, 104.47791290283203, 49.69678497314453, 341.09991455078125, 18.34429168701172, 257.10211181640625, 428.981689453125, 363.05035400390625, -54.793582916259766, 462.8654479980469, 27.682764053344727, 279.4590148925781, 56.9095344543457, 32.181312561035156, 245.62960815429688, 312.87445068359375, 504.6548767089844, 56.710693359375, 649.7744750976562, 173.08306884765625, 286.5901184082031, 141.2330322265625, 21.864486694335938, 158.33216857910156, 89.3819580078125, 287.9736328125, -36.34788513183594, 26.752334594726562, 697.5629272460938, 189.81561279296875, -56.190330505371094, 207.50711059570312, 91.4444580078125, -83.7505874633789, 65.54064178466797, 218.88592529296875, 753.590576171875, 73.52458190917969, 186.58438110351562, 194.35745239257812, 26.055543899536133, -89.5816650390625, -204.06301879882812, 295.63372802734375, 301.1319274902344, 131.0217742919922, 252.95802307128906, 568.8989868164062, 177.70782470703125, 306.5960693359375, 226.1791229248047, 38.315673828125, 10.938159942626953, -38.82909393310547, 6.492042541503906, 371.55181884765625, 248.9883270263672, 27.838600158691406, 507.0194091796875, 28.092918395996094, 376.0650329589844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000284.npy"} +{"epoch": 0.4170337738619677, "step": 285, "batch_size": 64, "mean": 157.22454833984375, "std": 190.25119018554688, "min": -491.768310546875, "p10": -59.79582595825194, "median": 149.9861068725586, "p90": 380.6869567871094, "max": 598.0444946289062, "pos_frac": 0.84375, "sample": [366.9393310546875, 245.19161987304688, 45.83740997314453, 386.57879638671875, 124.6105728149414, 122.54119873046875, 140.64175415039062, 82.35968780517578, 160.15440368652344, 559.155517578125, 293.55908203125, 152.04359436035156, 320.9173278808594, 116.49689483642578, 129.90296936035156, 552.4676513671875, -17.5018310546875, 109.35696411132812, 145.4490966796875, 238.41854858398438, 422.2738342285156, -233.79879760742188, 57.888389587402344, 309.1543273925781, 411.6089782714844, 6.212409973144531, -65.2474365234375, 92.96881103515625, 92.85760498046875, 46.156898498535156, 257.07513427734375, 208.6230926513672, -93.93180084228516, 54.15511703491211, 274.8907775878906, 321.906494140625, 23.084388732910156, -491.768310546875, 239.83303833007812, 35.733673095703125, 159.1700897216797, 1.447174072265625, 598.0444946289062, 272.8970642089844, -47.075401306152344, 308.72442626953125, 230.20782470703125, 133.74154663085938, 229.98037719726562, 331.4783630371094, 168.12527465820312, -141.86131286621094, 288.7049255371094, 191.95315551757812, -165.10182189941406, -32.42282485961914, 400.199462890625, 90.18489074707031, 95.83971405029297, 203.7449188232422, -157.7315673828125, 328.373046875, 181.020751953125, 147.92861938476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000285.npy"} +{"epoch": 0.4185022026431718, "step": 286, "batch_size": 64, "mean": 177.40138244628906, "std": 222.14573669433594, "min": -350.8638610839844, "p10": -93.3379135131836, "median": 183.7704315185547, "p90": 477.2913330078125, "max": 662.489013671875, "pos_frac": 0.78125, "sample": [62.529563903808594, -80.54180145263672, 503.5749816894531, 173.8790740966797, 167.796875, 217.8358154296875, 31.85314178466797, 210.5685272216797, 324.7898254394531, -350.8638610839844, 296.3067932128906, 248.96255493164062, 40.22050476074219, -67.7086410522461, 411.9643859863281, 193.3760986328125, 559.6024169921875, 6.363616943359375, 205.45101928710938, 175.8138427734375, 481.42486572265625, 57.065826416015625, -297.5858154296875, 450.38482666015625, 387.39202880859375, -124.61956787109375, -95.57379150390625, 348.33856201171875, 315.0783386230469, 549.125244140625, 502.16497802734375, -152.009033203125, 214.718994140625, 105.86074829101562, -33.59260559082031, 78.39955139160156, 374.8265380859375, 143.9893035888672, 46.812225341796875, -80.98204040527344, 161.4745330810547, -68.78874969482422, 193.591064453125, -88.12086486816406, 112.39724731445312, 400.15289306640625, 457.57806396484375, 109.7510986328125, -119.95697784423828, 583.429931640625, -69.83438873291016, 244.05789184570312, 69.35159301757812, 467.64642333984375, 64.6923828125, 310.087890625, 191.72702026367188, 241.3475341796875, 271.276611328125, -141.63931274414062, 662.489013671875, 385.9985046386719, 217.76791381835938, 94.2166519165039], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000286.npy"} +{"epoch": 0.4199706314243759, "step": 287, "batch_size": 64, "mean": 148.47613525390625, "std": 196.89947509765625, "min": -267.3529357910156, "p10": -59.70682525634764, "median": 131.13842010498047, "p90": 442.34926147460953, "max": 718.28466796875, "pos_frac": 0.796875, "sample": [136.48963928222656, 20.35992431640625, 8.866674423217773, 68.73320770263672, 129.18626403808594, -30.019378662109375, 304.0462341308594, 591.626220703125, 166.8810272216797, 144.41033935546875, 515.653076171875, 91.28474426269531, 196.0498504638672, 513.1119995117188, -47.597015380859375, -64.89674377441406, 36.81847381591797, 517.5424194335938, 140.44647216796875, 326.54486083984375, 177.93356323242188, 6.81085205078125, 139.8153076171875, -267.3529357910156, 718.28466796875, 111.9686279296875, 59.774757385253906, -31.378173828125, -35.97633361816406, -16.066871643066406, 133.090576171875, 4.832298278808594, -145.35842895507812, 107.88999938964844, 55.9699592590332, 232.47129821777344, 64.88357543945312, 237.05711364746094, 135.5165252685547, 256.73236083984375, -10.785707473754883, 291.3516845703125, 183.78646850585938, 106.98146057128906, 26.793319702148438, 125.89651489257812, 78.13018798828125, 17.563339233398438, 459.30224609375, 397.2320556640625, -170.59042358398438, 58.777976989746094, 223.677978515625, 136.9819793701172, 369.6527404785156, -178.75363159179688, -150.8481903076172, -71.11907196044922, 239.09210205078125, 306.8487854003906, 476.3111572265625, 402.79229736328125, 215.4197235107422, 255.54013061523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000287.npy"} +{"epoch": 0.42143906020558003, "step": 288, "batch_size": 64, "mean": 185.25796508789062, "std": 195.3400115966797, "min": -290.50347900390625, "p10": -2.6403162002563363, "median": 171.15890502929688, "p90": 416.2668395996094, "max": 685.1873779296875, "pos_frac": 0.890625, "sample": [223.82962036132812, 177.17922973632812, -7.433223724365234, 189.25721740722656, 305.920166015625, 417.22314453125, 8.543134689331055, 685.1873779296875, 53.74885559082031, 378.5315246582031, 262.4872131347656, 103.56478881835938, -78.0883560180664, 15.50238037109375, 82.44033813476562, 156.2942657470703, 131.48655700683594, 374.8249816894531, 272.1251220703125, 350.6325988769531, 172.90869140625, 280.3168029785156, 274.87646484375, 13.69830322265625, 88.97636413574219, 238.91061401367188, 238.47409057617188, 60.52326202392578, 169.40911865234375, 72.45606994628906, 184.31570434570312, 227.55450439453125, 555.0787963867188, 159.02679443359375, 414.03546142578125, 288.548095703125, 157.77333068847656, 15.21038818359375, 184.53268432617188, 89.0013198852539, -286.8145751953125, 583.70361328125, 109.8653564453125, 302.239501953125, 74.44313049316406, 168.27584838867188, 73.34488677978516, 51.54450988769531, 582.65576171875, 109.68157196044922, 215.75282287597656, 570.3743286132812, 208.38638305664062, -290.50347900390625, 65.67807006835938, 499.764404296875, 275.6109619140625, 39.01622009277344, 64.14984130859375, 361.5322570800781, -13.979255676269531, 385.32861328125, -199.57899475097656, -82.84548950195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000288.npy"} +{"epoch": 0.42290748898678415, "step": 289, "batch_size": 64, "mean": 209.41796875, "std": 222.99002075195312, "min": -173.83245849609375, "p10": -47.55567016601562, "median": 149.1488037109375, "p90": 527.4105834960939, "max": 698.21826171875, "pos_frac": 0.859375, "sample": [165.62765502929688, -50.983917236328125, 611.8580932617188, 668.6036376953125, -155.6293487548828, 566.8831787109375, 430.0162353515625, 318.1545104980469, 81.8175048828125, 597.5146484375, 479.4252014160156, 416.9608154296875, 263.205078125, 506.26873779296875, 367.5694580078125, 274.52789306640625, 38.16087341308594, 698.21826171875, -37.98882293701172, 269.8521728515625, 127.44149780273438, 48.328094482421875, 138.14620971679688, 10.970203399658203, -97.61892700195312, 21.083309173583984, 286.50579833984375, 437.3718566894531, 65.61373901367188, 144.59014892578125, 438.8018493652344, 626.5111694335938, 388.0589904785156, -55.29606246948242, 37.798866271972656, 25.717575073242188, 536.4713745117188, 39.37666702270508, -59.80857849121094, 15.617231369018555, 111.83062744140625, 482.66357421875, -39.556427001953125, 237.87728881835938, 180.23486328125, 415.82684326171875, 429.0880432128906, 2.116395950317383, 140.7063751220703, -110.87483215332031, 141.2711944580078, 153.70745849609375, 99.38119506835938, 35.98353576660156, 469.50048828125, 178.25653076171875, 37.795135498046875, 27.35222625732422, 101.30758666992188, 223.73373413085938, 259.9410095214844, -173.83245849609375, 250.17813110351562, 62.51903533935547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000289.npy"} +{"epoch": 0.4243759177679883, "step": 290, "batch_size": 64, "mean": 210.49253845214844, "std": 255.2123565673828, "min": -334.87432861328125, "p10": -68.56448745727539, "median": 194.30931854248047, "p90": 596.8032104492188, "max": 951.947509765625, "pos_frac": 0.828125, "sample": [510.72784423828125, 171.73513793945312, 106.86881256103516, 164.0107421875, -334.87432861328125, 378.2066345214844, 439.897216796875, -70.02755737304688, 28.117324829101562, 130.7261505126953, -197.12631225585938, 377.3943786621094, 241.20306396484375, 40.586090087890625, 233.1905517578125, 225.98214721679688, 551.0110473632812, -33.783111572265625, 223.57070922851562, 19.93472671508789, 59.17958068847656, 71.54197692871094, -185.48138427734375, 157.08740234375, 114.005126953125, -255.63897705078125, 167.04766845703125, 319.8685302734375, 951.947509765625, 685.9820556640625, 71.01557922363281, 231.35072326660156, 301.24591064453125, 618.7108154296875, 193.0584716796875, 65.74165344238281, 293.5506286621094, 110.63932800292969, 336.5488586425781, 239.0670166015625, 37.65618896484375, 213.2917938232422, 195.56016540527344, 298.94635009765625, 35.90324401855469, 278.82928466796875, -65.1506576538086, -23.727327346801758, 323.89697265625, -128.10354614257812, 686.8070068359375, 394.33538818359375, 251.70712280273438, 588.070556640625, 14.839183807373047, 636.1763916015625, 600.5457763671875, 277.8836364746094, -1.3515510559082031, 52.053924560546875, 68.85415649414062, 822.058837890625, 240.67117309570312, -82.05105590820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000290.npy"} +{"epoch": 0.42584434654919234, "step": 291, "batch_size": 64, "mean": 190.36480712890625, "std": 222.8323211669922, "min": -414.1673889160156, "p10": -52.285391235351554, "median": 199.96786499023438, "p90": 447.607211303711, "max": 649.671875, "pos_frac": 0.765625, "sample": [207.56236267089844, 192.3733673095703, 9.485671997070312, 25.333139419555664, -9.396743774414062, 277.26031494140625, 620.956298828125, 293.1699523925781, 472.696044921875, 283.8470153808594, 73.63954162597656, 416.60577392578125, 268.3681640625, -26.955223083496094, 649.671875, -45.56121826171875, 435.82672119140625, 242.6634063720703, -18.06292152404785, 242.4158172607422, 442.97216796875, -1.8536529541015625, 309.794677734375, 90.62240600585938, 267.83062744140625, 420.1885986328125, 140.40750122070312, 190.1298065185547, 449.5936584472656, 29.811626434326172, 307.31719970703125, 113.4247817993164, 339.29327392578125, -273.3763427734375, 55.31494140625, -140.86862182617188, -45.943382263183594, 53.108299255371094, -414.1673889160156, 580.5804443359375, 364.5389404296875, 434.7288513183594, 395.58587646484375, 225.40789794921875, -84.69845581054688, -36.175201416015625, -79.02987670898438, 373.65618896484375, 51.40388488769531, 151.10043334960938, 82.26643371582031, 171.12991333007812, 360.269287109375, 311.1983947753906, -11.724456787109375, 218.84579467773438, 417.69732666015625, 70.65754699707031, 538.5852661132812, -55.003395080566406, -151.57693481445312, 373.30108642578125, 533.1115112304688, 1.9916973114013672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000291.npy"} +{"epoch": 0.42731277533039647, "step": 292, "batch_size": 64, "mean": 203.92965698242188, "std": 262.3307800292969, "min": -653.1588134765625, "p10": -71.30237159729002, "median": 199.55672454833984, "p90": 542.2282531738281, "max": 807.18115234375, "pos_frac": 0.796875, "sample": [237.5003662109375, 30.86966323852539, -7.5624542236328125, 195.82589721679688, 317.9940490722656, 227.86483764648438, 112.8507080078125, 119.46868133544922, 203.20809936523438, -653.1588134765625, 734.9251708984375, 50.059532165527344, 609.6227416992188, 142.612060546875, 339.08941650390625, -82.16793823242188, -269.19940185546875, 452.3919372558594, 60.80487823486328, 325.150634765625, 3.5456924438476562, 756.5081787109375, 412.9684143066406, 218.63174438476562, 451.8395080566406, 541.841552734375, 275.34552001953125, 479.6505126953125, 176.10845947265625, 135.09732055664062, -160.02877807617188, 248.7496337890625, 10.016939163208008, 539.6542358398438, -11.475959777832031, 136.63919067382812, -93.85385131835938, 169.39744567871094, 113.18858337402344, 117.67961120605469, 36.25669860839844, 542.3939819335938, 221.66635131835938, 195.9053497314453, 217.49310302734375, 239.7931365966797, 407.8848571777344, 368.833740234375, 807.18115234375, -28.389284133911133, 183.61111450195312, 322.7503967285156, 242.30760192871094, -224.2810516357422, 254.1982879638672, -195.85150146484375, 728.7909545898438, -45.94938278198242, -40.201656341552734, 571.8243408203125, 211.1774139404297, 236.42861938476562, 138.09657287597656, -10.076700210571289], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000292.npy"} +{"epoch": 0.4287812041116006, "step": 293, "batch_size": 64, "mean": 167.72970581054688, "std": 258.4353332519531, "min": -333.13763427734375, "p10": -142.8237518310547, "median": 144.8123779296875, "p90": 476.6916961669922, "max": 999.3621826171875, "pos_frac": 0.734375, "sample": [39.07621765136719, 44.9227294921875, 21.893896102905273, 78.24738311767578, 240.82388305664062, 224.21034240722656, 492.1089782714844, 176.93226623535156, 490.11895751953125, 49.87744903564453, 476.8979187011719, -83.20875549316406, 253.179931640625, 15.813236236572266, 999.3621826171875, -260.2066345214844, 295.3479309082031, 182.93704223632812, 107.09255981445312, 304.5707702636719, 391.7350769042969, -144.171142578125, 301.31329345703125, -61.66657257080078, 44.33599853515625, 81.55152130126953, -124.93732452392578, 164.0971221923828, -57.56797790527344, 523.9264526367188, 452.10394287109375, 354.4917297363281, 344.01806640625, 439.4097595214844, 342.1193542480469, -164.2631378173828, 80.81907653808594, 164.53564453125, 231.27407836914062, -39.42717742919922, -110.25493621826172, 81.82950592041016, 7.10174560546875, 258.8061828613281, 324.27166748046875, 398.6826477050781, 256.9227294921875, 329.8837890625, 125.52763366699219, -333.13763427734375, -22.935333251953125, -250.71070861816406, -147.28651428222656, 476.21051025390625, 896.2132568359375, 119.92598724365234, -139.67984008789062, 181.48199462890625, 76.94673156738281, -105.44978332519531, -151.26434326171875, 403.4626159667969, 585.0216674804688, -0.5649662017822266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000293.npy"} +{"epoch": 0.4302496328928047, "step": 294, "batch_size": 64, "mean": 180.9390106201172, "std": 252.9709930419922, "min": -326.4017639160156, "p10": -118.647622680664, "median": 113.79232788085938, "p90": 541.6396484375, "max": 899.999755859375, "pos_frac": 0.859375, "sample": [39.68280792236328, 80.87774658203125, 322.6628723144531, 426.7301330566406, 63.13330078125, 30.749187469482422, -152.62045288085938, 142.76907348632812, 5.89093017578125, 56.97296905517578, 18.896896362304688, 27.980026245117188, -51.82442855834961, 580.3214111328125, 36.766746520996094, 317.1308898925781, 180.03501892089844, 446.23388671875, 19.150196075439453, 173.93678283691406, 768.9276123046875, -171.1724090576172, 263.45452880859375, 60.641693115234375, 691.422607421875, 439.5895690917969, 315.69512939453125, 171.0049591064453, 191.03079223632812, -147.2861328125, 289.3864440917969, 322.6746520996094, 311.0080871582031, 899.999755859375, -182.20462036132812, -169.56752014160156, 699.9041748046875, 177.28858947753906, 523.8421630859375, 12.508552551269531, -326.4017639160156, -206.38674926757812, 274.36492919921875, 331.36566162109375, 636.3304443359375, 116.22959899902344, 34.69974136352539, 134.3026123046875, 531.3383178710938, 48.79432678222656, 329.8326416015625, 342.6609802246094, -21.495948791503906, 111.35505676269531, 43.72098922729492, 68.19818115234375, 38.368709564208984, 23.737388610839844, 31.468334197998047, 165.2220458984375, 546.0545043945312, 66.93276977539062, 6.840240478515625, 18.93878746032715], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000294.npy"} +{"epoch": 0.43171806167400884, "step": 295, "batch_size": 64, "mean": 203.06219482421875, "std": 272.74285888671875, "min": -322.7943115234375, "p10": -150.92278671264646, "median": 197.32894134521484, "p90": 549.9784240722656, "max": 1021.1690673828125, "pos_frac": 0.765625, "sample": [-322.7943115234375, -76.83148193359375, 643.3818359375, 12.060426712036133, -291.65679931640625, 216.96347045898438, -170.9935760498047, 315.4996032714844, 272.7560119628906, -280.3798828125, 367.28009033203125, -48.28575134277344, 191.60317993164062, 399.2694091796875, 457.982421875, 273.9661560058594, 203.05470275878906, 329.2642517089844, -62.431663513183594, 87.92839813232422, 391.34002685546875, -45.201194763183594, 117.4473876953125, 386.74102783203125, 15.507774353027344, 128.4146728515625, 539.9781494140625, -165.83375549316406, 739.4041748046875, 180.38510131835938, 446.7496032714844, 68.56796264648438, -5.115543365478516, -163.94390869140625, 545.92236328125, 581.81201171875, 104.33150482177734, -120.54016876220703, -309.09893798828125, 48.85737609863281, 262.3689270019531, 471.3667297363281, 374.61468505859375, 103.49943542480469, -36.02709197998047, 108.02482604980469, 237.2077178955078, 58.18323516845703, 208.12637329101562, 582.6065673828125, 458.66229248046875, 410.97503662109375, 77.21340942382812, 1021.1690673828125, 551.7167358398438, 448.0397644042969, 150.7298583984375, 229.33731079101562, 237.72482299804688, 206.5956268310547, 73.0083999633789, 98.27424621582031, 661.46875, -2.2685775756835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000295.npy"} +{"epoch": 0.4331864904552129, "step": 296, "batch_size": 64, "mean": 143.65863037109375, "std": 274.4779968261719, "min": -754.7303466796875, "p10": -224.58747253417968, "median": 144.15277099609375, "p90": 519.9936279296875, "max": 842.971435546875, "pos_frac": 0.78125, "sample": [192.8964385986328, 51.48725128173828, 134.10418701171875, 842.971435546875, 66.50634765625, 522.9747314453125, -253.5963134765625, -114.55769348144531, -55.83332061767578, -754.7303466796875, 122.05972290039062, 262.89654541015625, -359.45294189453125, 191.06321716308594, 173.5232391357422, -312.526611328125, 194.7974090576172, 435.99139404296875, 607.2120361328125, 402.6131591796875, 244.66339111328125, 287.83673095703125, 2.3249740600585938, -115.97825622558594, 112.70388793945312, 399.98095703125, 74.64053344726562, -270.877685546875, 29.7833251953125, -231.98597717285156, 569.9010620117188, 54.94175720214844, 280.4371032714844, 61.122398376464844, 243.35488891601562, 539.9286499023438, 170.63648986816406, 117.21646118164062, -207.3242950439453, 176.00253295898438, -270.24237060546875, 306.0119323730469, -67.8783950805664, -64.33517456054688, 513.0377197265625, 102.23889923095703, 127.80657196044922, 242.77101135253906, 178.623046875, 190.77377319335938, 154.20135498046875, 13.347450256347656, 16.09107208251953, 759.2525024414062, 184.79588317871094, 233.49075317382812, 98.85477447509766, 73.3558578491211, 566.5496215820312, -105.71749877929688, 186.0039520263672, 96.59272003173828, 367.8834228515625, 400.9344177246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000296.npy"} +{"epoch": 0.434654919236417, "step": 297, "batch_size": 64, "mean": 159.1259002685547, "std": 210.94931030273438, "min": -257.0852355957031, "p10": -62.014016723632814, "median": 123.6854476928711, "p90": 468.7324859619141, "max": 702.2529296875, "pos_frac": 0.765625, "sample": [158.0176544189453, 297.9625244140625, 363.54852294921875, 567.2190551757812, 24.660564422607422, 24.78228187561035, 372.3638916015625, 210.50588989257812, -209.6540069580078, -63.8658332824707, 71.95040893554688, 133.94647216796875, 204.27349853515625, 418.70452880859375, 308.08319091796875, -39.33440399169922, -39.8380126953125, 31.3275146484375, -76.45596313476562, -12.081367492675781, 187.23158264160156, -195.0623321533203, 94.35758972167969, -32.60026550292969, 519.1779174804688, 93.96466064453125, 318.6398010253906, 79.677001953125, 216.0191650390625, 261.0835876464844, 167.74468994140625, 111.27377319335938, 459.49578857421875, 200.66246032714844, 483.69573974609375, 24.216609954833984, 96.99302673339844, 12.610237121582031, 525.2127075195312, 287.4793395996094, 6.3727264404296875, 197.005859375, -257.0852355957031, 359.984130859375, 72.30745697021484, -60.360755920410156, 124.87802124023438, 64.30691528320312, 702.2529296875, -41.74122619628906, 158.26608276367188, 472.6910705566406, 231.3828125, -15.315185546875, -233.12042236328125, 357.5257263183594, -39.335655212402344, 122.49287414550781, 241.94320678710938, 617.3983154296875, 113.48468017578125, 357.70428466796875, -62.722557067871094, 35.75251770019531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000297.npy"} +{"epoch": 0.43612334801762115, "step": 298, "batch_size": 64, "mean": 210.67361450195312, "std": 304.3543701171875, "min": -510.85504150390625, "p10": -98.88865280151367, "median": 156.20315551757812, "p90": 623.8251098632815, "max": 1336.5654296875, "pos_frac": 0.6875, "sample": [105.90558624267578, -130.54360961914062, 12.672130584716797, -510.85504150390625, 37.844879150390625, -93.1925048828125, 27.690996170043945, 736.6243286132812, 240.62655639648438, -26.49462890625, 180.17919921875, 506.09063720703125, -10.035476684570312, -116.15318298339844, -146.17593383789062, 56.15807342529297, 157.27508544921875, 113.25311279296875, 114.41983795166016, 314.9956359863281, 799.587890625, -8.4471435546875, 404.6700439453125, -188.00909423828125, 640.0811767578125, 337.9031982421875, -97.78889465332031, 506.91204833984375, 585.894287109375, 724.6159057617188, -99.35997772216797, -121.4158935546875, -51.81084442138672, -0.8617019653320312, -50.825294494628906, 655.7597045898438, 713.5599365234375, 141.05552673339844, 393.90142822265625, 231.6541748046875, -35.75787353515625, -11.655838012695312, 1336.5654296875, 260.6585998535156, 582.8233032226562, 240.14474487304688, 464.60162353515625, 439.5422668457031, -3.6079978942871094, 71.11807250976562, 201.00186157226562, 445.06732177734375, 333.0810546875, 116.10449981689453, 508.74688720703125, 175.2768096923828, -57.47027587890625, 358.4345703125, -32.38462829589844, 155.1312255859375, 222.38211059570312, 226.88877868652344, 60.44775390625, 338.6082763671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000298.npy"} +{"epoch": 0.43759177679882527, "step": 299, "batch_size": 64, "mean": 184.48007202148438, "std": 261.30841064453125, "min": -386.64422607421875, "p10": -125.35406265258787, "median": 156.04216766357422, "p90": 558.1940979003907, "max": 852.5237426757812, "pos_frac": 0.765625, "sample": [-134.94970703125, -253.7271728515625, -386.64422607421875, 393.8914489746094, 58.793670654296875, 36.5665283203125, 351.88287353515625, 131.58489990234375, 3.438882827758789, 130.34912109375, 637.7859497070312, 547.197509765625, 745.232421875, -102.96422576904297, 184.83375549316406, 740.9564819335938, -198.57049560546875, 333.7088928222656, 175.10287475585938, 237.9367218017578, -142.45440673828125, 309.9245300292969, 190.5107421875, 138.6407470703125, 447.1689147949219, 115.8995361328125, 852.5237426757812, 9.512531280517578, 229.13558959960938, -11.58005142211914, -6.693063735961914, 125.29246520996094, -95.7232666015625, 69.96604919433594, 208.97744750976562, 562.9069213867188, 136.2744598388672, 175.3262481689453, -15.504180908203125, 169.44619750976562, 99.4713363647461, 340.8248291015625, 304.90777587890625, -43.16452407836914, 177.5643310546875, -91.13227081298828, -222.47828674316406, 525.662109375, -5.8460693359375, 776.5595703125, -153.922607421875, 43.125282287597656, 288.8419494628906, 304.8602600097656, 245.15969848632812, 393.5196533203125, 82.310791015625, 636.5615844726562, 241.12094116210938, 142.6381378173828, 33.879119873046875, 334.9535217285156, 2.6152420043945312, 246.73406982421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000299.npy"} +{"epoch": 0.4390602055800294, "step": 300, "batch_size": 64, "mean": 223.01820373535156, "std": 282.8573303222656, "min": -369.0091857910156, "p10": -176.35675659179685, "median": 239.493408203125, "p90": 596.8988830566407, "max": 770.5997314453125, "pos_frac": 0.796875, "sample": [309.8787536621094, 304.92791748046875, 164.68206787109375, 336.8060302734375, 644.109130859375, -369.0091857910156, -159.6893310546875, 621.8292846679688, 419.822021484375, 40.70084762573242, -79.22994232177734, 305.73089599609375, 144.56590270996094, 561.8768310546875, 336.3076477050781, 396.18206787109375, 126.23883056640625, 507.981201171875, 39.639503479003906, 73.19393920898438, -30.572509765625, 228.36215209960938, 250.62466430664062, -29.322669982910156, 477.5173034667969, 409.59429931640625, -146.84536743164062, 85.73365783691406, 198.5152130126953, 312.70440673828125, 92.05158996582031, 730.7183837890625, 260.1745910644531, -340.6798400878906, 510.80010986328125, -297.246826171875, 588.7449340820312, 325.0780334472656, 481.9105224609375, 190.391845703125, 101.5246353149414, 433.01654052734375, 662.01416015625, 344.6693115234375, 40.39401626586914, 355.4223327636719, -52.2259407043457, -237.07135009765625, 726.498291015625, 414.8533630371094, 474.072509765625, -264.69256591796875, 293.1900939941406, -215.85104370117188, 40.5601692199707, 128.0408935546875, 498.9662170410156, 79.74832153320312, 600.3934326171875, 770.5997314453125, -183.49993896484375, 33.48414993286133, 198.22430419921875, 6.0349578857421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000300.npy"} +{"epoch": 0.44052863436123346, "step": 301, "batch_size": 64, "mean": 209.16920471191406, "std": 260.0733337402344, "min": -198.63282775878906, "p10": -75.1329978942871, "median": 149.82035064697266, "p90": 551.4416259765626, "max": 939.3267211914062, "pos_frac": 0.8125, "sample": [225.32191467285156, 301.1949462890625, -71.37323760986328, 351.64801025390625, 378.65081787109375, 300.72027587890625, 775.11669921875, 19.632675170898438, 471.6485595703125, -9.204450607299805, 852.514404296875, 369.94122314453125, 46.00995635986328, 265.02215576171875, -163.50404357910156, 157.4540252685547, 99.53382110595703, 651.7515258789062, 56.80940246582031, 353.0867919921875, 760.4154663085938, 47.6266975402832, -80.34210968017578, 68.40019226074219, 425.3974304199219, 15.64813232421875, 23.09026336669922, 48.34815979003906, 272.5283203125, 379.8998107910156, 266.290283203125, 297.4354248046875, 84.02590942382812, 179.22256469726562, 51.33464813232422, 76.74267578125, 214.21397399902344, -159.1791229248047, 11.326539993286133, -11.49298095703125, 55.60797119140625, 37.86188507080078, 225.41299438476562, -97.04451751708984, 44.14888000488281, 939.3267211914062, 328.9676208496094, 185.76272583007812, 724.4197387695312, 106.33139038085938, 515.5680541992188, 142.18667602539062, 343.72210693359375, 530.626708984375, 399.73834228515625, 67.06828308105469, -76.74432373046875, -198.63282775878906, 560.3623046875, -178.76144409179688, 105.96955871582031, -62.524871826171875, -21.10811996459961, 305.6560363769531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000301.npy"} +{"epoch": 0.4419970631424376, "step": 302, "batch_size": 64, "mean": 178.58872985839844, "std": 232.7600555419922, "min": -253.24072265625, "p10": -87.3170150756836, "median": 167.67567443847656, "p90": 494.82401733398456, "max": 842.2054443359375, "pos_frac": 0.765625, "sample": [33.67564392089844, 405.6905517578125, 719.3792114257812, -15.412715911865234, 191.33245849609375, -21.565397262573242, 215.4721221923828, 26.7279052734375, 191.14244079589844, 23.464385986328125, 216.64199829101562, 48.009613037109375, 80.21709442138672, 73.88541412353516, 169.901123046875, 105.1303939819336, 391.920166015625, -6.469539642333984, 447.4788818359375, 346.1546325683594, 209.03553771972656, 193.9251708984375, 173.1444091796875, 308.90252685546875, -86.99533081054688, -101.32030487060547, 367.9771728515625, 560.448974609375, 111.25765228271484, -233.4407501220703, 225.18283081054688, -81.3749771118164, 167.6682586669922, -19.365951538085938, -87.45487976074219, 521.8349609375, 842.2054443359375, 512.2789306640625, 175.8502960205078, -124.60888671875, -53.674468994140625, 421.8729248046875, -253.24072265625, 260.4464111328125, 454.09588623046875, 43.94243621826172, 128.8390655517578, 666.7507934570312, 167.68309020996094, 360.4976806640625, 59.845436096191406, 265.45849609375, -163.487548828125, 258.8773193359375, 87.48884582519531, 229.1629638671875, -61.95588684082031, 418.75634765625, 59.95713806152344, 645.51953125, -110.17947387695312, 71.10417175292969, 112.78827667236328, 81.20159912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000302.npy"} +{"epoch": 0.4434654919236417, "step": 303, "batch_size": 64, "mean": 160.39822387695312, "std": 242.84255981445312, "min": -437.1929626464844, "p10": -112.02570495605468, "median": 149.0877227783203, "p90": 534.3194091796876, "max": 644.4631958007812, "pos_frac": 0.703125, "sample": [301.60382080078125, -9.58304214477539, 50.16314697265625, 597.4494018554688, -99.19058227539062, 368.4919738769531, 503.01611328125, 636.6475830078125, -9.93679428100586, -112.98883056640625, 346.63134765625, 184.53530883789062, 330.32830810546875, -95.00999450683594, 71.64574432373047, 154.94537353515625, 70.52571105957031, -7.961393356323242, -60.98375701904297, 387.31658935546875, -437.1929626464844, -54.05900192260742, -84.6559066772461, 297.3996276855469, 438.9646301269531, 549.5357666015625, -158.08631896972656, 269.37908935546875, -241.80181884765625, 112.73985290527344, -109.77841186523438, 290.7158203125, 644.4631958007812, 224.93519592285156, 450.64111328125, 271.1927795410156, -167.946533203125, 160.6531982421875, 127.84626007080078, 624.0447998046875, 579.3077392578125, 31.69000244140625, 157.272705078125, 521.5833740234375, 139.26266479492188, 194.55807495117188, 256.9775390625, 322.16021728515625, 148.86331176757812, -27.71993064880371, 114.88082885742188, -54.71056365966797, 210.2848663330078, 36.05104446411133, 149.3121337890625, 539.7777099609375, -199.087158203125, -41.50431823730469, 26.872791290283203, 59.17935562133789, 261.52081298828125, 3.769256591796875, -175.4957275390625, 194.04327392578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000303.npy"} +{"epoch": 0.44493392070484583, "step": 304, "batch_size": 64, "mean": 180.16664123535156, "std": 169.39306640625, "min": -267.34478759765625, "p10": -10.012712097167967, "median": 168.5331802368164, "p90": 396.77074584960945, "max": 585.0170288085938, "pos_frac": 0.859375, "sample": [0.002166748046875, 260.1563720703125, 285.9625244140625, 76.73693084716797, -8.18695068359375, -10.795181274414062, -6.064300537109375, 49.21931457519531, 152.71548461914062, 135.82351684570312, 290.1548156738281, 369.03704833984375, 180.8660888671875, 327.39019775390625, 57.76957702636719, 268.9940185546875, 297.1437072753906, 330.51678466796875, -69.11731719970703, 15.994754791259766, 157.87081909179688, 377.145751953125, 35.57221984863281, 73.10684204101562, 297.3309631347656, 77.48048400878906, 285.59283447265625, 53.192474365234375, 319.3586730957031, 90.22885131835938, 124.97798156738281, 75.84872436523438, 585.0170288085938, 104.7092056274414, -56.459266662597656, -267.34478759765625, 230.85177612304688, -37.204742431640625, 43.41639709472656, 274.088623046875, -52.800445556640625, 460.46270751953125, 89.68266296386719, 42.82127380371094, 462.54327392578125, 160.08334350585938, 11.791481018066406, 71.76908874511719, 233.54949951171875, 344.99462890625, 234.88021850585938, -85.70750427246094, 176.98301696777344, 492.1534118652344, 407.00128173828125, 93.69132995605469, 244.20333862304688, 284.107177734375, 405.18145751953125, 521.9627075195312, 329.0472412109375, 229.98028564453125, 302.426025390625, 220.75709533691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000304.npy"} +{"epoch": 0.44640234948604995, "step": 305, "batch_size": 64, "mean": 159.6190643310547, "std": 244.72914123535156, "min": -327.30889892578125, "p10": -101.19686279296874, "median": 100.47954559326172, "p90": 489.8157135009766, "max": 1082.4947509765625, "pos_frac": 0.765625, "sample": [396.3315124511719, 287.0711669921875, 102.18058776855469, 203.04891967773438, 159.1953887939453, 663.6962890625, 253.97555541992188, 34.14643859863281, 273.9232177734375, 526.3040771484375, 91.26869201660156, 81.34734344482422, 94.24410247802734, -102.26751708984375, -98.69866943359375, -0.5286331176757812, 286.70684814453125, 495.0993957519531, 48.83136749267578, 74.78203582763672, 1082.4947509765625, 321.18048095703125, 40.08285140991211, 283.38525390625, 289.4412841796875, -74.58441162109375, 7.930822372436523, 241.45213317871094, 224.59019470214844, -108.45767211914062, -43.24609375, 269.89447021484375, -44.42875671386719, 331.4030456542969, 98.77850341796875, 137.2489776611328, 592.500732421875, 301.73516845703125, -32.16923522949219, 157.99395751953125, 121.08623504638672, -42.10570526123047, 94.59675598144531, 96.63054656982422, 600.5863647460938, 554.28466796875, 232.89463806152344, -26.304046630859375, -278.93402099609375, 127.08087921142578, -324.296875, 477.48712158203125, 410.38134765625, 50.45055389404297, -107.82977294921875, 31.12091064453125, -123.73480987548828, 42.201560974121094, 264.6587829589844, -327.30889892578125, 62.392799377441406, 304.78466796875, 10.427413940429688, 17.18451690673828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000305.npy"} +{"epoch": 0.447870778267254, "step": 306, "batch_size": 64, "mean": 159.2891387939453, "std": 200.1483612060547, "min": -398.86669921875, "p10": -45.20945205688475, "median": 114.12085723876953, "p90": 416.66199645996096, "max": 876.305419921875, "pos_frac": 0.8125, "sample": [4.713855743408203, -1.6908683776855469, 93.14822387695312, 52.083831787109375, 23.63543701171875, -57.11427307128906, 347.2126159667969, 5.144523620605469, 98.84149169921875, -80.5817642211914, 226.45823669433594, 382.9200134277344, 187.08352661132812, 418.17279052734375, 61.66497802734375, 68.2263412475586, 10.375370025634766, 443.1620788574219, 109.17000579833984, 31.423355102539062, 119.07170867919922, -56.743492126464844, -23.676132202148438, 448.1488342285156, 268.8565673828125, 290.9148254394531, -20.02627944946289, -7.34144401550293, 413.1368103027344, 105.16789245605469, 141.13983154296875, 181.87680053710938, 244.3486328125, 426.137939453125, 34.63397216796875, 131.2550811767578, 79.0343017578125, -108.80579376220703, 18.157047271728516, 270.0487060546875, -398.86669921875, 337.81976318359375, 96.0148696899414, -32.67341613769531, 50.77461242675781, 275.330322265625, -50.58203887939453, 131.44830322265625, 553.5738525390625, 505.74444580078125, 196.75418090820312, 310.20501708984375, 211.25978088378906, -87.12755584716797, 390.7803955078125, 7.300792694091797, 81.96258544921875, 36.391319274902344, 294.62103271484375, 131.16061401367188, 876.305419921875, 195.97836303710938, 344.3143310546875, 356.6292724609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000306.npy"} +{"epoch": 0.44933920704845814, "step": 307, "batch_size": 64, "mean": 165.57266235351562, "std": 213.3192901611328, "min": -404.1173095703125, "p10": -89.22013397216797, "median": 117.20365905761719, "p90": 467.6151916503908, "max": 783.3717651367188, "pos_frac": 0.796875, "sample": [374.9013977050781, 148.85830688476562, 113.81035614013672, 324.48553466796875, 188.88092041015625, 144.9401397705078, -122.51904296875, 81.36328887939453, -81.03565979003906, 535.8094482421875, 155.902587890625, -26.35588836669922, 91.49226379394531, -131.87884521484375, 424.0733642578125, -404.1173095703125, 486.8730163574219, -171.8448944091797, 225.85507202148438, -67.50582122802734, 207.44378662109375, 288.294189453125, 89.47737884521484, 160.86439514160156, 483.42816162109375, 237.75265502929688, 37.27705001831055, 115.8406982421875, 60.262325286865234, 549.320068359375, 96.30686950683594, 487.8838806152344, 79.60733795166016, 105.44419860839844, -86.35981750488281, 118.56661987304688, -90.44598388671875, 37.89207458496094, 430.71826171875, 368.71441650390625, -21.093284606933594, 347.01611328125, 59.487754821777344, 333.8163146972656, 202.3826446533203, 31.057662963867188, 405.15789794921875, 261.81658935546875, 18.622156143188477, 102.48265075683594, 239.16893005371094, 15.094669342041016, 358.2056884765625, 566.0908813476562, 279.0421447753906, 783.3717651367188, 92.92736053466797, -106.55653381347656, 199.9869842529297, 109.88189697265625, -20.567642211914062, 315.8348693847656, 70.56526184082031, -117.41931915283203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000307.npy"} +{"epoch": 0.45080763582966227, "step": 308, "batch_size": 64, "mean": 187.2371368408203, "std": 191.56558227539062, "min": -234.85244750976562, "p10": -41.93237667083739, "median": 182.48423767089844, "p90": 464.8689178466797, "max": 567.525390625, "pos_frac": 0.8125, "sample": [165.91908264160156, 439.4415283203125, -234.85244750976562, 252.56875610351562, 31.6697998046875, 493.069091796875, -151.35137939453125, 10.830814361572266, 36.0462646484375, 89.59571075439453, 455.74481201171875, -8.458858489990234, 225.99472045898438, 101.28701782226562, 364.60565185546875, 488.8844909667969, -125.71182250976562, 175.59288024902344, 188.20489501953125, 18.790870666503906, 255.8253631591797, 499.9164733886719, 277.3861083984375, 468.15960693359375, -103.613037109375, 269.503173828125, 154.56039428710938, 136.56016540527344, 129.00758361816406, -47.20085144042969, 261.72998046875, 131.09759521484375, 185.31454467773438, 230.12425231933594, 282.4542236328125, 457.1906433105469, 484.94720458984375, 201.09735107421875, -170.51271057128906, 60.90618896484375, 96.95584106445312, 349.41839599609375, 312.9098205566406, 168.53109741210938, 163.9851531982422, 567.525390625, 237.60916137695312, 49.07765197753906, -10.471778869628906, 363.16119384765625, 521.0478515625, 276.94366455078125, 179.6539306640625, -14.24378776550293, -29.63926887512207, 66.28207397460938, 346.1310119628906, 319.935791015625, 225.0388641357422, -11.884849548339844, 394.59686279296875, 317.58514404296875, -112.74131774902344, 23.442527770996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000308.npy"} +{"epoch": 0.4522760646108664, "step": 309, "batch_size": 64, "mean": 186.04051208496094, "std": 229.60084533691406, "min": -632.3908081054688, "p10": -88.00826416015624, "median": 167.0595245361328, "p90": 471.99248962402345, "max": 709.281005859375, "pos_frac": 0.796875, "sample": [159.04705810546875, 68.68255615234375, 114.98577117919922, -102.27011108398438, 320.14306640625, 167.001953125, 126.65984344482422, -17.303739547729492, 530.8104858398438, 52.5113525390625, 249.12322998046875, 432.499267578125, 113.70446014404297, 506.9980163574219, 65.95020294189453, 51.063575744628906, -88.10612487792969, 290.55999755859375, 482.5101623535156, 349.9544372558594, 260.28582763671875, -632.3908081054688, 243.39633178710938, -117.59999084472656, 71.89022827148438, 170.37222290039062, 261.4957275390625, 325.63433837890625, 33.05860900878906, 69.84217834472656, -153.97906494140625, 642.3009033203125, 31.3876953125, -3.4001617431640625, 131.35830688476562, 423.1337890625, 709.281005859375, 464.2544860839844, 137.05990600585938, -22.467849731445312, -8.396419525146484, 85.80867767333984, 64.51121520996094, 428.8436584472656, 313.41107177734375, 282.0113830566406, 475.30877685546875, 393.09027099609375, 319.84722900390625, -4.920627593994141, 438.5065002441406, -173.0512237548828, 226.75411987304688, 315.7897033691406, -115.23143005371094, 70.23663330078125, 554.851318359375, 416.7225646972656, -87.77992248535156, 167.11709594726562, 174.07421875, 27.890666961669922, 396.3174133300781, 225.4409942626953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000309.npy"} +{"epoch": 0.45374449339207046, "step": 310, "batch_size": 64, "mean": 177.672607421875, "std": 192.5367431640625, "min": -266.35772705078125, "p10": -47.50640106201172, "median": 168.41859436035156, "p90": 420.31712646484385, "max": 610.8153686523438, "pos_frac": 0.828125, "sample": [-46.22079086303711, 169.75161743164062, 400.75848388671875, -72.11394500732422, 118.18818664550781, 338.3069152832031, 55.8923225402832, 1.2869510650634766, 27.32200813293457, 219.89016723632812, -209.45318603515625, 398.946533203125, 460.8541564941406, -48.057376861572266, 63.735755920410156, 298.3170166015625, 214.4145965576172, 250.09861755371094, 88.27444458007812, 155.2415771484375, -16.683826446533203, 105.311767578125, 321.51470947265625, 64.318359375, 178.47650146484375, 559.6090698242188, 331.4693603515625, 8.365543365478516, 35.57502746582031, 278.638916015625, -266.35772705078125, 610.8153686523438, -34.60200881958008, 150.716552734375, 602.5712890625, 194.89593505859375, 267.3247375488281, -28.983335494995117, -53.798255920410156, 26.018699645996094, 73.54251098632812, 167.0855712890625, 337.2257080078125, 258.204833984375, 60.12512969970703, 277.4456787109375, 428.69940185546875, -96.52222442626953, 351.29779052734375, 271.5313415527344, 324.6506652832031, -85.04895782470703, 327.52252197265625, 132.82371520996094, 55.49894714355469, 59.86420440673828, 71.22967529296875, 495.77978515625, 542.0916748046875, 251.66600036621094, 55.91794204711914, 364.7665710449219, 252.86119079589844, 172.15603637695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000310.npy"} +{"epoch": 0.4552129221732746, "step": 311, "batch_size": 64, "mean": 201.23855590820312, "std": 260.6631164550781, "min": -271.769775390625, "p10": -114.54959411621091, "median": 163.13463592529297, "p90": 591.0501831054688, "max": 837.2427978515625, "pos_frac": 0.703125, "sample": [475.37969970703125, 413.10296630859375, 419.28424072265625, -1.8151168823242188, -160.090087890625, 570.8736572265625, -136.40371704101562, 323.4921875, 166.74130249023438, 599.697265625, 280.58526611328125, 79.94451904296875, 373.7470703125, 797.5931396484375, 321.91357421875, 133.975341796875, 472.4888916015625, 148.4852294921875, -32.906089782714844, 668.142822265625, 174.6905517578125, 253.43585205078125, 528.3192138671875, 234.05653381347656, 41.529197692871094, 173.2401885986328, -29.569564819335938, -184.480224609375, 247.43450927734375, 139.56109619140625, -91.86148071289062, 315.5302429199219, -176.89773559570312, 142.0481719970703, 269.20233154296875, -124.2730712890625, -6.120323181152344, 837.2427978515625, -22.473876953125, 286.5766296386719, 33.33484649658203, 76.73816680908203, 246.68826293945312, 28.89643669128418, 629.5235595703125, 366.7737121582031, 34.79463577270508, -66.81954956054688, -8.51412582397461, 99.1361083984375, -22.665851593017578, 495.7056884765625, 147.68768310546875, -1.6280288696289062, -271.769775390625, 462.394775390625, 159.52796936035156, 222.24169921875, -13.364967346191406, 269.55084228515625, 634.5780029296875, -197.62278747558594, -46.25495147705078, 678.91162109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000311.npy"} +{"epoch": 0.4566813509544787, "step": 312, "batch_size": 64, "mean": 178.09536743164062, "std": 256.8215026855469, "min": -368.0985412597656, "p10": -82.35527420043945, "median": 127.24059295654297, "p90": 493.1836730957033, "max": 1049.793212890625, "pos_frac": 0.8125, "sample": [135.12644958496094, 1049.793212890625, 533.6088256835938, 159.87545776367188, 110.04148864746094, 57.73999786376953, -79.82242584228516, 116.93038940429688, 76.49994659423828, 365.31219482421875, -239.81298828125, 425.50433349609375, 928.4793701171875, 284.67401123046875, 252.339111328125, 316.8148498535156, -236.3402099609375, 239.12896728515625, 397.19012451171875, 169.006103515625, 561.0360107421875, 91.70191955566406, 115.83285522460938, 285.68475341796875, 66.32199096679688, 7.52836799621582, 34.872642517089844, 696.80859375, 342.1078186035156, 447.7059326171875, 119.354736328125, 407.8646545410156, 173.47402954101562, 99.91069030761719, -172.8121337890625, 112.02076721191406, 176.91233825683594, 512.6741333007812, -6.2606658935546875, 27.322673797607422, 295.1807556152344, 118.63619232177734, -58.588958740234375, 252.41847229003906, 172.1576385498047, 1.08929443359375, -36.19134521484375, 665.264404296875, -83.44078063964844, 20.9207820892334, 214.96360778808594, 105.08311462402344, -209.608154296875, -10.8682861328125, -118.74031829833984, 148.17750549316406, 153.42724609375, 8.988113403320312, -368.0985412597656, 293.9274597167969, 258.5928649902344, 16.510818481445312, 60.14534378051758, 336.0050964355469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000312.npy"} +{"epoch": 0.4581497797356828, "step": 313, "batch_size": 64, "mean": 192.1097869873047, "std": 263.3614196777344, "min": -493.29608154296875, "p10": -96.71422271728512, "median": 172.13155364990234, "p90": 505.5260131835938, "max": 849.9840087890625, "pos_frac": 0.78125, "sample": [89.89529418945312, 329.02783203125, 106.0545654296875, 333.72894287109375, -161.20147705078125, -4.4529571533203125, 509.9759521484375, 173.69903564453125, 394.4256896972656, -47.923641204833984, 39.87709045410156, -150.42201232910156, 481.84649658203125, 185.66123962402344, 21.013025283813477, 170.56407165527344, 432.5274963378906, 232.7852325439453, 97.8336181640625, 213.63916015625, 103.81037139892578, 338.8435363769531, -493.29608154296875, -110.33515930175781, 213.36953735351562, 556.3234252929688, 280.56494140625, 100.04426574707031, 238.94247436523438, 849.9840087890625, 333.9068603515625, 129.3687744140625, -38.05474853515625, 164.33128356933594, -253.62307739257812, 168.3539276123047, -64.93203735351562, -470.64361572265625, 430.8307800292969, 249.3370819091797, -15.004966735839844, 258.851806640625, 56.78189468383789, 320.0040283203125, 300.8426818847656, 125.25492095947266, -18.044456481933594, 337.9637451171875, 318.26898193359375, -173.66714477539062, 495.142822265625, 88.0589599609375, 145.27325439453125, -0.325225830078125, 244.50698852539062, 629.1770629882812, 603.7904052734375, 14.551956176757812, 21.628990173339844, 78.83851623535156, 290.6239318847656, 405.13482666015625, 762.4763793945312, 829.2149047851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000313.npy"} +{"epoch": 0.45961820851688695, "step": 314, "batch_size": 64, "mean": 175.31007385253906, "std": 274.52978515625, "min": -365.04473876953125, "p10": -163.6714874267578, "median": 128.30367279052734, "p90": 482.7947448730469, "max": 865.8029174804688, "pos_frac": 0.71875, "sample": [754.6998291015625, -295.7261047363281, 99.8455581665039, 54.18810272216797, 44.013519287109375, 246.99954223632812, 71.41178894042969, 325.26025390625, 78.8023681640625, -128.1767120361328, 373.55810546875, 3.9533538818359375, 266.2462158203125, 111.60636901855469, 181.77200317382812, -365.04473876953125, -175.82737731933594, 28.023483276367188, 702.3209838867188, 750.2601928710938, 30.431442260742188, -44.35742950439453, -164.0716094970703, 23.843990325927734, 175.73809814453125, -150.11851501464844, -0.1976165771484375, -162.7378692626953, -14.489952087402344, 427.6994323730469, 462.726806640625, 34.89788818359375, 287.6722717285156, 468.96038818359375, 310.1611022949219, 169.23976135253906, -108.1357650756836, 145.0009765625, 36.1287841796875, 780.967041015625, 437.4858093261719, 389.7607727050781, 865.8029174804688, 488.7237548828125, -95.64622497558594, 346.42083740234375, -177.25961303710938, 458.360107421875, -25.105178833007812, 231.68621826171875, 332.50164794921875, 46.436309814453125, 316.5045166015625, 376.9803161621094, 457.1431579589844, 498.96063232421875, 94.70881652832031, 231.04931640625, 233.30149841308594, -4.521980285644531, -190.5017547607422, -42.11891555786133, 316.53271484375, -204.9071502685547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000314.npy"} +{"epoch": 0.461086637298091, "step": 315, "batch_size": 64, "mean": 165.16049194335938, "std": 214.72100830078125, "min": -178.2917022705078, "p10": -76.56481475830077, "median": 135.1613006591797, "p90": 496.6761871337891, "max": 717.8801879882812, "pos_frac": 0.75, "sample": [565.0637817382812, -54.34320831298828, 297.02252197265625, 20.2491455078125, -53.656002044677734, 71.91229248046875, 58.92829132080078, 156.20265197753906, 270.63201904296875, -25.679611206054688, 302.41778564453125, 39.099609375, -89.85054016113281, 258.6981201171875, 85.50041961669922, 545.1181640625, 371.8363037109375, 160.3770294189453, -138.33749389648438, 365.599609375, 201.5020751953125, 500.5302734375, 549.1675415039062, 33.940399169921875, -126.86419677734375, 176.71630859375, 244.2054443359375, 186.0092315673828, 398.5129699707031, 136.6848907470703, 461.52117919921875, 133.63771057128906, 84.3916015625, 117.56951904296875, 207.61167907714844, 331.9203186035156, -113.73190307617188, -7.973480224609375, 231.20152282714844, 23.116836547851562, 42.406314849853516, -65.83441162109375, 717.8801879882812, 70.86103057861328, 214.50674438476562, -10.040885925292969, -157.19070434570312, 26.03465461730957, -81.16355895996094, 600.5821533203125, 108.76791381835938, 231.9250946044922, 636.9807739257812, 139.65530395507812, 166.7551727294922, -54.05499267578125, 290.9178161621094, -8.13979721069336, 391.4857482910156, 487.6833190917969, -178.2917022705078, -55.32440185546875, 61.73282241821289, 15.676475524902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000315.npy"} +{"epoch": 0.46255506607929514, "step": 316, "batch_size": 64, "mean": 228.41299438476562, "std": 252.40106201171875, "min": -258.26971435546875, "p10": -15.018904495239251, "median": 161.75245666503906, "p90": 596.9120544433595, "max": 931.166259765625, "pos_frac": 0.875, "sample": [319.017578125, 123.31224060058594, 487.00872802734375, 66.88461303710938, 56.10234832763672, 931.166259765625, 175.7015380859375, 702.1829223632812, 453.4404296875, 638.3446655273438, 379.5655212402344, 326.30474853515625, 32.921142578125, 22.21835708618164, 35.847320556640625, 211.47731018066406, 61.4534797668457, 128.48086547851562, 9.843807220458984, 425.43707275390625, 162.08657836914062, 25.63309097290039, 605.9864501953125, 414.2139892578125, 270.9692687988281, 112.2491683959961, 56.63968276977539, 669.0108642578125, 143.5251007080078, 506.99505615234375, 67.83435821533203, 161.4183349609375, 48.828643798828125, 160.44578552246094, 238.76834106445312, 313.190185546875, 385.17584228515625, 55.51581573486328, 294.0257568359375, 20.69147491455078, 575.7384643554688, 318.4387512207031, 39.34271240234375, 398.5307922363281, 191.0259552001953, 145.27392578125, -258.26971435546875, -17.559803009033203, 912.9297485351562, 152.56602478027344, 801.858154296875, -252.06036376953125, 180.83377075195312, 195.6562957763672, 384.2579345703125, -9.090141296386719, -79.40364837646484, -43.67597961425781, -32.031349182128906, 122.1115951538086, 249.29571533203125, -93.08699798583984, 98.04360961914062, 337.79083251953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000316.npy"} +{"epoch": 0.46402349486049926, "step": 317, "batch_size": 64, "mean": 247.00172424316406, "std": 235.42642211914062, "min": -162.41775512695312, "p10": -39.786382293701166, "median": 241.03556060791016, "p90": 568.2134948730469, "max": 712.004638671875, "pos_frac": 0.8125, "sample": [362.11395263671875, 2.1815567016601562, 108.0496597290039, 480.9471130371094, 469.0879821777344, 78.66549682617188, 447.21173095703125, 381.84771728515625, -162.41775512695312, 569.1392211914062, 711.2236938476562, 130.04258728027344, 260.39276123046875, 1.23260498046875, 152.91885375976562, 400.356689453125, 182.30613708496094, -36.503257751464844, 107.53003692626953, 402.3770446777344, -26.298095703125, 344.7557373046875, 373.3273620605469, 489.73486328125, 17.431102752685547, 529.58203125, -21.52480697631836, 111.2323226928711, 712.004638671875, 432.6451416015625, 644.943115234375, 447.35467529296875, 149.3986358642578, 566.053466796875, 436.16314697265625, -54.69660949707031, 269.4302978515625, 95.08912658691406, 270.0880126953125, 90.77484130859375, 186.04229736328125, 533.6196899414062, -22.087133407592773, -41.19343566894531, 654.7380981445312, 228.29818725585938, 591.5205078125, 323.51470947265625, 265.5857849121094, -78.74019622802734, 129.22850036621094, 96.63794708251953, 253.77293395996094, -20.482383728027344, -91.55780792236328, 122.42313385009766, 95.5914306640625, -114.07320404052734, 650.2186279296875, 406.93603515625, 74.93988800048828, 381.2688903808594, -126.81300354003906, 382.5273742675781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000317.npy"} +{"epoch": 0.4654919236417034, "step": 318, "batch_size": 64, "mean": 184.75576782226562, "std": 234.08702087402344, "min": -328.91656494140625, "p10": -72.07937698364258, "median": 156.11519622802734, "p90": 500.5004882812501, "max": 840.3966064453125, "pos_frac": 0.796875, "sample": [70.6219253540039, -61.51887512207031, 840.3966064453125, 86.99383544921875, 257.22930908203125, 101.07691955566406, -68.67052459716797, 191.81573486328125, 128.89639282226562, 653.1346435546875, 27.183618545532227, 109.92657470703125, 301.27044677734375, 515.3358764648438, 601.8093872070312, 257.6478271484375, -12.591278076171875, 127.15040588378906, -81.29375457763672, -16.96439552307129, 567.5418090820312, 64.1970443725586, -131.2781524658203, 465.88458251953125, 794.925537109375, 312.60870361328125, -61.555477142333984, -88.95869445800781, 235.8647003173828, 372.9974670410156, 78.93307495117188, 333.0316467285156, 74.09186553955078, 415.7851867675781, 223.0786590576172, 168.40048217773438, 552.8175659179688, 9.885231018066406, -39.996124267578125, 444.3735046386719, 431.51629638671875, 211.88272094726562, 8.001758575439453, -84.4896011352539, 305.95391845703125, 216.41244506835938, 87.0779037475586, 180.38864135742188, 143.8299102783203, -73.54031372070312, -328.91656494140625, 36.08466339111328, 247.94891357421875, 178.27972412109375, 360.76837158203125, 350.795654296875, 236.34588623046875, 139.26910400390625, 57.49781799316406, 254.54415893554688, 128.13916015625, 207.11541748046875, -327.36859130859375, 34.75157928466797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000318.npy"} +{"epoch": 0.4669603524229075, "step": 319, "batch_size": 64, "mean": 222.0023193359375, "std": 252.32803344726562, "min": -372.4964599609375, "p10": -120.6470245361328, "median": 225.4190216064453, "p90": 543.6395324707032, "max": 1003.0226440429688, "pos_frac": 0.828125, "sample": [317.44598388671875, 162.54408264160156, 309.1714782714844, 251.89920043945312, 78.02354431152344, 246.0322265625, 368.1780090332031, 522.8788452148438, 238.39877319335938, 89.1339340209961, -98.13455200195312, -251.83309936523438, 320.1193542480469, 110.69166564941406, 297.432861328125, 137.37876892089844, 56.002296447753906, 256.07806396484375, 250.86151123046875, 196.61795043945312, 185.59056091308594, 408.94049072265625, 193.23056030273438, 252.02017211914062, 316.4429931640625, 54.7176513671875, 389.6347351074219, 803.96337890625, 455.24188232421875, 374.40447998046875, -127.58193969726562, 195.62278747558594, 175.9086456298828, 528.1675415039062, -282.99420166015625, 390.00299072265625, -372.4964599609375, -115.81442260742188, -122.7181396484375, 396.2406005859375, 552.1956787109375, 453.2453918457031, 574.0309448242188, 6.678865432739258, 169.65586853027344, 18.887161254882812, 551.0017700195312, 593.5833740234375, 261.530029296875, 550.2703857421875, 1003.0226440429688, -163.7360076904297, 158.21231079101562, 433.263671875, -21.795001983642578, 38.394134521484375, -160.39016723632812, -33.45353698730469, 338.854736328125, 166.55287170410156, 144.04803466796875, 212.43927001953125, 248.04953002929688, 156.16188049316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000319.npy"} +{"epoch": 0.4684287812041116, "step": 320, "batch_size": 64, "mean": 243.22470092773438, "std": 261.526123046875, "min": -242.49224853515625, "p10": -69.13655853271484, "median": 231.1150131225586, "p90": 572.1887939453126, "max": 959.8372802734375, "pos_frac": 0.796875, "sample": [140.54794311523438, 185.04656982421875, 946.4464111328125, 124.19676208496094, 281.4066162109375, -120.8584213256836, 190.72756958007812, -242.49224853515625, 816.8746948242188, 241.2180633544922, 421.1827392578125, 376.7364807128906, -43.41363525390625, 376.7469787597656, 220.1282958984375, 549.820068359375, 609.3224487304688, 402.2926330566406, 372.8825988769531, -192.24273681640625, 370.1266174316406, 373.83203125, -71.53995513916016, 227.2562255859375, 79.853515625, 10.971672058105469, 276.4117431640625, 287.0079345703125, -46.89988708496094, 581.775390625, -35.26011657714844, 242.41668701171875, -63.52863311767578, 142.97879028320312, 469.79656982421875, 97.23793029785156, 168.564697265625, 209.73924255371094, 959.8372802734375, 400.0166320800781, 398.7862243652344, 118.82715606689453, 128.52110290527344, -57.3582763671875, 660.1788330078125, 60.747337341308594, 399.3731994628906, 211.97474670410156, -2.879331588745117, 27.041976928710938, 387.4904479980469, 376.66412353515625, 294.9906005859375, 435.53887939453125, 484.84405517578125, 606.41357421875, 529.1119995117188, 2.8395214080810547, -115.04898071289062, -74.96639251708984, 49.044281005859375, 234.9738006591797, 241.93170166015625, -169.8238525390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000320.npy"} +{"epoch": 0.4698972099853157, "step": 321, "batch_size": 64, "mean": 208.4459228515625, "std": 271.16741943359375, "min": -364.32830810546875, "p10": -91.82781677246092, "median": 182.05783081054688, "p90": 567.5824707031251, "max": 1031.8394775390625, "pos_frac": 0.78125, "sample": [683.5243530273438, 130.28323364257812, 296.79595947265625, 245.33663940429688, -45.978431701660156, 275.1352844238281, 361.5617370605469, 331.5809020996094, 177.012451171875, 480.366943359375, 419.4529724121094, 366.17755126953125, 63.791595458984375, 51.36967086791992, -242.59349060058594, 241.87913513183594, 433.9005126953125, 268.0977783203125, 238.39089965820312, 572.7130126953125, 759.943359375, 158.32843017578125, -364.32830810546875, 292.0887756347656, 215.8026580810547, 303.8985900878906, 764.24169921875, -269.4001159667969, -109.83457946777344, 141.97122192382812, 54.61427307128906, 555.6112060546875, -33.92303466796875, 379.5008544921875, 135.80865478515625, 407.1680603027344, 232.8457489013672, -288.66900634765625, 1031.8394775390625, 91.44031524658203, -27.900146484375, 97.65840911865234, 17.400476455688477, 104.80989074707031, -54.566627502441406, 155.69277954101562, -99.6160888671875, -47.9938850402832, 182.63693237304688, 268.4227294921875, 688.6055908203125, 676.6812744140625, -14.295360565185547, -242.54425048828125, 174.40472412109375, 181.47872924804688, 169.367431640625, 148.84352111816406, 1.7991657257080078, 479.78509521484375, 217.8841552734375, 278.5042724609375, 249.3885498046875, -73.65518188476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000321.npy"} +{"epoch": 0.4713656387665198, "step": 322, "batch_size": 64, "mean": 223.640869140625, "std": 234.4007110595703, "min": -300.8452453613281, "p10": -43.752029418945305, "median": 245.9314727783203, "p90": 520.2057189941406, "max": 801.2529296875, "pos_frac": 0.765625, "sample": [-196.26565551757812, 734.0897216796875, 308.6898498535156, 801.2529296875, 121.62752532958984, -28.149906158447266, 507.8489990234375, 362.1382141113281, 322.8818054199219, -62.35226058959961, -47.41703796386719, -5.565788269042969, 349.20440673828125, 272.398681640625, 297.7933044433594, 349.8720703125, -28.416099548339844, 238.55545043945312, -20.93328094482422, -101.59967041015625, 559.084228515625, 525.4041748046875, 212.40322875976562, 580.6292114257812, -46.413330078125, 217.5767822265625, 361.3937072753906, 533.5556640625, 18.600440979003906, 508.07598876953125, -37.542327880859375, 179.5604248046875, -11.598583221435547, 380.51025390625, -4.145355224609375, 16.76729965209961, 296.81231689453125, 195.99400329589844, 2.6206703186035156, 334.42523193359375, 444.27362060546875, 469.3653564453125, 422.480224609375, -103.7612533569336, 441.63677978515625, 108.94096374511719, 289.90191650390625, -14.043193817138672, 253.3074951171875, 7.9140472412109375, 115.63092803955078, 667.9110717773438, 196.85879516601562, 325.0265808105469, 300.9790954589844, 339.24627685546875, 86.1214599609375, 69.40467071533203, 57.29505920410156, -300.8452453613281, 39.818084716796875, 373.7205810546875, 462.2416687011719, 260.2227783203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000322.npy"} +{"epoch": 0.47283406754772395, "step": 323, "batch_size": 64, "mean": 197.70355224609375, "std": 248.60272216796875, "min": -412.65435791015625, "p10": -102.58516387939451, "median": 189.89761352539062, "p90": 533.8467712402344, "max": 798.150390625, "pos_frac": 0.8125, "sample": [390.3080749511719, 252.62887573242188, 567.6737670898438, -412.65435791015625, 618.8944091796875, -305.2900085449219, 573.1348266601562, 798.150390625, 30.422962188720703, 148.87115478515625, 255.08486938476562, 46.092559814453125, 187.77041625976562, 140.89698791503906, 15.262947082519531, 336.1768798828125, 93.95673370361328, 306.9682922363281, -120.55484771728516, 650.3746948242188, -10.514106750488281, 77.77714538574219, 508.07049560546875, -109.3923568725586, -111.63575744628906, 390.3366394042969, 159.57945251464844, 141.90184020996094, 408.9393310546875, 30.41265869140625, 537.19384765625, 287.52490234375, 497.080078125, -9.657072067260742, 311.96136474609375, 22.462692260742188, -2.3662261962890625, 193.57064819335938, 305.8076171875, 229.08755493164062, 302.8504638671875, 29.1298828125, 255.65771484375, 526.0369262695312, -307.8113708496094, 182.7241973876953, 308.49072265625, 657.2833862304688, 392.3838806152344, -56.784263610839844, 228.91714477539062, 508.42169189453125, -86.70171356201172, 352.736572265625, -219.81346130371094, 192.02481079101562, 97.5262451171875, 242.6458740234375, 102.00582122802734, 277.4289245605469, 24.25035858154297, 24.075069427490234, 132.79946899414062, 54.4381103515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000323.npy"} +{"epoch": 0.47430249632892807, "step": 324, "batch_size": 64, "mean": 188.0611572265625, "std": 287.52703857421875, "min": -382.5910949707031, "p10": -135.33743438720703, "median": 151.4688720703125, "p90": 477.1989440917969, "max": 1001.303955078125, "pos_frac": 0.75, "sample": [-232.43783569335938, 687.1480102539062, -12.39404296875, 210.97265625, 14.282918930053711, 93.81365966796875, 219.91864013671875, 279.03839111328125, 143.5800018310547, 246.91806030273438, 851.4859619140625, -42.962738037109375, 156.75296020507812, 105.93450927734375, -17.506488800048828, 691.5594482421875, -235.55650329589844, -377.360595703125, 346.245361328125, 314.1640319824219, 25.046157836914062, 146.18478393554688, -167.55328369140625, 234.91976928710938, 336.85467529296875, 920.7937622070312, 479.3148193359375, -135.80857849121094, 117.08908081054688, 417.2204284667969, 403.9119567871094, 160.2746124267578, 83.06926727294922, 113.1009292602539, 330.9041442871094, 315.575927734375, 143.85816955566406, 317.5897216796875, 235.18637084960938, -3.184671401977539, 457.85247802734375, -347.24407958984375, -134.23809814453125, 363.4209899902344, 472.26190185546875, 387.88250732421875, 67.31857299804688, -382.5910949707031, -62.876556396484375, 110.21686553955078, 282.03973388671875, 86.86492156982422, 267.49896240234375, -65.60354614257812, 115.49974060058594, 25.863431930541992, 417.0491027832031, -108.4847640991211, 2.5740623474121094, 1001.303955078125, 697.8776245117188, 158.07130432128906, -30.764785766601562, 336.1758117675781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000324.npy"} +{"epoch": 0.47577092511013214, "step": 325, "batch_size": 64, "mean": 157.7469482421875, "std": 221.32850646972656, "min": -355.9023132324219, "p10": -77.53672866821287, "median": 143.03072357177734, "p90": 452.8746917724612, "max": 846.580810546875, "pos_frac": 0.8125, "sample": [-355.0663757324219, -355.9023132324219, 196.97659301757812, -8.963981628417969, 355.0848083496094, 250.85089111328125, 145.7379150390625, -43.52873229980469, 162.92892456054688, 262.8377685546875, 291.1019287109375, 488.97589111328125, 265.34259033203125, 163.8310546875, 156.6782684326172, 132.99119567871094, 9.706916809082031, 74.72053527832031, 48.59104919433594, 237.52450561523438, -118.80028533935547, 236.63812255859375, 283.8506774902344, 537.556884765625, 23.954303741455078, 28.005882263183594, 140.3235321044922, 115.81926727294922, 118.99891662597656, 17.642478942871094, 527.470947265625, -55.43943786621094, 846.580810546875, -12.03280258178711, 539.214599609375, 16.60198211669922, 480.63720703125, -152.0837860107422, 264.9840393066406, 225.3777618408203, -87.00699615478516, 129.88262939453125, 388.0954895019531, -16.462587356567383, 305.36895751953125, 295.7949523925781, 179.00100708007812, 289.24737548828125, 138.26492309570312, 152.60140991210938, 296.33074951171875, -307.765869140625, 23.231903076171875, 672.67138671875, 27.67169189453125, 26.744300842285156, 386.3934020996094, 87.49491119384766, -118.01974487304688, 247.26104736328125, 134.73731994628906, 34.6306037902832, 60.53227996826172, 203.38272094726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000325.npy"} +{"epoch": 0.47723935389133626, "step": 326, "batch_size": 64, "mean": 145.47195434570312, "std": 194.49720764160156, "min": -387.2820739746094, "p10": -41.964133834838854, "median": 145.63098907470703, "p90": 385.21406860351567, "max": 623.6028442382812, "pos_frac": 0.796875, "sample": [-12.1405029296875, -25.602413177490234, 52.58009338378906, 388.82049560546875, 269.6610107421875, -10.964324951171875, 152.52899169921875, 149.76182556152344, 293.5530700683594, -244.75784301757812, -181.61700439453125, -387.2820739746094, 584.7457885742188, 52.090476989746094, -143.7689666748047, 141.50015258789062, 217.05152893066406, 118.7843017578125, 308.296142578125, 234.08489990234375, -7.514434814453125, 23.476470947265625, 156.5685577392578, 208.27944946289062, -26.689773559570312, 161.5740966796875, 608.796142578125, 196.1216583251953, 96.54367065429688, 97.71868896484375, 388.1070251464844, 204.06838989257812, 229.7108917236328, 121.86083984375, 208.0863037109375, 2.8045654296875, 51.767608642578125, 406.1544189453125, -48.51028823852539, 67.90475463867188, 118.63258361816406, -61.2559814453125, 81.8892822265625, 165.82794189453125, 60.701568603515625, 98.51988220214844, 157.44097900390625, 341.0638732910156, 238.86329650878906, 223.9142303466797, -0.5971527099609375, 378.4638366699219, 623.6028442382812, 105.2051010131836, 336.40966796875, 156.7845458984375, 368.95526123046875, 226.7254638671875, 13.795888900756836, -320.63916015625, 424.658935546875, 313.52227783203125, 78.46275329589844, 75.10216522216797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000326.npy"} +{"epoch": 0.4787077826725404, "step": 327, "batch_size": 64, "mean": 222.72535705566406, "std": 221.71058654785156, "min": -248.32630920410156, "p10": -94.48186569213867, "median": 248.22679901123047, "p90": 471.01643676757817, "max": 760.8448486328125, "pos_frac": 0.828125, "sample": [215.03982543945312, 175.8917694091797, 410.155029296875, 208.39999389648438, 262.73828125, 319.468994140625, 97.17752838134766, 253.7537841796875, 319.779541015625, 227.69451904296875, 323.2920837402344, 314.94024658203125, 117.84608459472656, 323.32891845703125, -87.54529571533203, 419.6227722167969, -45.63576889038086, 426.8402404785156, 344.2334899902344, 294.6495361328125, -209.93377685546875, 520.11474609375, 466.2928466796875, 287.708251953125, 149.4389190673828, 473.04083251953125, 301.75439453125, -179.8163604736328, 190.5887908935547, 266.5656433105469, 333.40570068359375, -104.04400634765625, -27.25640869140625, 177.4181671142578, 312.77410888671875, 168.68800354003906, 242.69981384277344, 263.2388916015625, 275.5964660644531, 161.99374389648438, 52.56349182128906, 82.60675048828125, -121.54116821289062, 453.53436279296875, 208.54580688476562, -97.45468139648438, 3.374286651611328, 413.81842041015625, 366.5394287109375, 143.41690063476562, 721.4030151367188, 73.59019470214844, 760.8448486328125, -248.32630920410156, 473.5534973144531, -52.968505859375, 85.10542297363281, 571.48046875, 94.07856750488281, 367.8140563964844, 413.3249206542969, 647.8685302734375, 86.41010284423828, -237.10025024414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000327.npy"} +{"epoch": 0.4801762114537445, "step": 328, "batch_size": 64, "mean": 209.12203979492188, "std": 258.68145751953125, "min": -446.1654968261719, "p10": -61.02054748535156, "median": 193.39926147460938, "p90": 507.1850982666016, "max": 929.562255859375, "pos_frac": 0.71875, "sample": [226.5391082763672, -179.23110961914062, 160.32620239257812, 205.02798461914062, -28.69415283203125, 380.467529296875, 329.2895202636719, 119.25550842285156, 41.22947692871094, -446.1654968261719, 98.86658477783203, 505.5382995605469, 507.890869140625, 346.6542663574219, 363.67779541015625, 488.42437744140625, -41.20281219482422, -81.56474304199219, -39.65826416015625, 156.75592041015625, -56.93398666381836, -29.43609619140625, 323.8504333496094, 929.562255859375, 225.607177734375, 335.1534729003906, 374.4997253417969, 158.69479370117188, 664.7785034179688, 411.7586364746094, -116.31356811523438, 205.41854858398438, 287.71087646484375, -62.77193069458008, 116.32331848144531, 141.31613159179688, 314.3697204589844, 450.8025207519531, 181.77053833007812, 178.3040008544922, -8.165632247924805, 213.52981567382812, -96.48616027832031, 250.32504272460938, -32.78339385986328, 483.40533447265625, 140.2130889892578, 242.9452362060547, 273.03753662109375, 214.3375244140625, 611.822998046875, 472.9463195800781, 54.97865295410156, -53.317230224609375, -28.361507415771484, 817.7789306640625, 838.2005615234375, 69.49566650390625, 556.4287719726562, -27.10836410522461, -15.963371276855469, 296.311279296875, 92.8853759765625, -130.53787231445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000328.npy"} +{"epoch": 0.48164464023494863, "step": 329, "batch_size": 64, "mean": 218.89981079101562, "std": 300.8843994140625, "min": -810.2685546875, "p10": -87.71152038574218, "median": 183.1309356689453, "p90": 625.0733642578125, "max": 985.7703247070312, "pos_frac": 0.796875, "sample": [139.35726928710938, 390.41326904296875, 547.9524536132812, 331.60198974609375, 339.0403747558594, -132.19691467285156, 519.4761962890625, 33.466285705566406, 521.8173217773438, 183.611572265625, -12.1883544921875, 400.3188781738281, 75.44635772705078, -67.35722351074219, 80.68388366699219, 161.94815063476562, 202.34925842285156, 182.65029907226562, 58.351287841796875, 879.60498046875, 149.1455078125, 110.78974914550781, 148.6944122314453, 131.4952392578125, -810.2685546875, 376.8743591308594, 40.441429138183594, 985.7703247070312, 672.9124755859375, -94.38038635253906, 210.709716796875, -34.71000289916992, 391.34942626953125, 80.7947006225586, 220.2851104736328, 483.5408630371094, 639.8079833984375, 383.38568115234375, 736.2340087890625, 184.43467712402344, 19.11825942993164, -72.15083312988281, -183.2757110595703, 630.7001953125, 481.0054016113281, -144.98928833007812, 303.0652160644531, -130.35206604003906, 353.3150634765625, 153.5653076171875, 213.88909912109375, -55.12178039550781, 261.10589599609375, 249.46383666992188, 78.51758575439453, 49.397666931152344, 250.7424774169922, 467.77294921875, 817.5833740234375, -57.23046875, -316.2895812988281, 153.70663452148438, 611.944091796875, 30.451087951660156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000329.npy"} +{"epoch": 0.4831130690161527, "step": 330, "batch_size": 64, "mean": 235.27944946289062, "std": 270.6396789550781, "min": -287.7521667480469, "p10": -67.60581054687499, "median": 220.24581146240234, "p90": 502.33540039062507, "max": 962.2716064453125, "pos_frac": 0.796875, "sample": [490.212890625, -64.21783447265625, -158.65933227539062, -52.52763748168945, 369.3948974609375, 482.24847412109375, 144.50608825683594, 179.45086669921875, -23.186182022094727, 58.05754852294922, -141.11729431152344, 133.03436279296875, 136.41897583007812, 444.83544921875, 507.53076171875, 44.35388946533203, 296.7886047363281, 267.0201110839844, 273.1759033203125, 449.3501892089844, -54.80274963378906, 120.40432739257812, 314.6631164550781, 70.62193298339844, 182.5186767578125, 329.0164489746094, -251.41116333007812, 368.5846252441406, 239.8097686767578, 164.57244873046875, 454.3239440917969, 3.002704620361328, -287.7521667480469, 297.1682434082031, 489.3543395996094, 200.68185424804688, 59.55561828613281, 865.247802734375, 20.112525939941406, 156.98716735839844, -13.449073791503906, 463.68890380859375, 123.21577453613281, 962.2716064453125, -240.8936004638672, 849.876220703125, 765.7274169921875, 377.4294128417969, 405.6248779296875, 89.09495544433594, 354.7568054199219, 526.6287231445312, 195.44705200195312, 744.1487426757812, -11.610633850097656, 309.3965148925781, 260.95538330078125, 456.9267578125, -153.93875122070312, 411.666748046875, 45.255859375, 311.31121826171875, 314.080810546875, -69.05780029296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000330.npy"} +{"epoch": 0.4845814977973568, "step": 331, "batch_size": 64, "mean": 227.6820831298828, "std": 201.87876892089844, "min": -174.10000610351562, "p10": 15.539501190185563, "median": 198.41211700439453, "p90": 446.2546539306641, "max": 854.3159790039062, "pos_frac": 0.90625, "sample": [164.05381774902344, 744.6502685546875, 285.5531005859375, -27.990570068359375, 279.860595703125, 386.520263671875, 52.821075439453125, 48.53318786621094, 224.60862731933594, 145.33258056640625, 727.0174560546875, 324.9669494628906, 854.3159790039062, 31.36023712158203, 173.87379455566406, 362.8977966308594, 644.5108642578125, 117.32466125488281, 146.12310791015625, 222.97622680664062, 453.9986877441406, 428.18524169921875, 466.5025634765625, -77.21974182128906, 117.22714233398438, 188.2744598388672, 193.83592224121094, 315.1023864746094, 250.16845703125, -64.03436279296875, 245.4195098876953, -110.02607727050781, 313.443115234375, 193.8112335205078, 33.30065155029297, 175.72378540039062, -174.10000610351562, 202.98831176757812, 320.72613525390625, 594.2507934570312, 268.5122375488281, 193.00648498535156, 135.9503936767578, 374.80792236328125, 91.93729400634766, 97.34423828125, 251.87725830078125, 159.4771728515625, 257.98248291015625, 72.75859069824219, -168.35357666015625, 295.52734375, 107.24372863769531, 82.37217712402344, 337.1734313964844, 354.59637451171875, 301.13201904296875, 68.9207534790039, 165.8662567138672, 372.75140380859375, 145.30503845214844, 280.3493347167969, 339.46746826171875, 8.759185791015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000331.npy"} +{"epoch": 0.48604992657856094, "step": 332, "batch_size": 64, "mean": 162.39520263671875, "std": 233.09779357910156, "min": -195.85208129882812, "p10": -106.2402458190918, "median": 121.48944854736328, "p90": 459.81515197753913, "max": 947.96484375, "pos_frac": 0.765625, "sample": [722.3790283203125, 143.10958862304688, 150.01087951660156, 305.61553955078125, 19.31005859375, 20.606430053710938, 47.92668151855469, 425.8486328125, 413.161376953125, 184.60740661621094, -43.23487854003906, 61.16053771972656, -102.310302734375, 947.96484375, 184.99118041992188, 78.34749603271484, 128.7288818359375, 131.0206756591797, 81.69428253173828, -9.95567512512207, 135.53363037109375, 86.86802673339844, 464.4305725097656, -126.34214782714844, 355.17156982421875, 6.305103302001953, -112.30062866210938, 100.61211395263672, 627.93115234375, 215.78570556640625, 146.6237335205078, 214.53091430664062, 290.0056457519531, -107.92450714111328, -56.03253936767578, 79.16570281982422, 499.9503173828125, 495.5030212402344, -129.98764038085938, 248.69569396972656, 166.40127563476562, -195.85208129882812, 319.35015869140625, 60.79588317871094, 301.7121887207031, 688.9890747070312, 114.25001525878906, 449.04583740234375, -79.97122192382812, 82.93681335449219, -194.07305908203125, 225.91799926757812, 390.5665283203125, -141.90335083007812, 379.1773681640625, 27.27884292602539, -95.90141296386719, 2.437305450439453, -71.92701721191406, 145.57452392578125, 307.20416259765625, 113.3281478881836, 111.08268737792969, -38.63690185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000332.npy"} +{"epoch": 0.48751835535976507, "step": 333, "batch_size": 64, "mean": 215.18930053710938, "std": 298.3913879394531, "min": -333.917236328125, "p10": -182.7840606689453, "median": 203.5207290649414, "p90": 574.4554565429688, "max": 949.4278564453125, "pos_frac": 0.75, "sample": [-31.821029663085938, 294.71087646484375, 949.4278564453125, -13.494583129882812, 108.1042251586914, 57.49815368652344, 298.9273681640625, 257.5733642578125, 39.90771484375, 39.974178314208984, 745.9312133789062, -172.78494262695312, 215.63934326171875, -228.07589721679688, 542.593017578125, -270.4250793457031, 699.1069946289062, -103.18319702148438, 653.018798828125, 428.1296691894531, -4.864200592041016, 303.8964538574219, 0.0689544677734375, 350.86968994140625, 459.31512451171875, 555.1519775390625, -187.06939697265625, 111.79193115234375, 60.2728271484375, 417.7791442871094, 150.3307647705078, 2.0727672576904297, -78.5137939453125, 517.0474243164062, 558.4368896484375, 420.18212890625, -265.8327941894531, 11.230236053466797, 553.9495849609375, 446.9399108886719, 441.43707275390625, -233.0352325439453, -14.276283264160156, 496.8572082519531, 466.64373779296875, -37.176429748535156, 198.43516540527344, 160.02455139160156, 740.640869140625, 261.5515441894531, 474.10479736328125, 288.8548278808594, 581.320556640625, 158.78067016601562, 208.60629272460938, -145.91639709472656, 400.4124755859375, 3.6482887268066406, 342.5771484375, -333.917236328125, 619.1239624023438, -314.4457702636719, 14.667526245117188, 99.38301086425781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000333.npy"} +{"epoch": 0.4889867841409692, "step": 334, "batch_size": 64, "mean": 187.9039306640625, "std": 294.8891906738281, "min": -549.85791015625, "p10": -125.66009597778319, "median": 150.69841766357422, "p90": 634.0444641113282, "max": 803.1802978515625, "pos_frac": 0.796875, "sample": [605.1008911132812, 9.590057373046875, -173.42620849609375, 485.5108642578125, 232.25967407226562, 40.529151916503906, -72.72905731201172, 42.588661193847656, 243.61329650878906, 28.243316650390625, 220.6317138671875, 13.036033630371094, 358.1537780761719, 211.64715576171875, -131.7790069580078, 200.54774475097656, 43.82017517089844, 424.2208251953125, -549.85791015625, 646.4488525390625, 51.795719146728516, 224.06048583984375, -7.731475830078125, -282.257568359375, 372.58343505859375, 256.6308288574219, 460.83465576171875, -402.980712890625, -220.72682189941406, -42.24348449707031, 33.95172119140625, -28.822887420654297, 123.36448669433594, 59.49774169921875, 737.7747192382812, 803.1802978515625, 312.2433166503906, 24.89666175842285, 77.37696838378906, 499.08453369140625, 83.13146209716797, 252.5162353515625, 553.2393798828125, 410.32281494140625, -77.06819152832031, 73.00213623046875, 767.88134765625, 38.060272216796875, 770.1214599609375, 481.68194580078125, -111.38263702392578, -385.60797119140625, 32.081787109375, 260.0702819824219, 201.65199279785156, 660.7373657226562, 453.3428649902344, 115.87336730957031, 45.84654235839844, 360.881591796875, 48.78517150878906, 674.2673950195312, 178.0323486328125, 207.7198486328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000334.npy"} +{"epoch": 0.49045521292217326, "step": 335, "batch_size": 64, "mean": 149.90032958984375, "std": 259.487548828125, "min": -294.9684143066406, "p10": -196.21173858642575, "median": 114.5254898071289, "p90": 436.3501831054688, "max": 973.1768188476562, "pos_frac": 0.734375, "sample": [171.04229736328125, 353.5323486328125, 430.3994140625, 10.34686279296875, 178.9490509033203, -71.46170806884766, 95.12866973876953, 438.9005126953125, 63.587894439697266, 42.83782196044922, -219.88771057128906, 49.31004333496094, 276.1690673828125, 250.72584533691406, 24.749610900878906, 631.1160278320312, 13.358940124511719, 106.12808990478516, -238.8546905517578, 226.1151123046875, -119.41213989257812, 53.94932556152344, 189.58489990234375, 130.47433471679688, 686.5004272460938, 533.6301879882812, 389.6106872558594, 264.62213134765625, 35.83479309082031, 629.7703857421875, 973.1768188476562, -272.0169677734375, 164.03573608398438, -294.9684143066406, 397.02752685546875, -8.718942642211914, 430.1692810058594, 7.262102127075195, 73.45266723632812, 496.0570068359375, -105.51397705078125, 336.3714904785156, -168.25430297851562, 120.25177001953125, 259.519775390625, -52.624900817871094, 208.0115203857422, -24.978988647460938, -239.02908325195312, -97.05963134765625, 108.79920959472656, -129.6800079345703, 406.857177734375, -233.41552734375, -156.3428955078125, 357.70611572265625, 263.01434326171875, 422.22479248046875, 45.45545959472656, -208.19349670410156, 287.9431457519531, 336.000244140625, 192.4769287109375, 71.84722900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000335.npy"} +{"epoch": 0.4919236417033774, "step": 336, "batch_size": 64, "mean": 238.0438995361328, "std": 323.5335998535156, "min": -556.8159790039062, "p10": -94.99150238037109, "median": 223.46730041503906, "p90": 663.5703430175782, "max": 1126.6158447265625, "pos_frac": 0.84375, "sample": [330.54376220703125, 66.19939422607422, 863.3460693359375, 39.06951141357422, 433.4521484375, 237.7423095703125, 248.9287109375, -104.89192962646484, -54.121822357177734, 844.8743896484375, 260.1337890625, 381.20953369140625, 151.15615844726562, 446.9875183105469, 1126.6158447265625, -1.1123371124267578, 729.0440673828125, 388.9849548339844, 250.6155242919922, 413.08233642578125, -97.65574645996094, 77.56343078613281, 127.18418884277344, 277.39404296875, 133.3089599609375, 350.8799743652344, 115.7674560546875, 79.23773193359375, 631.5403442382812, 237.65673828125, 193.72079467773438, 63.39375305175781, 67.65788269042969, 209.27786254882812, 99.46241760253906, 87.235595703125, -449.671142578125, 677.2974853515625, -88.77493286132812, -205.63662719726562, 571.2954711914062, 22.464515686035156, 762.476318359375, 67.4349365234375, 441.445068359375, 61.699493408203125, -556.8159790039062, 92.1080551147461, 324.2490539550781, 316.7920227050781, 125.76283264160156, 72.0025405883789, 197.4818878173828, 63.672096252441406, 269.8246154785156, 535.13525390625, 455.32940673828125, 1057.78173828125, -331.7037353515625, 310.38751220703125, -436.8017578125, 374.9765319824219, 288.27630615234375, 510.83489990234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000336.npy"} +{"epoch": 0.4933920704845815, "step": 337, "batch_size": 64, "mean": 234.46226501464844, "std": 255.49624633789062, "min": -239.88829040527344, "p10": -101.54704971313475, "median": 209.59927368164062, "p90": 560.5297729492188, "max": 961.7635498046875, "pos_frac": 0.8125, "sample": [887.539306640625, 300.3265380859375, -193.95375061035156, 64.85186004638672, 543.5811767578125, 462.0758972167969, -31.974586486816406, 261.9462890625, 23.373729705810547, 408.8607177734375, 36.63208770751953, 272.778564453125, 269.38409423828125, 391.87445068359375, 422.1410217285156, -92.49329376220703, 214.0556640625, 187.9770965576172, -105.42723083496094, 494.24462890625, 208.1757354736328, 204.28952026367188, -6.965127944946289, 567.79345703125, 151.9290313720703, -134.3048095703125, 961.7635498046875, 67.98291015625, -141.05648803710938, -140.493896484375, 228.57321166992188, 190.4502410888672, 256.93597412109375, -176.25393676757812, 184.9128875732422, 211.02281188964844, 458.5583801269531, 5.0055999755859375, 272.6726989746094, 370.8155517578125, -6.730091094970703, 652.0319213867188, 69.58316040039062, 189.6763916015625, -239.88829040527344, 141.43295288085938, 789.653564453125, 477.504150390625, 334.95587158203125, 78.34749603271484, 205.97726440429688, 174.90277099609375, 650.8359985351562, 307.73883056640625, 159.0486297607422, 640.42431640625, 362.50201416015625, 201.40643310546875, 92.4060287475586, 343.6380615234375, 305.1399230957031, 255.19403076171875, -56.35585403442383, 316.5577087402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000337.npy"} +{"epoch": 0.4948604992657856, "step": 338, "batch_size": 64, "mean": 219.0503387451172, "std": 213.3196258544922, "min": -215.43643188476562, "p10": -7.122251892089844, "median": 168.7763214111328, "p90": 500.81647033691405, "max": 763.3347778320312, "pos_frac": 0.875, "sample": [96.66474914550781, 499.9928894042969, 69.79084777832031, -215.43643188476562, 219.27200317382812, 109.13389587402344, -108.31201171875, 234.4988250732422, 612.7645263671875, 250.7890625, 106.98686218261719, 49.93693923950195, -54.24267578125, 468.3801574707031, 78.13519287109375, 276.2960205078125, 30.177215576171875, 524.67822265625, 334.4981384277344, 470.0140380859375, 610.3546142578125, -14.607093811035156, 234.92791748046875, 151.2846221923828, 261.315673828125, 32.43388366699219, 422.952880859375, -7.037101745605469, -56.299659729003906, 52.32860565185547, 190.3209991455078, 493.0896301269531, 449.78765869140625, 68.178466796875, 256.37957763671875, 394.8581848144531, 180.5645751953125, 44.787052154541016, 368.5547790527344, 147.1205596923828, 405.7433776855469, 39.483184814453125, 81.80999755859375, 126.57493591308594, 497.9591064453125, 28.166170120239258, 163.81463623046875, 124.2804183959961, 363.2449951171875, 131.85984802246094, 31.35125732421875, 24.462116241455078, 517.64599609375, 763.3347778320312, -7.158744812011719, 236.32395935058594, 494.52203369140625, 173.73800659179688, 341.0850830078125, 151.1671142578125, 587.7268676757812, -153.06277465820312, 58.665374755859375, 501.16943359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000338.npy"} +{"epoch": 0.49632892804698975, "step": 339, "batch_size": 64, "mean": 216.89700317382812, "std": 231.7227325439453, "min": -236.99472045898438, "p10": -54.77608985900878, "median": 221.35562896728516, "p90": 568.3123779296876, "max": 826.9299926757812, "pos_frac": 0.828125, "sample": [71.81271362304688, -122.00794219970703, -43.27011489868164, -40.415931701660156, 591.0228271484375, 25.085403442382812, 43.29621887207031, 165.40371704101562, 335.2525634765625, 34.373023986816406, 826.9299926757812, 2.3335113525390625, 101.19735717773438, 144.58938598632812, 294.662353515625, 372.523193359375, 15.970273971557617, 16.66283416748047, 78.06705474853516, 576.0738525390625, 253.24400329589844, -6.856437683105469, 237.33749389648438, -59.70722198486328, 254.25552368164062, 153.97483825683594, 617.647216796875, 28.151473999023438, 378.9117431640625, 617.59375, -120.04861450195312, 237.93972778320312, -236.99472045898438, 143.4408416748047, 462.3583068847656, 313.43572998046875, 179.65618896484375, -6.778711318969727, 242.13180541992188, 451.1503601074219, 136.1985321044922, 222.1636505126953, 390.03936767578125, 508.8324279785156, 108.49286651611328, 550.2022705078125, -131.98114013671875, 642.6986083984375, 268.7087707519531, -166.1591033935547, 259.715576171875, 193.70724487304688, 77.64097595214844, 220.547607421875, 46.071163177490234, 256.5369873046875, 230.7949981689453, -128.23309326171875, 417.0040283203125, 368.39617919921875, 524.2067260742188, 577.556396484375, 407.0296630859375, 270.8321533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000339.npy"} +{"epoch": 0.4977973568281938, "step": 340, "batch_size": 64, "mean": 188.17184448242188, "std": 256.86224365234375, "min": -418.00311279296875, "p10": -119.0688758850097, "median": 166.60628509521484, "p90": 518.1206848144532, "max": 907.37939453125, "pos_frac": 0.78125, "sample": [176.78411865234375, 266.19964599609375, 87.8731460571289, 194.09326171875, 230.8927459716797, 465.18701171875, 593.6241455078125, -418.00311279296875, 253.696044921875, -47.729393005371094, 323.949951171875, 907.37939453125, 136.90460205078125, -237.27828979492188, 517.743896484375, -14.123497009277344, 302.15869140625, 125.03821563720703, -59.657142639160156, 94.26103210449219, 166.41151428222656, -143.58642578125, -341.6515808105469, 514.1956176757812, -164.9652557373047, 139.10891723632812, 60.89354705810547, 432.5928649902344, -45.47998046875, 308.2781982421875, 465.513671875, 45.11903381347656, 781.9313354492188, 661.4264526367188, -2.677614212036133, 129.84283447265625, 69.88165283203125, 296.1812744140625, 140.77345275878906, 194.44638061523438, 213.32095336914062, 93.872314453125, 206.55581665039062, 22.89205551147461, -154.29222106933594, 272.69976806640625, 249.2777099609375, -61.86125946044922, 518.2821655273438, 2.9474334716796875, 166.80105590820312, 439.8669738769531, 521.155029296875, 553.2645263671875, 336.4048156738281, 208.73776245117188, 115.24092102050781, 34.9914436340332, 367.99896240234375, 158.0799560546875, -264.999755859375, 383.2530212402344, -40.608642578125, 91.88762664794922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000340.npy"} +{"epoch": 0.49926578560939794, "step": 341, "batch_size": 64, "mean": 231.0950164794922, "std": 262.06024169921875, "min": -279.6058349609375, "p10": -84.45407714843749, "median": 229.64261627197266, "p90": 541.8378723144532, "max": 1159.6937255859375, "pos_frac": 0.796875, "sample": [1159.6937255859375, -98.79901123046875, -86.19203186035156, -135.2514190673828, 435.3365478515625, 553.423583984375, 704.2529296875, 494.8340148925781, 226.4256591796875, 256.9441833496094, -18.096513748168945, 198.59732055664062, 164.20877075195312, 283.62237548828125, 44.64503479003906, -55.364776611328125, -68.16377258300781, 363.62274169921875, 73.70537567138672, 285.42578125, -150.847412109375, 384.7967529296875, 414.23114013671875, 253.92910766601562, 69.91048431396484, 559.3603515625, 129.98001098632812, 233.4754638671875, -39.71690368652344, -80.39884948730469, 128.3520050048828, 453.630859375, 633.066650390625, 267.6620178222656, 275.275146484375, 401.9912414550781, 100.67909240722656, 17.16819953918457, -67.7399673461914, 422.9405517578125, 517.7894287109375, 252.4556121826172, 86.39158630371094, 93.64041900634766, -279.6058349609375, 549.5689697265625, 104.49311828613281, 232.8595733642578, -247.02731323242188, 366.78533935546875, 219.54946899414062, 257.0445251464844, 431.29632568359375, -90.6540298461914, 768.9207763671875, 40.94023132324219, 336.1639404296875, 482.2222900390625, 433.586181640625, 78.96405029296875, 185.88937377929688, 101.16390228271484, 523.7986450195312, 153.22799682617188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000341.npy"} +{"epoch": 0.5007342143906021, "step": 342, "batch_size": 64, "mean": 203.21817016601562, "std": 273.95648193359375, "min": -550.9515380859375, "p10": -120.31016616821287, "median": 173.42942810058594, "p90": 507.48280029296876, "max": 1036.9178466796875, "pos_frac": 0.796875, "sample": [695.46337890625, -550.9515380859375, -220.83782958984375, 542.564697265625, 79.46652221679688, 55.00270080566406, 156.93679809570312, 340.83404541015625, 429.5384521484375, 353.37799072265625, 122.95951843261719, 109.20561218261719, 195.58531188964844, 200.0918731689453, 499.2168273925781, 292.68743896484375, 86.46717834472656, 436.4625549316406, -186.1981201171875, -35.50404357910156, 430.45343017578125, -75.83531951904297, 84.39443969726562, -130.20904541015625, -129.95980834960938, 308.4800109863281, 957.7503051757812, 373.2171325683594, 119.01402282714844, -200.6997528076172, -67.80796813964844, 1036.9178466796875, 204.58322143554688, 187.17303466796875, 10.301040649414062, -89.6178207397461, 551.1253662109375, 431.6344909667969, 602.7645263671875, 187.9269256591797, 3.060670852661133, 174.87496948242188, 361.6598205566406, 14.061187744140625, 511.0253601074219, 375.0450744628906, 161.35296630859375, 395.4219055175781, 125.90301513671875, 432.08612060546875, 437.11175537109375, 92.62223052978516, -153.97982788085938, 80.32514953613281, 403.0655822753906, -97.7943344116211, 171.98388671875, -60.216224670410156, 186.9010009765625, 127.19841003417969, 209.31423950195312, 426.78466796875, 154.63804626464844, 79.54170989990234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000342.npy"} +{"epoch": 0.5022026431718062, "step": 343, "batch_size": 64, "mean": 243.47164916992188, "std": 224.06832885742188, "min": -376.45672607421875, "p10": 14.596405029296886, "median": 230.14461517333984, "p90": 515.7219665527344, "max": 848.1041870117188, "pos_frac": 0.921875, "sample": [372.91912841796875, 204.76675415039062, 164.8036346435547, 150.57965087890625, 267.4865417480469, 9.9315185546875, 312.9255676269531, 355.341552734375, 208.8966064453125, 723.5328369140625, 72.36958312988281, -141.4864501953125, 252.55221557617188, 25.48114013671875, 197.83255004882812, 47.246334075927734, 81.94976806640625, 237.8202362060547, 251.14999389648438, 743.2468872070312, 223.9876251220703, 331.31256103515625, 121.43745422363281, 301.3902587890625, 372.05645751953125, 337.8451232910156, 135.33592224121094, 108.35140991210938, 848.1041870117188, 167.3414764404297, 49.29212188720703, 91.84713745117188, 331.5943908691406, 115.4915771484375, 265.8660583496094, 336.4154052734375, 274.23638916015625, 97.70401000976562, 7.240108489990234, 456.5354919433594, -376.45672607421875, 320.9675598144531, 159.87094116210938, 404.0660095214844, 253.847412109375, 784.1376953125, 483.11993408203125, 290.16290283203125, 215.89596557617188, 323.6714172363281, 169.32220458984375, 368.6878967285156, 28.75128173828125, -109.92816162109375, 496.46038818359375, 523.9769287109375, 172.07386779785156, 174.5823211669922, 163.13784790039062, 236.30160522460938, -43.80957794189453, 604.179443359375, 610.6673583984375, -184.23251342773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000343.npy"} +{"epoch": 0.5036710719530103, "step": 344, "batch_size": 64, "mean": 211.33169555664062, "std": 292.8844299316406, "min": -307.1170654296875, "p10": -38.20401611328125, "median": 173.40929412841797, "p90": 558.83583984375, "max": 1500.6175537109375, "pos_frac": 0.78125, "sample": [47.679481506347656, -77.90630340576172, -19.279739379882812, -13.259841918945312, 46.5561408996582, 58.05596923828125, 182.8887939453125, 97.2900161743164, 78.08705139160156, 260.45654296875, -1.3785991668701172, 915.5072021484375, 135.08474731445312, 264.1190185546875, 177.00621032714844, 170.990966796875, 182.3535919189453, -307.1170654296875, -276.82489013671875, 315.151123046875, 240.3619842529297, 49.69500732421875, 202.42857360839844, 5.249866485595703, 230.30853271484375, 788.845703125, 582.9253540039062, 389.4647521972656, 104.30338287353516, -38.391845703125, 391.1974792480469, 235.73388671875, 282.9595947265625, 73.0257568359375, 173.24188232421875, -37.7657470703125, 361.6986083984375, -28.8504638671875, 309.11212158203125, 25.41450309753418, 547.3577880859375, 249.19277954101562, 173.5767059326172, 47.43730926513672, 83.21874237060547, 1500.6175537109375, 472.40045166015625, 304.0062255859375, -8.625547409057617, 581.9908447265625, -11.807395935058594, 518.1363525390625, 144.425537109375, 567.9193725585938, 39.14460372924805, -182.32972717285156, -163.1377716064453, 469.67095947265625, 544.971435546875, -233.34896850585938, 288.6415710449219, 362.4056091308594, 89.18992614746094, 563.7550048828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000344.npy"} +{"epoch": 0.5051395007342144, "step": 345, "batch_size": 64, "mean": 237.87557983398438, "std": 281.6604919433594, "min": -438.1977233886719, "p10": -70.02271728515623, "median": 195.13054656982422, "p90": 629.4166015625003, "max": 944.49267578125, "pos_frac": 0.8125, "sample": [-37.22388458251953, 678.625, 82.75823974609375, 385.5531311035156, 189.64996337890625, -229.32528686523438, 719.8302612304688, -167.23304748535156, 147.48287963867188, 477.2344970703125, -29.996999740600586, 298.77020263671875, 145.68438720703125, 430.4862976074219, 556.7327270507812, 108.16140747070312, -349.2793273925781, 356.20074462890625, 53.85195541381836, 83.85755920410156, 468.8708190917969, 402.6497802734375, 99.22434997558594, 392.80645751953125, 277.124267578125, 544.6883544921875, 128.47561645507812, 334.5335388183594, 83.04339599609375, 194.8404083251953, 35.1300048828125, 847.9840087890625, 450.9012451171875, 31.534896850585938, 97.8741455078125, 449.7477111816406, 454.498291015625, 453.05743408203125, 357.9273376464844, 206.6776885986328, 412.36566162109375, 944.49267578125, 95.07797241210938, 174.5125732421875, 413.09344482421875, 256.5116882324219, -41.66218185424805, 770.86865234375, 195.42068481445312, 474.129150390625, -55.63072967529297, -126.3973159790039, 196.55111694335938, 660.5668334960938, -11.198919296264648, -151.32281494140625, -76.19071197509766, 122.75460815429688, 706.9315795898438, 201.92538452148438, 141.232177734375, 143.17413330078125, -438.1977233886719, 1.6179046630859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000345.npy"} +{"epoch": 0.5066079295154186, "step": 346, "batch_size": 64, "mean": 224.0716094970703, "std": 267.63995361328125, "min": -628.772216796875, "p10": -96.83586349487302, "median": 224.06234741210938, "p90": 570.723553466797, "max": 808.4140014648438, "pos_frac": 0.796875, "sample": [318.0055847167969, 212.56996154785156, 21.44542694091797, 307.9814453125, 666.2012939453125, 546.0035400390625, 693.8778076171875, 196.1226348876953, 137.41709899902344, 770.2504272460938, -628.772216796875, 114.23155212402344, 401.2762451171875, 67.86825561523438, 327.3682556152344, 657.296630859375, -47.62615966796875, 316.943359375, 808.4140014648438, 120.88394165039062, 378.530029296875, 178.83657836914062, -137.86837768554688, 411.39202880859375, 314.77734375, 319.9234619140625, 370.6268615722656, 260.11376953125, 452.1483154296875, 64.02566528320312, 11.154233932495117, -107.65535736083984, 226.2029266357422, 50.04896545410156, 641.2168579101562, 221.92176818847656, 220.4002227783203, 130.70742797851562, 279.8194274902344, -178.0355987548828, -20.034828186035156, 579.6929931640625, 313.05615234375, 549.7948608398438, 208.0519561767578, 376.32666015625, 392.1484375, 355.4374694824219, -20.766590118408203, -69.53240966796875, 274.2144775390625, -71.59037780761719, 71.80050659179688, -213.09646606445312, 133.24632263183594, 512.9927978515625, 133.56369018554688, 150.2696533203125, 456.0361633300781, -244.35073852539062, -34.4130859375, -173.21337890625, 269.9779968261719, 294.92425537109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000346.npy"} +{"epoch": 0.5080763582966226, "step": 347, "batch_size": 64, "mean": 220.20404052734375, "std": 295.0769958496094, "min": -386.6219787597656, "p10": -121.00106887817383, "median": 176.42359924316406, "p90": 629.4262329101564, "max": 913.892578125, "pos_frac": 0.765625, "sample": [-386.6219787597656, 286.1881103515625, 427.31005859375, 360.0544738769531, 111.28551483154297, -56.023231506347656, 213.09841918945312, 281.73492431640625, 706.55078125, 21.99864959716797, 467.32098388671875, 31.565162658691406, 148.18544006347656, -118.34561157226562, 448.68792724609375, -320.04522705078125, -277.92645263671875, -118.21273803710938, 800.232177734375, 83.58824920654297, 586.5626831054688, 32.952030181884766, 177.41143798828125, 312.42620849609375, -224.22119140625, 543.10009765625, 570.186279296875, 347.55487060546875, -17.002647399902344, 714.5479736328125, 379.8228759765625, 662.5194091796875, 334.701171875, -52.226234436035156, 229.62884521484375, -122.13912200927734, 519.1170654296875, 144.55340576171875, 131.928955078125, -71.74327087402344, 175.43576049804688, 281.2651672363281, 83.27428436279297, 347.7645263671875, 145.42892456054688, 93.92373657226562, 121.87896728515625, -42.56177520751953, 913.892578125, 91.0781478881836, 379.46978759765625, 647.7963256835938, 184.0223846435547, 826.575927734375, -94.96002960205078, 37.444740295410156, 344.56439208984375, -208.76248168945312, 434.7796325683594, 2.5583343505859375, 477.6059265136719, -147.1201934814453, 139.49253845214844, 547.9046020507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000347.npy"} +{"epoch": 0.5095447870778267, "step": 348, "batch_size": 64, "mean": 189.67080688476562, "std": 265.00286865234375, "min": -320.6519775390625, "p10": -117.59218978881833, "median": 190.4236602783203, "p90": 455.5255462646484, "max": 1138.271484375, "pos_frac": 0.75, "sample": [338.38836669921875, 77.97350311279297, 159.18441772460938, 430.7359619140625, 355.90313720703125, -58.552589416503906, 90.72510528564453, 345.592529296875, 378.133056640625, -320.6519775390625, 102.18031311035156, 61.047210693359375, 645.384521484375, 269.8117980957031, 136.6550750732422, 316.4888000488281, 13.898538589477539, 315.219482421875, 131.2049560546875, 109.54412078857422, 358.1109924316406, 296.4319763183594, -87.82384490966797, 1138.271484375, 451.6849670410156, 260.5068664550781, -65.41475677490234, 237.37542724609375, -48.39244079589844, 37.54729461669922, -82.31654357910156, -149.06581115722656, -159.38650512695312, 868.3060913085938, 599.40673828125, 349.25457763671875, 329.7330322265625, 292.8187255859375, 221.66290283203125, 4.41224479675293, -238.75172424316406, 433.225830078125, -17.61450958251953, 307.7171325683594, 271.3600158691406, 227.43789672851562, -211.31768798828125, 10.374032974243164, 553.7444458007812, 312.71337890625, 108.72306060791016, -37.15827941894531, 457.1715087890625, -262.86700439453125, 360.37652587890625, 229.36424255371094, 97.61206817626953, -2.6372337341308594, 29.400604248046875, 533.23974609375, 90.89694213867188, 330.6854248046875, -130.3500518798828, -66.40365600585938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000348.npy"} +{"epoch": 0.5110132158590308, "step": 349, "batch_size": 64, "mean": 216.3872528076172, "std": 311.50262451171875, "min": -523.735107421875, "p10": -100.53388519287107, "median": 154.6573486328125, "p90": 580.7486206054688, "max": 1232.559814453125, "pos_frac": 0.78125, "sample": [137.12603759765625, -359.4029541015625, 437.4070129394531, 125.3019027709961, 86.58535766601562, 149.15505981445312, 486.3140869140625, -109.9931869506836, 237.52841186523438, 412.0101623535156, 191.59115600585938, 510.248046875, 111.41211700439453, -54.414390563964844, 40.407012939453125, 16.138906478881836, -3.9861507415771484, 21.80406951904297, 248.95761108398438, -196.24606323242188, 536.9299926757812, 372.47674560546875, 383.6249084472656, 20.393735885620117, 341.49755859375, 15.297222137451172, 484.679443359375, -78.4621810913086, 121.42717742919922, 182.12210083007812, 790.5144653320312, 33.33519744873047, -35.0174674987793, 567.7794799804688, 200.80563354492188, -523.735107421875, -151.14950561523438, 115.63324737548828, 601.3690185546875, 41.070743560791016, 744.849365234375, -5.056243896484375, -279.2729797363281, -68.75862121582031, 559.733154296875, 655.486572265625, 909.446533203125, 183.91184997558594, 586.3068237304688, 160.15963745117188, 436.96923828125, 460.36810302734375, -15.088081359863281, 357.8921813964844, 287.48211669921875, 297.2568054199219, -220.86648559570312, 55.66184997558594, 15.562236785888672, 485.2790222167969, 404.05133056640625, 43.734649658203125, 52.57891845703125, 1232.559814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000349.npy"} +{"epoch": 0.5124816446402349, "step": 350, "batch_size": 64, "mean": 209.05242919921875, "std": 289.9720153808594, "min": -606.6255493164062, "p10": -171.00472869873047, "median": 198.8621368408203, "p90": 532.5513977050783, "max": 943.2975463867188, "pos_frac": 0.828125, "sample": [817.62451171875, -384.323486328125, 106.91522979736328, 420.8197937011719, 321.0352783203125, 178.2570037841797, 471.57025146484375, 89.92990112304688, -144.78431701660156, 126.17240905761719, 107.55630493164062, 454.7051696777344, -420.18212890625, 161.2045440673828, 200.4522705078125, 5.723663330078125, 680.6370849609375, 465.087890625, 943.2975463867188, 290.3802795410156, 186.89495849609375, 267.76812744140625, -43.57250213623047, 467.8252258300781, 253.98956298828125, 709.6295166015625, 145.32177734375, -194.922607421875, 156.44625854492188, 49.82752227783203, 554.6517333984375, 106.29150390625, 175.26019287109375, 197.27200317382812, 322.7903137207031, 292.68841552734375, 146.55636596679688, 286.77301025390625, -251.50067138671875, 650.4625244140625, 187.7472381591797, 480.98394775390625, 366.4393310546875, -173.68212890625, 179.5970458984375, 234.04893493652344, 602.8392944335938, 424.4551086425781, 215.26296997070312, 322.40582275390625, -606.6255493164062, 419.46112060546875, 98.59159851074219, 246.74600219726562, 47.94903564453125, 230.2664794921875, -335.21246337890625, -91.2584228515625, 22.963523864746094, 183.75576782226562, 427.265625, 234.87667846679688, -164.75746154785156, 452.70257568359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000350.npy"} +{"epoch": 0.5139500734214391, "step": 351, "batch_size": 64, "mean": 252.74497985839844, "std": 318.532470703125, "min": -358.80255126953125, "p10": -105.29508819580073, "median": 224.76630401611328, "p90": 685.8322692871094, "max": 1172.0994873046875, "pos_frac": 0.765625, "sample": [84.82916259765625, 313.55096435546875, 537.9082641601562, 1054.3948974609375, 1172.0994873046875, 168.54115295410156, 403.1637878417969, 112.1473388671875, 272.60052490234375, 302.28985595703125, 50.23004150390625, 229.642333984375, 688.1254272460938, 810.4240112304688, 345.09429931640625, 375.0956726074219, 513.2069091796875, 616.5144653320312, 680.4815673828125, 5.137353897094727, -288.2640075683594, 390.0844421386719, 96.01425170898438, 200.08560180664062, 196.83999633789062, 359.0082702636719, 493.03094482421875, 228.8745574951172, -358.80255126953125, -50.02442932128906, -295.4675598144531, 163.55194091796875, -178.90069580078125, 96.83873748779297, -0.787322998046875, 865.1365966796875, 26.899497985839844, 130.07388305664062, 227.1419677734375, -0.6306533813476562, -128.98251342773438, 392.9728698730469, -18.657543182373047, 702.2907104492188, 88.62435913085938, -17.3868350982666, 222.39064025878906, -6.146820068359375, 20.03692626953125, -189.00611877441406, -35.627235412597656, 670.9004516601562, 192.01585388183594, 409.78216552734375, 734.3846435546875, -180.6837615966797, 301.40716552734375, 677.993896484375, 304.35845947265625, 285.9725036621094, 156.69053649902344, 371.454833984375, 229.0377960205078, -44.32494354248047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000351.npy"} +{"epoch": 0.5154185022026432, "step": 352, "batch_size": 64, "mean": 154.27169799804688, "std": 277.7617492675781, "min": -630.897705078125, "p10": -142.59793548583983, "median": 124.16315841674805, "p90": 492.4511657714844, "max": 964.2426147460938, "pos_frac": 0.6875, "sample": [370.6941833496094, 378.0517578125, 151.44464111328125, 477.8457336425781, -346.3918151855469, 113.37186431884766, 56.386268615722656, 428.7138671875, 83.61796569824219, 231.2806396484375, 640.8895263671875, 494.2449951171875, -57.78745651245117, 63.89427947998047, 224.33909606933594, -115.36402893066406, -84.7789306640625, 964.2426147460938, 290.1365051269531, -148.78744506835938, 175.8382568359375, 708.4384765625, 140.12278747558594, 423.6473388671875, 66.97633361816406, -103.56787872314453, -91.70303344726562, -264.99713134765625, 8.656471252441406, 109.94244384765625, -13.889801025390625, 488.26556396484375, 17.330535888671875, 29.725303649902344, 580.6672973632812, 129.6619873046875, -630.897705078125, 67.4289321899414, 230.869140625, 377.2275695800781, 223.09059143066406, 172.11764526367188, 173.6696319580078, -203.58670043945312, -71.49119567871094, -31.944366455078125, -183.63101196289062, 582.09912109375, 471.637451171875, -162.0402069091797, 266.2066650390625, -65.2603988647461, -128.15574645996094, 453.097900390625, 266.22845458984375, 212.24136352539062, 394.96063232421875, -19.17156219482422, 572.4995727539062, 118.6643295288086, -22.704925537109375, -100.12675476074219, 208.016357421875, 81.18502807617188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000352.npy"} +{"epoch": 0.5168869309838473, "step": 353, "batch_size": 64, "mean": 202.0507354736328, "std": 280.7708740234375, "min": -459.3804016113281, "p10": -83.51435012817383, "median": 142.86984252929688, "p90": 598.4275512695312, "max": 876.1688232421875, "pos_frac": 0.703125, "sample": [348.3616943359375, 380.8893737792969, 41.54857635498047, 464.9344482421875, 187.83377075195312, 439.531982421875, 47.42170715332031, -82.53789520263672, 576.3453369140625, 167.672119140625, 224.80093383789062, -42.96881866455078, 268.76171875, 572.4124145507812, 17.081939697265625, 418.093505859375, -112.61424255371094, -12.7978515625, 589.0533447265625, 706.4724731445312, 138.0071563720703, 286.7730407714844, -41.11777114868164, 103.29376220703125, -459.3804016113281, 412.7105712890625, -157.55084228515625, -51.27198028564453, -69.94269561767578, 269.354248046875, -15.556440353393555, 147.73252868652344, 322.8925476074219, 319.98297119140625, 43.623779296875, -117.28005981445312, 51.423797607421875, -152.59707641601562, 779.6221923828125, 1.7158050537109375, -54.57122802734375, 642.0028076171875, -57.959999084472656, 58.17304229736328, 119.04730987548828, 293.5189208984375, 334.38787841796875, 136.09117126464844, -39.795806884765625, 602.445068359375, 247.91897583007812, -68.37518310546875, 577.5819091796875, 385.9265441894531, 876.1688232421875, 792.021728515625, -193.91148376464844, 375.3765563964844, 622.46728515625, -83.93283081054688, 108.4616928100586, 41.098506927490234, -45.2855224609375, 249.65927124023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000353.npy"} +{"epoch": 0.5183553597650514, "step": 354, "batch_size": 64, "mean": 227.90103149414062, "std": 227.87921142578125, "min": -145.05081176757812, "p10": -43.271263885498016, "median": 201.76913452148438, "p90": 482.2512115478516, "max": 935.43408203125, "pos_frac": 0.859375, "sample": [303.9041748046875, 485.113525390625, 329.4228515625, 34.052398681640625, 442.6803283691406, 201.85845947265625, 641.0870361328125, -81.32365417480469, 260.48431396484375, -105.86961364746094, -14.361396789550781, -142.53280639648438, 204.14720153808594, 935.43408203125, 460.7271423339844, 91.84041595458984, 18.395668029785156, 384.0247497558594, 437.5292053222656, 329.2491149902344, 16.639625549316406, 391.8421325683594, 148.81564331054688, 121.32876586914062, -137.63919067382812, 134.648681640625, 377.4793701171875, 193.82936096191406, 96.77220916748047, 345.28497314453125, 500.20306396484375, -132.29627990722656, 324.9912414550781, 259.6898193359375, 149.96292114257812, 177.91493225097656, -145.05081176757812, 475.5724792480469, 201.6798095703125, 29.455276489257812, 460.6644592285156, 15.913917541503906, 355.80548095703125, 202.935302734375, 81.34473419189453, 325.41015625, 91.0862045288086, 702.3811645507812, 393.7362060546875, -15.763275146484375, 18.07961082458496, 427.31451416015625, 404.11578369140625, 104.84780883789062, -55.060401916503906, 679.0970458984375, 40.14147186279297, 349.69677734375, 358.0356140136719, 97.47868347167969, 535.8135375976562, 111.83384704589844, 136.13705444335938, 17.63247299194336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000354.npy"} +{"epoch": 0.5198237885462555, "step": 355, "batch_size": 64, "mean": 225.7357177734375, "std": 229.86846923828125, "min": -324.9645080566406, "p10": -30.32895526885985, "median": 210.5216522216797, "p90": 522.3421386718751, "max": 914.073974609375, "pos_frac": 0.828125, "sample": [139.15357971191406, 526.388916015625, 365.9240417480469, 512.899658203125, 69.18965911865234, 252.77101135253906, 250.92950439453125, 914.073974609375, -321.79095458984375, 255.42137145996094, 242.43551635742188, 593.9324951171875, -4.978706359863281, -17.74593162536621, 431.3343505859375, 584.7337036132812, -78.9520263671875, 546.7650756835938, 154.6210479736328, 130.48574829101562, 408.04290771484375, 256.9345703125, 0.8321990966796875, 1.0284233093261719, 378.05450439453125, 501.0787353515625, 461.6810302734375, 288.279052734375, 168.57447814941406, -3.6902618408203125, -70.78656005859375, 216.634521484375, 44.93061828613281, -324.9645080566406, 237.19473266601562, -136.9355010986328, 339.9219970703125, 579.5394897460938, 302.3478088378906, 670.2696533203125, 87.5799331665039, 185.2608642578125, 112.57106018066406, 167.88232421875, 179.46231079101562, 277.1622314453125, 111.51177215576172, 444.3353576660156, 89.34576416015625, 207.19430541992188, 475.07489013671875, 420.1667785644531, 292.9024658203125, 205.49191284179688, 109.75971984863281, -35.7216796875, 132.7085723876953, -48.595821380615234, 213.8489990234375, -7.652912139892578, 87.96456909179688, 377.8345947265625, 333.87835693359375, 160.56011962890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000355.npy"} +{"epoch": 0.5212922173274597, "step": 356, "batch_size": 64, "mean": 123.73868560791016, "std": 296.9876403808594, "min": -580.0505981445312, "p10": -233.57044830322263, "median": 112.79378890991211, "p90": 496.9706512451173, "max": 944.3052978515625, "pos_frac": 0.671875, "sample": [247.8245849609375, 38.892906188964844, 342.1179504394531, 77.1993637084961, 0.4556694030761719, -92.46987915039062, -116.64532470703125, 198.10287475585938, -116.01872253417969, 247.1649169921875, 292.59552001953125, 0.8722343444824219, 731.5053100585938, 463.7488708496094, -126.22630310058594, 610.2012939453125, -155.7764129638672, -188.0206756591797, -580.0505981445312, -244.06411743164062, 44.320167541503906, 784.2017211914062, -72.33659362792969, 120.44158935546875, 324.3758544921875, 511.20855712890625, -105.5578842163086, 394.5140075683594, -333.4460754394531, 70.10674285888672, -276.99267578125, 653.996337890625, 113.95576477050781, -281.1777648925781, 388.2528076171875, 114.43315124511719, 253.36907958984375, 345.93792724609375, -209.08522033691406, 51.51765441894531, -96.13417053222656, -102.7268295288086, 173.7303466796875, 223.7396240234375, 8.102838516235352, 281.22052001953125, 292.5920715332031, -192.0137176513672, 677.5839233398438, 220.70767211914062, 194.97613525390625, -2.279754638671875, -301.8314208984375, -5.693000793457031, 239.807373046875, 111.6318130493164, 126.17820739746094, 282.2066650390625, -376.0046691894531, 944.3052978515625, 377.60308837890625, 251.4455108642578, 8.229881286621094, 58.453773498535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000356.npy"} +{"epoch": 0.5227606461086637, "step": 357, "batch_size": 64, "mean": 202.65982055664062, "std": 245.2743377685547, "min": -608.938232421875, "p10": -39.72139568328856, "median": 159.15419006347656, "p90": 559.5482910156251, "max": 772.615478515625, "pos_frac": 0.859375, "sample": [380.36004638671875, 571.0425415039062, 66.41859436035156, 634.849609375, 541.8399658203125, 46.29283142089844, 67.62269592285156, 414.825927734375, 143.15548706054688, 528.05517578125, 251.53074645996094, 102.11144256591797, -168.59317016601562, 569.9592895507812, -57.425079345703125, 140.16738891601562, 155.84024047851562, 162.4681396484375, 274.545654296875, -95.65797424316406, 729.0716552734375, 3.0835800170898438, 295.7194519042969, -9.240676879882812, -608.938232421875, 48.822994232177734, 314.32830810546875, 125.65868377685547, -31.41343116760254, 567.1375732421875, 306.3926086425781, -126.20734405517578, 487.6840515136719, -116.04428100585938, 379.9537353515625, 0.22712326049804688, 362.8873596191406, 772.615478515625, 286.93756103515625, 276.28338623046875, 7.388824462890625, 7.361181259155273, 295.2706298828125, 486.7042236328125, 52.89679718017578, 77.74171447753906, 53.99192810058594, 279.20147705078125, 18.550350189208984, 200.85067749023438, 5.148927688598633, 269.9248046875, 176.28433227539062, 7.7466888427734375, 303.01336669921875, 318.5137634277344, 117.74662780761719, 314.24395751953125, -43.281951904296875, 245.5934600830078, 112.61222839355469, 676.28564453125, 39.9957275390625, 152.07363891601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000357.npy"} +{"epoch": 0.5242290748898678, "step": 358, "batch_size": 64, "mean": 192.69509887695312, "std": 250.69949340820312, "min": -390.21484375, "p10": -53.252734374999996, "median": 148.53872680664062, "p90": 494.21686401367197, "max": 883.833251953125, "pos_frac": 0.78125, "sample": [63.070220947265625, 645.0127563476562, 63.45201110839844, 263.0637512207031, 264.46881103515625, 345.68218994140625, 76.44224548339844, 289.3414306640625, 109.43359375, 45.59559631347656, 3.5593795776367188, -47.21258544921875, -113.39065551757812, -24.274551391601562, -3.453157424926758, 757.711669921875, -43.499794006347656, 470.55413818359375, 42.997222900390625, 117.31942749023438, 32.94768524169922, -2.5168609619140625, 19.18572235107422, 70.34244537353516, 158.16357421875, -390.21484375, 349.3166809082031, -145.04852294921875, 883.833251953125, 415.435791015625, 463.92138671875, 213.97006225585938, 304.298828125, -32.822975158691406, 593.9525146484375, 201.64190673828125, 5.448127746582031, 101.61614990234375, -95.15441131591797, 332.6430969238281, 307.674072265625, 406.8035583496094, -179.99136352539062, 315.89202880859375, 406.31231689453125, -55.84136962890625, 370.5783386230469, 695.5030517578125, -12.372661590576172, 111.47001647949219, 365.20623779296875, 163.90219116210938, 80.64517211914062, 13.786796569824219, 504.3580322265625, 862.4113159179688, 72.08257293701172, 219.37771606445312, 263.06982421875, -102.16201782226562, 194.40049743652344, 156.39395141601562, 225.4694366455078, 140.68350219726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000358.npy"} +{"epoch": 0.5256975036710719, "step": 359, "batch_size": 64, "mean": 224.2485809326172, "std": 288.9275817871094, "min": -376.64849853515625, "p10": -124.15082244873045, "median": 176.7684783935547, "p90": 604.7479980468751, "max": 978.936279296875, "pos_frac": 0.765625, "sample": [-376.64849853515625, 239.898681640625, 182.69174194335938, 434.6165466308594, -133.59750366210938, 711.412109375, -213.62673950195312, 892.0838012695312, 113.41817474365234, -83.37483215332031, -149.9470977783203, -65.73197174072266, 380.0054016113281, 328.7365417480469, -179.57974243164062, 53.82972717285156, 389.5958251953125, -83.03729248046875, 501.70318603515625, -158.86233520507812, 279.8422546386719, 234.88815307617188, 532.326171875, 82.7417221069336, 10.647907257080078, 135.4964141845703, -42.265708923339844, 528.048095703125, 79.904296875, 46.35448455810547, 469.79052734375, 292.40350341796875, 978.936279296875, 449.45294189453125, -102.10856628417969, 58.428260803222656, 199.586181640625, 102.8189926147461, -154.3475341796875, 685.9373779296875, 135.4988250732422, 511.0741271972656, 252.70274353027344, 376.55694580078125, -23.578094482421875, 165.28038024902344, 503.2309265136719, 247.60040283203125, 131.65621948242188, 723.3580322265625, 53.12566375732422, -22.652145385742188, -99.77598571777344, 307.625244140625, 215.0445556640625, 614.1470947265625, 44.662841796875, 214.04469299316406, 781.8333740234375, 527.3495483398438, 122.27246856689453, 582.8167724609375, 164.7219696044922, 170.84521484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000359.npy"} +{"epoch": 0.527165932452276, "step": 360, "batch_size": 64, "mean": 172.14373779296875, "std": 206.3827667236328, "min": -165.85263061523438, "p10": -75.5314552307129, "median": 147.30007934570312, "p90": 446.4451385498048, "max": 652.3556518554688, "pos_frac": 0.796875, "sample": [54.903045654296875, 515.3936767578125, 4.463859558105469, 122.1954345703125, -35.88390350341797, 343.6920166015625, 58.018959045410156, 185.69900512695312, 386.2077941894531, 151.7951202392578, 149.0478515625, 511.9936218261719, -116.77891540527344, 323.84149169921875, 652.3556518554688, 111.66590881347656, 39.1193733215332, 46.96711730957031, 302.48297119140625, 96.97561645507812, 367.1550598144531, 195.0708465576172, -66.3033676147461, 250.0842742919922, 29.68056869506836, 342.09661865234375, 411.16595458984375, 80.46354675292969, 0.5912055969238281, 461.5647888183594, 533.4970703125, 366.251953125, 512.255126953125, -76.88313293457031, 55.982139587402344, 150.3088836669922, 205.9949951171875, 117.27981567382812, 395.8818359375, -52.647491455078125, 70.13197326660156, 607.05322265625, -142.64022827148438, -115.90182495117188, -84.29293823242188, 156.69827270507812, -165.85263061523438, 154.1070098876953, 13.372003555297852, -142.50221252441406, -18.810028076171875, 325.29473876953125, 17.757299423217773, 388.4170837402344, 409.719970703125, 260.5455322265625, 243.25914001464844, -72.3775405883789, 145.55230712890625, 403.8544006347656, 80.45936584472656, -72.37284088134766, 18.951622009277344, 353.1300964355469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000360.npy"} +{"epoch": 0.5286343612334802, "step": 361, "batch_size": 64, "mean": 176.93666076660156, "std": 258.85650634765625, "min": -433.2300109863281, "p10": -88.87191009521484, "median": 193.77503967285156, "p90": 530.8234741210938, "max": 786.9251708984375, "pos_frac": 0.6875, "sample": [30.74092674255371, 209.01016235351562, 19.526012420654297, 366.79327392578125, -65.8343276977539, -303.2352600097656, -47.500282287597656, 195.66500854492188, 526.5777587890625, 141.11607360839844, 196.56472778320312, 243.70339965820312, -433.2300109863281, 415.43218994140625, -42.69136047363281, -76.56024932861328, 191.88507080078125, 151.9694366455078, -310.826416015625, -7.133241653442383, 208.9725341796875, -88.73604583740234, -7.351255416870117, -18.859905242919922, 226.91600036621094, 747.2235107421875, 351.38592529296875, 439.78045654296875, 113.40618896484375, 435.11865234375, -76.89360046386719, -37.39077377319336, 215.52626037597656, 199.90225219726562, 480.04083251953125, 547.3516845703125, -116.24891662597656, 786.9251708984375, 687.1710815429688, 525.7738647460938, 179.48667907714844, 202.28440856933594, 532.64306640625, 392.65869140625, 166.14743041992188, 196.82827758789062, 100.72325897216797, 212.32449340820312, -53.41302490234375, -88.93013763427734, 51.46062469482422, -62.13437271118164, 551.1358032226562, -7.818153381347656, 349.1455078125, 276.28240966796875, 148.70318603515625, -134.2550048828125, 262.4823913574219, 50.37278747558594, 244.24359130859375, -245.28329467773438, 557.1740112304688, 419.69757080078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000361.npy"} +{"epoch": 0.5301027900146843, "step": 362, "batch_size": 64, "mean": 276.5138854980469, "std": 268.0620422363281, "min": -169.37771606445312, "p10": -28.023215484619133, "median": 232.1100616455078, "p90": 620.218798828125, "max": 1302.0435791015625, "pos_frac": 0.859375, "sample": [110.737548828125, 152.47161865234375, 528.31396484375, 578.9345703125, 294.3116149902344, 10.177558898925781, 1302.0435791015625, 269.90399169921875, 141.59515380859375, 654.5501708984375, 504.1483459472656, 123.12967681884766, 61.556983947753906, 156.31350708007812, 263.734130859375, 462.7675476074219, 152.8663787841797, -30.921911239624023, 105.19000244140625, 376.9049072265625, 480.44287109375, 100.67453002929688, 20.2231502532959, -65.43753051757812, -12.778610229492188, -21.259592056274414, 150.42599487304688, 223.38539123535156, -114.30321502685547, 693.7906494140625, 589.7765502929688, 114.54920959472656, -86.955810546875, 256.9780578613281, 265.5118408203125, 178.62855529785156, 609.1427001953125, -48.105499267578125, 214.2425079345703, 624.9656982421875, 240.83473205566406, 108.57522583007812, 718.568115234375, 478.8663024902344, 152.86489868164062, 151.6644287109375, 395.29229736328125, 181.16073608398438, 70.45863342285156, 776.7134399414062, 242.22018432617188, 762.1973266601562, 475.0221862792969, 377.469482421875, 282.76861572265625, 462.4358825683594, 513.1502075195312, -93.10614013671875, 191.74195861816406, 368.9721984863281, 283.1351318359375, 325.4558410644531, 7.178558349609375, -169.37771606445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000362.npy"} +{"epoch": 0.5315712187958884, "step": 363, "batch_size": 64, "mean": 278.2501220703125, "std": 272.1302490234375, "min": -262.06915283203125, "p10": -29.339080047607414, "median": 258.8816375732422, "p90": 643.0626098632813, "max": 1288.8670654296875, "pos_frac": 0.859375, "sample": [-262.06915283203125, 365.2354431152344, 579.8530883789062, 243.16433715820312, 115.56925964355469, 217.63720703125, 294.52093505859375, 257.99237060546875, 261.206298828125, 153.3634490966797, 360.6451416015625, 259.38787841796875, 360.29205322265625, -7.656852722167969, 363.8011474609375, 863.813720703125, -33.201202392578125, 623.6043701171875, 83.20774841308594, 703.3868408203125, 456.8634948730469, 60.55364227294922, 308.39422607421875, 391.138427734375, 331.4158630371094, 418.28533935546875, 1288.8670654296875, 514.2841796875, -64.4935302734375, 605.6915893554688, 153.67189025878906, 230.61537170410156, 18.550994873046875, 263.45587158203125, 450.40936279296875, 50.24793243408203, 226.93783569335938, -38.54632568359375, 651.40185546875, 463.26959228515625, 412.76617431640625, -127.49483489990234, 40.904930114746094, 420.62603759765625, 36.579566955566406, 408.62213134765625, 679.5469360351562, 4.61785888671875, 252.71197509765625, 394.9355163574219, -36.095924377441406, 135.87646484375, -108.5083236694336, 416.4530029296875, 258.3753967285156, 21.32807159423828, 17.64550018310547, 229.90328979492188, 46.78921127319336, 29.89289093017578, 694.102294921875, 691.7026977539062, -20.32746124267578, 322.285400390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000363.npy"} +{"epoch": 0.5330396475770925, "step": 364, "batch_size": 64, "mean": 206.77392578125, "std": 282.0346984863281, "min": -514.3486938476562, "p10": -87.62141647338866, "median": 203.9774627685547, "p90": 519.6166381835938, "max": 930.427490234375, "pos_frac": 0.796875, "sample": [431.2198791503906, 239.88572692871094, -225.3948211669922, 37.329734802246094, -514.3486938476562, 589.436279296875, -236.06350708007812, 373.32208251953125, 229.19589233398438, 905.2470092773438, 207.8909149169922, 633.9574584960938, 440.048583984375, 353.8331298828125, -60.91413116455078, 71.03593444824219, 54.95143127441406, -288.1734619140625, 323.4681091308594, -46.589599609375, -92.98699951171875, 416.90264892578125, 223.98892211914062, 426.2620849609375, 313.910400390625, 21.863954544067383, 5.579494476318359, 507.68048095703125, 200.0640106201172, 294.11810302734375, -27.962265014648438, 706.29296875, 124.7328872680664, 15.406585693359375, 113.47724151611328, 91.88949584960938, 459.4336242675781, 328.5985412597656, 289.031494140625, -123.39804077148438, 113.04235076904297, 515.8560180664062, 695.4214477539062, 107.33741760253906, 326.42706298828125, -25.96076202392578, -4.466531753540039, 386.14154052734375, 521.2283325195312, 360.6866149902344, 12.744487762451172, 187.37588500976562, 43.073211669921875, 167.21957397460938, 930.427490234375, 62.98291015625, 308.35260009765625, 303.5256042480469, -75.10172271728516, 257.5251159667969, 23.338897705078125, 151.05810546875, 463.615234375, -412.5424499511719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000364.npy"} +{"epoch": 0.5345080763582967, "step": 365, "batch_size": 64, "mean": 221.8110809326172, "std": 262.0715026855469, "min": -225.78330993652344, "p10": -74.76783447265622, "median": 167.2810821533203, "p90": 515.403973388672, "max": 1251.3406982421875, "pos_frac": 0.84375, "sample": [170.73336791992188, 130.18917846679688, 140.92694091796875, 125.50338745117188, 106.0483169555664, 137.34796142578125, 354.2581787109375, 117.94098663330078, 259.1622009277344, 169.29263305664062, 308.56182861328125, 485.62432861328125, -86.61038208007812, 224.08792114257812, 72.68067932128906, -47.135223388671875, 88.78657531738281, 707.70947265625, 559.427490234375, 295.7680358886719, 148.02728271484375, 72.65648651123047, 84.17623138427734, 18.890960693359375, 242.3701934814453, -117.1776123046875, 90.00749206542969, 34.73410415649414, 204.09262084960938, 165.26953125, -118.63117980957031, -36.73672103881836, 118.60968780517578, 40.541385650634766, 432.370361328125, 663.0087280273438, 438.90972900390625, 67.35432434082031, -107.79083251953125, 501.09930419921875, 286.036865234375, -200.81814575195312, 232.22560119628906, 8.083658218383789, 14.485326766967773, 287.5585632324219, 265.3354187011719, 286.2047119140625, 373.26141357421875, -144.49017333984375, 59.341468811035156, 402.55120849609375, -1.529500961303711, 279.87603759765625, 274.93194580078125, 329.1527404785156, 1251.3406982421875, 521.5345458984375, 490.8909912109375, -225.78330993652344, 799.867431640625, 787.3084716796875, 127.66996765136719, 428.78814697265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000365.npy"} +{"epoch": 0.5359765051395007, "step": 366, "batch_size": 64, "mean": 222.82901000976562, "std": 290.5567626953125, "min": -334.91253662109375, "p10": -131.32156829833983, "median": 199.9437255859375, "p90": 603.9563598632812, "max": 975.8141479492188, "pos_frac": 0.78125, "sample": [-178.3370361328125, 207.57061767578125, 306.7945251464844, 110.91847229003906, -309.0106201171875, 35.44599914550781, -104.25027465820312, 67.15875244140625, -127.3450927734375, 479.43670654296875, 74.71569061279297, -221.37179565429688, 486.7572937011719, 662.3225708007812, 212.7114715576172, -91.12440490722656, 344.6890563964844, 25.83391571044922, 164.59288024902344, -78.07856750488281, 324.5382995605469, 595.4917602539062, 379.8739318847656, 16.031082153320312, 607.5840454101562, 975.8141479492188, 363.3875732421875, 122.31380462646484, 581.163818359375, 192.31683349609375, 183.58596801757812, -76.76028442382812, -234.04824829101562, 660.2659301757812, 361.21966552734375, 365.409912109375, 846.1091918945312, 351.28948974609375, 231.98895263671875, 141.19325256347656, 243.66763305664062, 148.9573974609375, 86.23675537109375, -172.13897705078125, 539.3721313476562, -133.02577209472656, 325.461181640625, 65.84996032714844, 536.261962890625, 593.3414306640625, 384.38134765625, 304.88653564453125, 734.4830322265625, 331.1398010253906, 735.8528442382812, 249.11807250976562, -69.71017456054688, 29.202072143554688, -101.2818603515625, 138.50289916992188, 115.0107650756836, 134.87762451171875, 317.3227233886719, -334.91253662109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000366.npy"} +{"epoch": 0.5374449339207048, "step": 367, "batch_size": 64, "mean": 186.90333557128906, "std": 238.9384002685547, "min": -500.6032409667969, "p10": -73.23221511840819, "median": 140.16596221923828, "p90": 502.54587097167973, "max": 799.8734130859375, "pos_frac": 0.8125, "sample": [45.76017761230469, 122.1705322265625, 462.0465087890625, 71.693603515625, 627.0469970703125, 10.351438522338867, 475.13458251953125, 131.09890747070312, 509.04376220703125, 71.6525650024414, 27.509048461914062, 14.81260871887207, -65.28839111328125, 519.7479858398438, 457.6923828125, 245.1343994140625, 420.48956298828125, 129.42050170898438, 226.6813507080078, -76.63671112060547, 304.1075744628906, 412.2551574707031, -36.46479034423828, 3.9834823608398438, 332.3192443847656, 289.8069763183594, 799.8734130859375, -54.22154235839844, 566.9968872070312, 90.79644775390625, 411.4115905761719, 187.19593811035156, 11.53592300415039, 326.63458251953125, 64.42634582519531, 230.19482421875, 316.73223876953125, 333.42132568359375, 417.55914306640625, 91.06294250488281, 161.29464721679688, -235.39743041992188, -202.2199249267578, -78.074462890625, -97.39697265625, 70.36080169677734, 87.20132446289062, 405.8268737792969, 144.90406799316406, 54.732818603515625, -225.31716918945312, -500.6032409667969, 135.4278564453125, 122.92935180664062, 267.305419921875, 348.0206298828125, 487.3841247558594, 315.3201904296875, 525.0726928710938, 13.374801635742188, 150.05091857910156, -7.058799743652344, 512.127685546875, -18.642578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000367.npy"} +{"epoch": 0.5389133627019089, "step": 368, "batch_size": 64, "mean": 286.42449951171875, "std": 226.30055236816406, "min": -225.81858825683594, "p10": 4.588896942138673, "median": 300.4633026123047, "p90": 556.7817382812501, "max": 1036.411865234375, "pos_frac": 0.921875, "sample": [299.79833984375, 376.8575134277344, 219.18345642089844, 490.2948913574219, 391.98309326171875, 581.4668579101562, 178.17755126953125, 27.887680053710938, 532.0224609375, 206.04360961914062, 3.4133644104003906, 415.3477783203125, 477.45611572265625, 4.081264495849609, 301.1282653808594, 344.2088317871094, 289.4712829589844, 190.05430603027344, -72.17029571533203, 430.7484436035156, 206.87979125976562, 259.39691162109375, 597.436767578125, 335.478515625, 349.3165283203125, 125.32705688476562, 397.00152587890625, 5.773372650146484, 67.71600341796875, 40.672874450683594, 157.28013610839844, 470.3797607421875, 383.0622253417969, 382.9200439453125, 394.3975830078125, 563.0296630859375, 395.37103271484375, 18.688905715942383, 182.23277282714844, -41.269493103027344, 319.39044189453125, 542.2032470703125, 474.6313781738281, 601.6947021484375, 143.2366180419922, 320.8206787109375, 666.2593383789062, 21.263198852539062, -225.81858825683594, 99.46978759765625, 105.53351593017578, 511.1685485839844, 466.3869323730469, 505.8788757324219, 358.5680236816406, 18.259153366088867, 209.16848754882812, 1036.411865234375, 160.3609619140625, 223.3074951171875, 290.420654296875, -27.607315063476562, -85.12040710449219, 616.7339477539062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000368.npy"} +{"epoch": 0.540381791483113, "step": 369, "batch_size": 64, "mean": 144.11106872558594, "std": 285.2254943847656, "min": -662.96337890625, "p10": -185.36820220947263, "median": 127.80835723876953, "p90": 520.0304779052735, "max": 827.2373046875, "pos_frac": 0.765625, "sample": [-285.6056213378906, -149.56814575195312, 827.2373046875, -92.64522552490234, -248.92567443847656, 4.18855094909668, -64.66487884521484, 303.65960693359375, 36.186641693115234, 679.16357421875, 233.01406860351562, -130.93438720703125, 130.72665405273438, 319.20294189453125, 301.81103515625, 803.587158203125, 505.2646179199219, 114.10803985595703, 11.96860122680664, 396.1366882324219, 296.76141357421875, 100.70384216308594, 94.24984741210938, 266.0636901855469, 201.30953979492188, 175.69232177734375, -511.66937255859375, 261.2530517578125, 179.01821899414062, 129.2945556640625, 220.23556518554688, 310.3756103515625, 119.48711395263672, 126.32215881347656, 175.02444458007812, 82.43550872802734, 336.5901184082031, 79.0167465209961, -157.44740295410156, 385.96868896484375, 526.3587036132812, -205.4956512451172, 688.552001953125, 23.923995971679688, 471.0660095214844, 204.07366943359375, 390.37847900390625, -426.0792541503906, 0.16925430297851562, 558.1821899414062, 156.95559692382812, 118.77693176269531, -662.96337890625, 79.56620025634766, 139.29205322265625, -197.33425903320312, -130.51356506347656, 96.19963836669922, 101.08325958251953, 555.2603759765625, 156.28854370117188, -55.449989318847656, 108.30172729492188, -38.08171844482422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000369.npy"} +{"epoch": 0.5418502202643172, "step": 370, "batch_size": 64, "mean": 152.9559783935547, "std": 291.0430603027344, "min": -433.74688720703125, "p10": -215.42227630615233, "median": 161.16058349609375, "p90": 556.5637268066407, "max": 893.702392578125, "pos_frac": 0.71875, "sample": [54.237220764160156, 285.5188293457031, 260.65606689453125, -419.1605224609375, 645.0167846679688, 493.43182373046875, 108.47846221923828, 290.08154296875, 22.402921676635742, -125.2692642211914, 424.57012939453125, 530.538330078125, 42.593414306640625, 142.477294921875, -118.87397766113281, 629.771728515625, 228.70355224609375, 237.686767578125, 220.65756225585938, 893.702392578125, 810.390380859375, 280.69342041015625, 270.2837219238281, -170.90170288085938, 281.6763610839844, -86.24906158447266, 5.54644775390625, 320.41522216796875, -207.80203247070312, 175.35382080078125, 567.7174682617188, 186.546142578125, 37.55046081542969, 305.74725341796875, -23.846599578857422, 415.9362487792969, 239.83053588867188, -179.92437744140625, -378.461181640625, 29.110410690307617, 158.70303344726562, 229.35595703125, -218.68809509277344, 96.69729614257812, 441.02667236328125, 95.56207275390625, 101.84771728515625, 297.3499755859375, 227.02854919433594, -433.74688720703125, -82.80216979980469, -48.95989990234375, 102.35260009765625, 163.61813354492188, -303.7436218261719, -278.3916320800781, 10.509513854980469, -315.88629150390625, -58.45071029663086, 526.8826904296875, 579.6236572265625, 257.056640625, -57.06146240234375, 572.464599609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000370.npy"} +{"epoch": 0.5433186490455213, "step": 371, "batch_size": 64, "mean": 211.5328369140625, "std": 305.36083984375, "min": -555.2072143554688, "p10": -116.16462173461913, "median": 167.8176040649414, "p90": 653.924353027344, "max": 933.1671752929688, "pos_frac": 0.75, "sample": [346.250732421875, 259.5673522949219, 498.0272521972656, 72.07393646240234, -555.2072143554688, 141.86264038085938, 531.0560913085938, 166.09884643554688, 874.1651000976562, 335.9478759765625, 504.84503173828125, 21.13686180114746, 322.4808349609375, 223.8604736328125, -312.0935363769531, 86.06819152832031, -4.474884033203125, 196.91891479492188, 494.1829833984375, 44.21563720703125, -31.761945724487305, -238.06861877441406, 692.5279541015625, -118.78401184082031, -200.08135986328125, 159.22474670410156, 907.5377807617188, 702.7280883789062, -236.08607482910156, 8.502531051635742, 678.140380859375, -42.24803161621094, 374.8764343261719, 294.50872802734375, -97.20763397216797, -94.85304260253906, 597.4202880859375, -110.0527114868164, 256.35064697265625, 473.9091796875, -67.22891235351562, 117.15918731689453, 714.5341796875, 77.8843765258789, 13.97610092163086, 253.83773803710938, 294.8247985839844, 435.20526123046875, 507.27685546875, -29.218521118164062, 426.38427734375, 139.53533935546875, 100.86036682128906, 336.9678955078125, 169.53636169433594, 169.662109375, 128.4042205810547, 67.53379821777344, -21.789291381835938, 471.9393310546875, 241.49432373046875, 933.1671752929688, -173.4009246826172, 5.988777160644531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000371.npy"} +{"epoch": 0.5447870778267254, "step": 372, "batch_size": 64, "mean": 246.97903442382812, "std": 337.7304382324219, "min": -424.3111572265625, "p10": -146.55095672607422, "median": 154.3909912109375, "p90": 738.8876586914063, "max": 1316.199951171875, "pos_frac": 0.765625, "sample": [332.16314697265625, 340.65228271484375, 695.8573608398438, -300.8648681640625, 23.531936645507812, 729.140380859375, 552.672607421875, 231.71347045898438, 321.4162292480469, -19.083343505859375, 1316.199951171875, 361.48876953125, -5.62115478515625, 156.1480712890625, -102.48989868164062, 104.89221954345703, 60.32175064086914, 66.32422637939453, 763.3995361328125, -199.8675537109375, 210.0777130126953, 139.76348876953125, 669.210205078125, -144.20852661132812, 519.6446533203125, 130.93893432617188, 87.95455169677734, 149.08169555664062, 81.362060546875, 152.6339111328125, 115.98556518554688, 357.5380554199219, 431.65240478515625, -158.84750366210938, 147.08131408691406, -122.7125015258789, 378.8746032714844, 743.0650634765625, 58.81598663330078, 1071.9991455078125, 518.5758056640625, 398.9105224609375, 152.63279724121094, -237.83456420898438, 381.7634582519531, 516.9227294921875, 774.5266723632812, 253.82591247558594, -12.285263061523438, 381.85638427734375, 106.98904418945312, 265.2073974609375, 112.49971008300781, -51.816673278808594, 780.9595947265625, 179.16319274902344, -2.717531204223633, 150.5467529296875, -147.5548553466797, 873.7578125, -241.63223266601562, -424.3111572265625, 449.50482177734375, 179.26174926757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000372.npy"} +{"epoch": 0.5462555066079295, "step": 373, "batch_size": 64, "mean": 239.539794921875, "std": 280.9273681640625, "min": -569.7874755859375, "p10": -60.84494857788085, "median": 224.2378692626953, "p90": 617.1910034179688, "max": 842.0826416015625, "pos_frac": 0.8125, "sample": [148.48028564453125, -161.5550079345703, -209.5584716796875, 268.25860595703125, 17.884904861450195, 842.0826416015625, 415.62982177734375, 137.8546600341797, 296.3531799316406, 174.55300903320312, 587.5840454101562, 249.33876037597656, -53.999542236328125, 264.33416748046875, 390.0818786621094, 40.98224639892578, 29.317211151123047, 716.9075317382812, 331.2644348144531, -63.77869415283203, -357.323486328125, -93.70651245117188, 207.97409057617188, -569.7874755859375, 140.43475341796875, 56.261993408203125, 179.35226440429688, 139.72393798828125, 621.82177734375, 369.2856140136719, 223.4581756591797, 448.71246337890625, 320.81512451171875, 531.4951782226562, 451.5916442871094, 72.31501770019531, -171.80752563476562, 606.3858642578125, 317.604248046875, -18.075851440429688, 830.1809692382812, 145.97137451171875, 403.73040771484375, 215.73394775390625, 346.7018737792969, 443.5205078125, 320.5823669433594, 225.01756286621094, 564.9942016601562, 788.8724365234375, 255.0555419921875, 21.375244140625, 102.2073974609375, 157.05511474609375, -3.6223297119140625, 285.19854736328125, 112.88209533691406, 100.08724975585938, 771.4614868164062, 360.57940673828125, 378.1553039550781, 651.5628051757812, -36.31791687011719, -8.985969543457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000373.npy"} +{"epoch": 0.5477239353891337, "step": 374, "batch_size": 64, "mean": 215.2000732421875, "std": 357.7436218261719, "min": -800.4982299804688, "p10": -130.60262298583982, "median": 142.58795928955078, "p90": 522.6642333984375, "max": 1285.9747314453125, "pos_frac": 0.765625, "sample": [-138.56842041015625, 123.9933090209961, -86.25550842285156, 1285.9747314453125, 380.581787109375, 1120.2998046875, 330.144775390625, 615.24609375, 426.4881591796875, 32.271202087402344, 139.38992309570312, 352.0225524902344, 31.68145751953125, 212.49322509765625, 65.58114624023438, -50.14698028564453, 485.97540283203125, 100.97293090820312, 506.99810791015625, 345.8737487792969, -84.6602783203125, 57.89022445678711, 401.067626953125, 39.83393859863281, -31.177188873291016, 960.3485107421875, 46.004554748535156, 81.87705993652344, -175.93296813964844, 347.17108154296875, 280.2792053222656, 279.7377014160156, -170.62576293945312, 973.1715698242188, -510.129638671875, 106.71318054199219, 21.822189331054688, 515.5440063476562, 61.63823699951172, -38.474853515625, 145.78599548339844, 120.73694610595703, 80.22272491455078, 87.3311996459961, 194.48744201660156, 296.1213073730469, -154.3786163330078, 477.0317687988281, 301.47039794921875, 445.86700439453125, 104.50938415527344, 525.7157592773438, -112.01576232910156, 395.7195129394531, 297.3232727050781, -70.46562957763672, -154.14169311523438, 180.77789306640625, -2.4892425537109375, 156.88121032714844, 1189.4637451171875, -800.4982299804688, 172.9630126953125, 451.2691955566406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000374.npy"} +{"epoch": 0.5491923641703378, "step": 375, "batch_size": 64, "mean": 197.35800170898438, "std": 318.6018981933594, "min": -742.9003295898438, "p10": -198.37783813476562, "median": 196.18629455566406, "p90": 575.442645263672, "max": 989.7745361328125, "pos_frac": 0.765625, "sample": [644.92236328125, 381.7682800292969, -34.844970703125, 263.0864562988281, -121.28376770019531, -208.70379638671875, 474.021240234375, 219.6011962890625, 346.5732421875, -87.31590270996094, 73.29485321044922, -77.12983703613281, 64.58394622802734, -742.9003295898438, 130.8294677734375, 907.92578125, 345.8332214355469, 48.791282653808594, 316.66949462890625, 106.73107147216797, 25.838897705078125, 260.6560974121094, -478.86541748046875, 40.76294708251953, 442.144775390625, -190.89878845214844, 586.7169799804688, 70.7008056640625, 138.24041748046875, 2.6904067993164062, -398.65264892578125, 1.9223785400390625, 549.1358642578125, 598.9091186523438, 49.28837966918945, 252.79074096679688, 333.8683776855469, 499.48724365234375, 481.5753173828125, 654.9127807617188, 415.3006896972656, 188.49533081054688, 126.7633285522461, -286.2242736816406, 453.56231689453125, 475.2049865722656, 295.9607849121094, -201.58314514160156, 989.7745361328125, 521.5700073242188, 403.5876770019531, -230.16897583007812, -2.373260498046875, 251.61361694335938, 126.15567016601562, 521.9385375976562, 91.42835998535156, 591.9611206054688, -95.8761215209961, 203.87725830078125, 444.7010498046875, 374.0519714355469, 89.37429809570312, -91.86109924316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000375.npy"} +{"epoch": 0.5506607929515418, "step": 376, "batch_size": 64, "mean": 256.3907470703125, "std": 320.7525634765625, "min": -536.20947265625, "p10": -144.7107452392578, "median": 227.43343353271484, "p90": 663.0728820800781, "max": 852.4658813476562, "pos_frac": 0.78125, "sample": [-219.16653442382812, 750.3350830078125, 177.38418579101562, 54.93210220336914, 852.4658813476562, 103.36863708496094, 461.65216064453125, -252.7411346435547, -18.343276977539062, 20.366928100585938, 545.508056640625, 0.30138206481933594, 640.9308471679688, 105.79151916503906, -327.6172790527344, 454.45465087890625, 543.8006591796875, -195.89816284179688, -122.95065307617188, 156.2429962158203, 86.86228942871094, 28.506853103637695, -154.0364990234375, 798.7743530273438, 751.39306640625, 38.78874969482422, 449.91754150390625, 257.0645751953125, 548.6778564453125, -73.50935363769531, 754.3651733398438, 533.1175537109375, 134.41969299316406, 590.368896484375, 196.1209716796875, 572.006103515625, 528.2178344726562, 253.40042114257812, 418.56915283203125, 235.95761108398438, 182.23289489746094, 484.3288269042969, -63.83122253417969, 237.69091796875, 567.586669921875, 158.91453552246094, 838.6912841796875, -62.969688415527344, -164.0808563232422, 109.21009063720703, 24.548690795898438, -85.0010757446289, 659.9598388671875, 664.4070434570312, 279.5654296875, 609.5604248046875, 227.99717712402344, 417.034423828125, 226.86968994140625, 278.1396484375, -46.975303649902344, 146.5572967529297, 574.98046875, -536.20947265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000376.npy"} +{"epoch": 0.5521292217327459, "step": 377, "batch_size": 64, "mean": 251.91156005859375, "std": 305.2445068359375, "min": -438.8543701171875, "p10": -76.87665405273435, "median": 253.83515167236328, "p90": 676.254052734375, "max": 984.6080322265625, "pos_frac": 0.765625, "sample": [371.28680419921875, 984.6080322265625, 134.66473388671875, -83.46438598632812, 295.8882141113281, -38.184539794921875, 220.38442993164062, -278.4700927734375, 270.5880432128906, 93.51764678955078, 805.5348510742188, 757.53759765625, -26.517173767089844, 893.4033203125, -26.666345596313477, 214.66580200195312, 149.9685516357422, 477.4300842285156, 260.2167663574219, -31.758527755737305, 258.8414306640625, 590.8539428710938, 550.41650390625, -23.074947357177734, 585.4483642578125, 879.5789184570312, 362.1751403808594, -273.1805419921875, 697.3792724609375, 113.58182525634766, 332.47259521484375, 248.82887268066406, 25.53191375732422, 146.43966674804688, 22.611263275146484, 424.0797119140625, 142.8241424560547, 317.157470703125, 449.67218017578125, 658.1840209960938, 312.8846130371094, -61.505279541015625, -438.8543701171875, 229.10595703125, -144.81723022460938, 81.97876739501953, 554.957763671875, 341.26727294921875, -213.6072235107422, 337.3668518066406, 12.299545288085938, 683.9983520507812, -25.211318969726562, 55.126708984375, -14.674873352050781, 558.3873901367188, 279.82794189453125, 585.1397705078125, 357.09228515625, 10.346855163574219, 351.9781494140625, 84.40444946289062, 324.05615234375, -93.66472625732422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000377.npy"} +{"epoch": 0.55359765051395, "step": 378, "batch_size": 64, "mean": 265.88409423828125, "std": 286.7349548339844, "min": -509.3359375, "p10": -68.63469619750977, "median": 276.4412841796875, "p90": 625.69287109375, "max": 1081.8577880859375, "pos_frac": 0.828125, "sample": [758.9210815429688, 368.8033752441406, 631.67822265625, 604.4568481445312, 478.08319091796875, 101.54261016845703, 197.93942260742188, 356.1951904296875, 134.085205078125, 298.2956237792969, 360.2818603515625, 410.106201171875, 634.3348388671875, 179.96949768066406, 115.14007568359375, -31.381393432617188, 291.57122802734375, 660.9367065429688, 242.037109375, 420.95086669921875, 308.706298828125, -68.67501831054688, -157.397705078125, 58.901161193847656, 268.833251953125, 205.03366088867188, -90.11311340332031, 467.69830322265625, 142.37814331054688, 394.4112548828125, 534.6387329101562, 414.4008483886719, 307.4994812011719, 70.74880981445312, -19.05602264404297, 526.5549926757812, 230.9556884765625, 1081.8577880859375, 40.03248596191406, -328.67694091796875, 286.9138488769531, -46.56267547607422, 230.82273864746094, 611.72705078125, 130.79855346679688, -104.28360748291016, 476.7385559082031, 310.14801025390625, 493.9311218261719, 337.5266418457031, 74.18086242675781, 284.04931640625, 495.76690673828125, 242.3724365234375, -509.3359375, 186.7725067138672, 260.9400329589844, 443.7025451660156, -68.54061126708984, 834.6303100585938, 102.79631042480469, -392.09210205078125, 68.79300689697266, 662.1060791015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000378.npy"} +{"epoch": 0.5550660792951542, "step": 379, "batch_size": 64, "mean": 264.1388854980469, "std": 325.6307067871094, "min": -465.83197021484375, "p10": -96.35971145629883, "median": 209.9394073486328, "p90": 692.6810424804688, "max": 1391.26513671875, "pos_frac": 0.78125, "sample": [300.00390625, 408.379150390625, -12.425994873046875, 207.9559326171875, -95.94969940185547, 124.97016143798828, 181.84707641601562, 669.07373046875, 199.38027954101562, 184.61697387695312, 362.6517639160156, 841.654541015625, 34.77909851074219, 515.7347412109375, 363.1591796875, 210.78915405273438, -465.83197021484375, 901.2714233398438, 17.003158569335938, 236.12933349609375, 510.0704345703125, 455.80548095703125, 275.59124755859375, -38.90537643432617, 126.34454345703125, -81.6488037109375, -310.67047119140625, 173.79910278320312, -35.41655731201172, 433.6729736328125, 733.0685424804688, 462.74749755859375, -274.14190673828125, 180.8690643310547, 507.9159240722656, 723.519287109375, 366.76898193359375, 387.37677001953125, 702.7984619140625, 146.2224578857422, -78.28862762451172, 533.588623046875, 209.08966064453125, 35.76393508911133, 433.784912109375, -96.53543090820312, 186.74526977539062, 413.0562438964844, 234.4065704345703, 158.14878845214844, -132.68064880371094, 388.4658203125, 838.8356323242188, 251.25697326660156, 1391.26513671875, 360.17230224609375, 41.53862762451172, 546.9879760742188, 151.2036895751953, -207.48577880859375, 152.72035217285156, 661.1583251953125, -176.55667114257812, -22.732101440429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000379.npy"} +{"epoch": 0.5565345080763583, "step": 380, "batch_size": 64, "mean": 288.2654724121094, "std": 326.82598876953125, "min": -830.8529052734375, "p10": -67.94820404052733, "median": 310.5505065917969, "p90": 676.8174194335938, "max": 1179.8302001953125, "pos_frac": 0.859375, "sample": [137.63204956054688, 112.71217346191406, 67.41777038574219, 1179.8302001953125, 146.56536865234375, 110.0942611694336, 148.33041381835938, 388.64105224609375, 401.63653564453125, 310.7828369140625, 128.33721923828125, 425.2465515136719, -51.54131317138672, 591.9373168945312, 871.130859375, 503.17822265625, 386.8517150878906, 486.56256103515625, 514.6534423828125, 177.96136474609375, 355.17584228515625, -305.19757080078125, 192.64068603515625, 296.3555908203125, 73.91122436523438, -74.97972869873047, 434.0621337890625, 320.4134216308594, 287.1896667480469, 687.2745361328125, 422.61370849609375, 622.5206298828125, 581.8954467773438, 652.41748046875, 642.1536865234375, 428.791748046875, 90.67427062988281, -47.84027099609375, 270.4036560058594, 69.26708221435547, 913.6405029296875, 81.29536437988281, -279.2655944824219, 181.48712158203125, -200.92041015625, 221.90489196777344, 357.51922607421875, 693.28662109375, 64.10670471191406, -347.5338134765625, 429.09613037109375, 60.348968505859375, 748.7452392578125, 349.16693115234375, 310.31817626953125, -134.01388549804688, 463.6331787109375, 362.8774108886719, 215.10296630859375, 204.33770751953125, -830.8529052734375, 333.267333984375, 790.751708984375, 422.9879150390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000380.npy"} +{"epoch": 0.5580029368575624, "step": 381, "batch_size": 64, "mean": 204.197998046875, "std": 355.566162109375, "min": -759.5465087890625, "p10": -163.83786315917968, "median": 183.30817413330078, "p90": 628.0960021972659, "max": 1170.199462890625, "pos_frac": 0.71875, "sample": [267.7632751464844, -37.620941162109375, -19.732267379760742, 654.8807983398438, 490.45928955078125, 678.2053833007812, 522.4910278320312, -39.12345886230469, 133.56985473632812, 549.1517333984375, 470.1584777832031, 108.32426452636719, 407.1675109863281, 242.62353515625, 1062.4990234375, -145.175048828125, 484.1966552734375, 366.35504150390625, 695.33837890625, 332.5325622558594, 1029.758056640625, 674.533935546875, -196.5928955078125, -21.906082153320312, -171.83621215820312, 124.52423095703125, 553.0634155273438, 30.492656707763672, -353.1637268066406, -43.602882385253906, 182.1049041748047, 291.393310546875, 50.46543884277344, -34.87697982788086, 265.89898681640625, -138.9693145751953, 264.14501953125, 163.34585571289062, 181.57400512695312, -77.05972290039062, 63.82610321044922, 102.193603515625, 1170.199462890625, 214.8003692626953, 431.2032470703125, 392.4212951660156, 11.90625, -205.58544921875, 565.59814453125, -126.49333190917969, 49.458335876464844, 363.5232238769531, -108.34732055664062, 227.53468322753906, 55.807899475097656, -307.0890808105469, 95.54632568359375, 329.8258972167969, 436.92059326171875, 184.51144409179688, 225.64979553222656, -759.5465087890625, -690.7086181640625, 348.15771484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000381.npy"} +{"epoch": 0.5594713656387665, "step": 382, "batch_size": 64, "mean": 245.88180541992188, "std": 272.1447448730469, "min": -331.8670654296875, "p10": -93.93155288696288, "median": 204.46183013916016, "p90": 601.6862182617188, "max": 963.4157104492188, "pos_frac": 0.84375, "sample": [963.4157104492188, 607.2066650390625, 120.6460189819336, 71.33797454833984, 155.63050842285156, 821.901123046875, -101.66545867919922, 110.16600799560547, 338.6314697265625, -178.53836059570312, 160.2736053466797, 236.4810028076172, 395.96405029296875, 478.356689453125, 581.0757446289062, 566.8243408203125, 210.73362731933594, 43.03114318847656, 409.5172424316406, 117.88632202148438, 421.3005676269531, -118.06303405761719, 14.952835083007812, 360.3147277832031, 736.6861572265625, -171.74130249023438, 351.1578063964844, -75.88577270507812, 552.2406005859375, 238.57586669921875, 639.3739013671875, -110.63140106201172, -25.782894134521484, 354.0559997558594, 506.5981140136719, -331.8670654296875, -198.59060668945312, 385.8797607421875, 14.032424926757812, 246.03488159179688, 202.62069702148438, 185.19766235351562, 306.81988525390625, 51.471519470214844, 217.7249298095703, 99.68357849121094, 108.03248596191406, 312.0988464355469, 206.30296325683594, 119.54493713378906, 632.739990234375, 575.3240356445312, 309.48443603515625, 15.102380752563477, 130.0694580078125, 769.1205444335938, 71.2919921875, -0.4917640686035156, 535.1785278320312, 588.80517578125, 146.21690368652344, 129.2138671875, 5.1582489013671875, 122.2069091796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000382.npy"} +{"epoch": 0.5609397944199707, "step": 383, "batch_size": 64, "mean": 239.28997802734375, "std": 356.1913757324219, "min": -935.57177734375, "p10": -174.63472747802734, "median": 240.76715087890625, "p90": 751.4206054687501, "max": 962.0069580078125, "pos_frac": 0.765625, "sample": [-23.910179138183594, -89.62164306640625, 163.42568969726562, -324.8317565917969, 511.05523681640625, -260.9442138671875, -207.661865234375, 616.404296875, 330.5643310546875, 1.5330085754394531, 392.255615234375, 149.50851440429688, 61.959529876708984, 0.3099212646484375, -374.4586181640625, 270.5746154785156, 962.0069580078125, 43.72760772705078, 241.67471313476562, 364.373046875, 815.6807861328125, -222.25051879882812, -8.431880950927734, 730.1802368164062, 229.26275634765625, -52.496437072753906, 757.2769775390625, 93.55726623535156, -31.098876953125, 299.97552490234375, 949.9242553710938, 266.51519775390625, 742.7630615234375, 244.8358154296875, 419.67242431640625, 406.68310546875, 430.0615539550781, 59.86460876464844, 181.3731689453125, 294.17401123046875, -97.67245483398438, -54.8834228515625, 239.85958862304688, 41.15047073364258, 355.0691223144531, 820.8465576171875, -174.41806030273438, 581.8446655273438, 4.4910430908203125, 315.30224609375, 820.6790771484375, 77.71988677978516, -174.7275848388672, 314.477294921875, 411.7486572265625, 42.620643615722656, 755.1309814453125, 573.1361083984375, 236.4354248046875, 567.3438110351562, -935.57177734375, 87.60476684570312, 481.8531494140625, 589.050537109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000383.npy"} +{"epoch": 0.5624082232011748, "step": 384, "batch_size": 64, "mean": 194.8409423828125, "std": 250.0452117919922, "min": -390.89239501953125, "p10": -110.0501998901367, "median": 207.04146575927734, "p90": 488.29281005859383, "max": 946.6728515625, "pos_frac": 0.796875, "sample": [195.46139526367188, 8.314117431640625, 946.6728515625, -182.4512176513672, 3.085205078125, 134.9896697998047, -221.08665466308594, -32.58988952636719, 732.9297485351562, 214.68679809570312, 468.2068176269531, 503.23443603515625, 255.3107147216797, -344.2304382324219, 408.2916259765625, 29.67208480834961, -120.76716613769531, 41.3591423034668, 356.08245849609375, 384.61865234375, 155.49710083007812, 294.81829833984375, 42.34794616699219, 365.44873046875, 270.7313537597656, 292.837890625, 143.71417236328125, -210.70828247070312, -390.89239501953125, 102.25794219970703, 343.4402160644531, 123.63044738769531, 48.626312255859375, 217.77728271484375, 191.16624450683594, -59.185218811035156, 420.88787841796875, 358.0942687988281, 370.2193603515625, 65.19598388671875, -21.321552276611328, 199.39613342285156, 369.6617126464844, 541.5026245117188, 496.9010925292969, 287.2274169921875, 274.332763671875, 273.5565185546875, 249.54837036132812, -187.251953125, 238.952880859375, -26.57256317138672, 73.95833587646484, 463.296630859375, 123.46157836914062, -82.5894775390625, -85.0439453125, 532.400634765625, 417.2725830078125, 132.77989196777344, 598.7017211914062, 353.1678771972656, 36.419036865234375, 282.366943359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000384.npy"} +{"epoch": 0.5638766519823789, "step": 385, "batch_size": 64, "mean": 233.38693237304688, "std": 249.66159057617188, "min": -395.346435546875, "p10": -45.68745040893554, "median": 201.6787567138672, "p90": 561.3955810546876, "max": 790.6387329101562, "pos_frac": 0.875, "sample": [-35.9866943359375, 50.04065704345703, -49.84491729736328, 269.65179443359375, 444.49676513671875, 247.49160766601562, 389.65594482421875, 121.83370971679688, -212.56289672851562, 489.638427734375, 148.81422424316406, 31.250349044799805, 161.13858032226562, 128.69061279296875, 449.50616455078125, 198.2142333984375, 637.3533935546875, 239.51512145996094, 112.3484115600586, 93.32593536376953, 354.0978088378906, 165.22340393066406, -160.9844512939453, 346.6339111328125, -222.35455322265625, 299.613525390625, 159.84776306152344, 549.4388427734375, 99.45637512207031, 47.85675048828125, 354.80731201171875, 192.76377868652344, 178.363037109375, 734.1473999023438, 790.6387329101562, 139.82777404785156, 172.12258911132812, 3.8562164306640625, 566.5198974609375, 307.841796875, 47.60954284667969, 107.94642639160156, 511.8928527832031, 187.5693817138672, 210.9666748046875, 649.0439453125, 451.1913146972656, -137.30233764648438, 759.35546875, 373.3707275390625, -395.346435546875, 64.98158264160156, 250.99151611328125, -273.93707275390625, 209.29002380371094, 334.8035888671875, 478.3038635253906, 114.1561050415039, 312.9514465332031, 697.7413330078125, 78.61653137207031, 368.81976318359375, 205.14328002929688, 334.315673828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000385.npy"} +{"epoch": 0.5653450807635829, "step": 386, "batch_size": 64, "mean": 256.977783203125, "std": 332.35858154296875, "min": -441.12030029296875, "p10": -142.90402832031245, "median": 194.27964782714844, "p90": 762.1091552734376, "max": 1050.802490234375, "pos_frac": 0.828125, "sample": [-85.96598815917969, 389.3719787597656, -67.53392791748047, 393.29913330078125, 778.0849609375, 180.1208038330078, 86.1889419555664, 423.0665283203125, 141.31625366210938, 476.77117919921875, 345.9722595214844, 1050.802490234375, -328.1644287109375, -60.492095947265625, 799.60693359375, 351.28851318359375, 353.45928955078125, 455.6652526855469, 12.10899543762207, 101.79090118408203, 93.6606674194336, 270.2020263671875, 282.5445251464844, 166.45172119140625, 293.6976013183594, 257.3354187011719, 96.5241470336914, -188.19754028320312, 988.3212280273438, 663.35595703125, -62.499969482421875, 88.26177978515625, 75.2567138671875, -265.9537658691406, 148.8030242919922, 807.63525390625, 985.9887084960938, 364.84234619140625, 724.832275390625, 117.67972564697266, 240.0390625, 633.91748046875, -271.32275390625, 145.6806182861328, 44.549530029296875, 166.07723999023438, 208.43849182128906, 159.81729125976562, -167.30604553222656, 434.4698181152344, -441.12030029296875, 927.2131958007812, 80.42264556884766, 76.6303939819336, 56.2603759765625, 128.83094787597656, 484.57720947265625, 319.95050048828125, -351.399658203125, 556.394287109375, 592.2927856445312, 274.1081237792969, 269.1438903808594, 173.41250610351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000386.npy"} +{"epoch": 0.566813509544787, "step": 387, "batch_size": 64, "mean": 223.16220092773438, "std": 277.20703125, "min": -415.60443115234375, "p10": -125.57430496215818, "median": 200.62606048583984, "p90": 572.686492919922, "max": 920.0958862304688, "pos_frac": 0.796875, "sample": [320.4808044433594, -102.3714828491211, 159.2031707763672, 161.80149841308594, 351.3814392089844, 920.0958862304688, 200.61776733398438, 130.00244140625, 309.0351867675781, 539.6424560546875, -393.27276611328125, 316.4507141113281, 106.0692138671875, 32.8003044128418, -415.60443115234375, 218.92654418945312, 702.80224609375, 549.5098876953125, -140.06704711914062, 225.53097534179688, 775.7488403320312, 67.25498962402344, 334.4585876464844, 778.3779296875, 455.8293762207031, 176.54129028320312, 259.6824645996094, 150.69534301757812, 154.52670288085938, 173.08154296875, 65.74857330322266, 154.49842834472656, 466.1177062988281, 46.1493034362793, 600.66455078125, 161.091796875, 418.1322021484375, 135.68846130371094, 301.3185119628906, 382.9356384277344, 94.74959564208984, -63.80622482299805, 625.6415405273438, 459.8956604003906, 349.1202087402344, 55.30950927734375, 416.2869567871094, -2.775035858154297, 582.6193237304688, -57.96864318847656, 384.974609375, -211.24789428710938, 28.249168395996094, 343.4234313964844, -221.45278930664062, 200.6343536376953, -10.812324523925781, 464.55401611328125, -135.51837158203125, -29.720535278320312, 253.60887145996094, -299.1853332519531, 279.1785888671875, 525.0457763671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000387.npy"} +{"epoch": 0.5682819383259912, "step": 388, "batch_size": 64, "mean": 288.08319091796875, "std": 361.1645202636719, "min": -620.8928833007812, "p10": -66.18918991088867, "median": 222.38197326660156, "p90": 704.141638183594, "max": 1600.2000732421875, "pos_frac": 0.828125, "sample": [1600.2000732421875, 637.204345703125, -67.89064025878906, 538.1802978515625, 537.2846069335938, 851.2844848632812, 168.79359436035156, -288.7144775390625, 164.4122314453125, 287.64044189453125, 171.98239135742188, 944.0479736328125, 614.6039428710938, 231.81985473632812, -620.8928833007812, 2.6026687622070312, 334.9822998046875, 254.53399658203125, 515.5960693359375, -62.219139099121094, 107.76592254638672, 783.4288330078125, 172.81332397460938, -51.02704620361328, 163.2014617919922, 209.5059814453125, 82.53258514404297, -123.13441467285156, 161.54296875, 433.2308654785156, 67.76421356201172, 105.86503601074219, 267.86724853515625, 77.24874877929688, 490.3431396484375, 182.06988525390625, 114.39884948730469, 61.251197814941406, 610.8781127929688, 103.4202651977539, 423.3474426269531, 121.43870544433594, 431.56903076171875, 167.98577880859375, 212.944091796875, -1.8365402221679688, 442.2259521484375, 234.20843505859375, -268.8436584472656, 411.5143127441406, 339.1917724609375, 1288.683349609375, -206.2298126220703, 379.484619140625, 234.0583038330078, 26.03350830078125, -26.699310302734375, 545.5624389648438, 723.2786865234375, 819.8009643554688, 659.488525390625, 520.65283203125, 257.190673828125, -134.14659118652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000388.npy"} +{"epoch": 0.5697503671071953, "step": 389, "batch_size": 64, "mean": 234.0470733642578, "std": 321.1650390625, "min": -415.9674072265625, "p10": -142.9644607543945, "median": 242.99939727783203, "p90": 583.9732910156251, "max": 1017.09130859375, "pos_frac": 0.71875, "sample": [64.74459838867188, 496.23809814453125, 395.0863952636719, -363.6294250488281, 184.00291442871094, 268.59771728515625, 176.3941192626953, 563.1586303710938, 486.3382568359375, 226.2794189453125, 334.9101867675781, -111.31558990478516, 257.6596984863281, -206.54098510742188, 143.44508361816406, 523.990234375, 124.3941421508789, 444.27093505859375, 428.5052795410156, -86.71237182617188, -350.17620849609375, -0.19015121459960938, 592.8938598632812, -18.996646881103516, 311.0339050292969, 707.4581909179688, -415.9674072265625, -27.180191040039062, 106.3836898803711, 150.3654022216797, 272.09027099609375, -149.53097534179688, 139.70765686035156, 337.6756591796875, -328.6693115234375, 371.4110107421875, 228.33909606933594, 477.8174743652344, 407.1841735839844, 984.257568359375, 450.07257080078125, -39.95747375488281, 439.0603332519531, 460.7601318359375, 187.85552978515625, 396.6429748535156, -5.861268997192383, 112.08723449707031, 458.13897705078125, 125.7160415649414, 83.10767364501953, 1017.09130859375, -127.64259338378906, -98.91387176513672, 257.7123107910156, 821.9443359375, -75.1278076171875, 942.0886840820312, 278.302734375, -35.56754684448242, 331.6452331542969, -243.4842071533203, 293.663818359375, 803.9529418945312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000389.npy"} +{"epoch": 0.5712187958883994, "step": 390, "batch_size": 64, "mean": 237.1005096435547, "std": 384.1810607910156, "min": -661.1967163085938, "p10": -163.56034851074216, "median": 234.66807556152344, "p90": 568.610302734375, "max": 2227.20654296875, "pos_frac": 0.765625, "sample": [235.59776306152344, 313.41650390625, 588.50146484375, 501.1356201171875, -661.1967163085938, -175.48880004882812, -260.3409729003906, 125.94908142089844, 385.61773681640625, 280.3222351074219, 503.2070617675781, 52.28645324707031, 307.31976318359375, 414.7024841308594, 224.33657836914062, -7.818023681640625, -11.099235534667969, 2227.20654296875, -132.80093383789062, 434.38958740234375, 100.30770874023438, 525.2811279296875, 569.2423095703125, 152.5724639892578, 153.4369354248047, -135.727294921875, 106.00252532958984, -48.529449462890625, 157.91697692871094, 270.4439697265625, 175.91177368164062, -20.175933837890625, -183.728759765625, -74.8221206665039, 160.0256805419922, 567.1356201171875, 833.9620361328125, 573.7869873046875, -379.79400634765625, 413.8204040527344, 557.9120483398438, 628.0159301757812, 416.9516906738281, 102.27556610107422, 585.16259765625, 94.78486633300781, 177.262451171875, 105.42430114746094, 543.426025390625, 416.05194091796875, 459.4875183105469, 233.73838806152344, 328.6294860839844, 259.98358154296875, 369.75445556640625, -35.715599060058594, 0.131134033203125, -308.1558837890625, -510.9963684082031, 309.28118896484375, 459.56402587890625, 45.505916595458984, 284.35882568359375, 389.2851257324219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000390.npy"} +{"epoch": 0.5726872246696035, "step": 391, "batch_size": 64, "mean": 142.0626983642578, "std": 275.3839416503906, "min": -601.9620971679688, "p10": -146.12131042480468, "median": 108.77741241455078, "p90": 463.11053161621106, "max": 970.65625, "pos_frac": 0.734375, "sample": [2.246623992919922, 11.315353393554688, 62.124473571777344, -46.24449157714844, 178.26040649414062, 340.3180236816406, 36.127174377441406, 115.42313385009766, 15.451221466064453, 399.4897155761719, -122.6679916381836, 87.66883850097656, -226.4140625, -89.51079559326172, 67.48332214355469, 36.279808044433594, 392.75634765625, 130.00439453125, 34.511390686035156, -133.725341796875, 207.38267517089844, 506.8817443847656, 562.119140625, 475.3741760253906, -93.16059112548828, 124.85411071777344, 970.65625, -184.7589874267578, -48.611602783203125, 305.6964111328125, 371.62945556640625, -111.70635223388672, 83.80369567871094, 41.71536636352539, -76.47423553466797, 204.1413116455078, 102.1316909790039, -22.00566864013672, 434.495361328125, 162.7822265625, 24.00811004638672, -151.43386840820312, 148.47540283203125, 333.5442810058594, 730.635009765625, -377.5182800292969, 535.2811279296875, -23.981985092163086, -395.444580078125, 37.703514099121094, 56.70661926269531, 253.60177612304688, -189.17288208007812, 385.8101501464844, 406.070068359375, 118.71784973144531, 756.6785888671875, 406.1004638671875, 309.68438720703125, -601.9620971679688, 247.55337524414062, 231.43374633789062, 293.31488037109375, 248.36341857910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000391.npy"} +{"epoch": 0.5741556534508077, "step": 392, "batch_size": 64, "mean": 272.79132080078125, "std": 261.2071228027344, "min": -155.08804321289062, "p10": -25.227830886840792, "median": 226.2935562133789, "p90": 603.1115051269531, "max": 1025.0677490234375, "pos_frac": 0.890625, "sample": [43.54315948486328, 172.61041259765625, -151.08993530273438, 59.624935150146484, 227.95802307128906, -38.151466369628906, 603.9874267578125, 255.77650451660156, 277.3010559082031, 339.34423828125, 601.0676879882812, -136.6637420654297, 301.7765197753906, 175.46389770507812, 146.7299346923828, 32.7333984375, 609.7012939453125, 760.5413818359375, 480.7523193359375, 201.20079040527344, -98.83733367919922, 345.85015869140625, 553.2160034179688, 50.09642028808594, 359.0274658203125, 400.6717529296875, 224.62908935546875, 1025.0677490234375, -100.60733032226562, 217.55111694335938, 219.44102478027344, 251.216552734375, 383.99169921875, 174.17782592773438, 473.60760498046875, 232.13133239746094, 342.21490478515625, 181.35377502441406, 184.1898956298828, 133.9834442138672, 445.47705078125, 428.23675537109375, 85.46444702148438, 69.52552032470703, 505.33880615234375, 852.1044921875, 377.96441650390625, 444.3976135253906, 382.1094970703125, -144.13926696777344, 129.08851623535156, 165.68980407714844, 84.62326049804688, 312.69757080078125, 168.67420959472656, 146.9866485595703, 329.7337646484375, 4.927318572998047, 989.420166015625, 28.730789184570312, -155.08804321289062, 81.38232421875, 425.9094543457031, 782.20703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000392.npy"} +{"epoch": 0.5756240822320118, "step": 393, "batch_size": 64, "mean": 209.79046630859375, "std": 318.2052001953125, "min": -266.19390869140625, "p10": -191.43768615722655, "median": 220.97095489501953, "p90": 589.4853637695314, "max": 1424.402099609375, "pos_frac": 0.71875, "sample": [272.81292724609375, 632.2771606445312, 5.628658294677734, -83.01807403564453, 357.55194091796875, 459.87725830078125, -202.10873413085938, 370.16119384765625, 263.3341979980469, 241.65176391601562, 62.74285888671875, 232.1048583984375, 123.3459243774414, 331.94195556640625, 270.2259521484375, 73.25707244873047, 973.5447998046875, 941.5052490234375, 613.4619140625, -7.800117492675781, 285.6928405761719, 547.495361328125, -245.9365234375, -224.96629333496094, -100.59904479980469, -6.695888519287109, -192.6373748779297, 220.61648559570312, 297.0244445800781, 310.8282470703125, 181.84417724609375, 416.4805908203125, 468.97943115234375, 217.63571166992188, 512.8466186523438, 279.13140869140625, 222.303955078125, 105.2236099243164, 421.815185546875, -32.38398742675781, 1.7895660400390625, 119.41690063476562, 607.4810791015625, -134.26974487304688, -197.69039916992188, 504.2044677734375, 314.6400451660156, -192.4483642578125, 31.43364715576172, 147.31515502929688, 111.63208770751953, 221.32542419433594, -154.27529907226562, -132.1727294921875, -189.07943725585938, 356.1396179199219, 370.58416748046875, -266.19390869140625, 307.6173095703125, 82.26879119873047, -92.65154266357422, 1424.402099609375, 633.4541015625, -65.53057861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000393.npy"} +{"epoch": 0.5770925110132159, "step": 394, "batch_size": 64, "mean": 220.0300750732422, "std": 279.3369445800781, "min": -283.2396545410156, "p10": -119.54056167602538, "median": 166.6559600830078, "p90": 603.4959228515626, "max": 997.23095703125, "pos_frac": 0.78125, "sample": [121.77400207519531, 312.6904296875, -200.65841674804688, 418.3432922363281, 163.9783935546875, -127.93109130859375, 106.28221130371094, 136.5944366455078, 599.4949340820312, 57.40777587890625, 559.6004638671875, -20.18885040283203, -283.2396545410156, 26.801315307617188, -33.30567932128906, -176.82012939453125, -103.7497329711914, 680.6771850585938, 539.7657470703125, -184.1975555419922, 519.538818359375, -212.8466796875, 274.6436767578125, 303.6988830566406, 171.35597229003906, 304.5992431640625, 218.1228790283203, 47.25502014160156, -19.910423278808594, 605.2106323242188, 540.7787475585938, 689.1190795898438, -126.30805969238281, 419.20513916015625, 129.44810485839844, 46.776214599609375, 151.047119140625, 21.52904510498047, 132.8372802734375, 617.5804443359375, 30.31977081298828, 454.7571716308594, -1.8397941589355469, 169.8067169189453, 141.27085876464844, 32.0997314453125, 524.1776123046875, 789.16015625, 241.43821716308594, 157.21820068359375, 475.4366455078125, 289.43402099609375, 997.23095703125, 843.6415405273438, 324.718994140625, 35.971397399902344, 51.20634460449219, -33.84627914428711, 169.33352661132812, 373.7952575683594, 201.45281982421875, -56.7652587890625, 173.95562744140625, 270.9505310058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000394.npy"} +{"epoch": 0.57856093979442, "step": 395, "batch_size": 64, "mean": 239.2242431640625, "std": 309.171630859375, "min": -496.8615417480469, "p10": -83.36632385253905, "median": 203.2417449951172, "p90": 689.4072692871096, "max": 955.0869140625, "pos_frac": 0.78125, "sample": [492.2329406738281, -194.84674072265625, 163.91448974609375, 72.1448974609375, 214.9586639404297, 341.23846435546875, 338.322509765625, 237.6673583984375, -248.53067016601562, 274.5289611816406, -5.4856109619140625, 514.899169921875, 390.8076171875, -46.295860290527344, 106.9984359741211, 27.391429901123047, 657.9913940429688, 321.1263427734375, 865.7005615234375, -36.77483367919922, 614.5010375976562, 42.936744689941406, 497.70977783203125, -92.36564636230469, 212.46871948242188, 28.78472900390625, 205.90353393554688, 376.21002197265625, -33.988548278808594, 17.303916931152344, 862.2958984375, 917.8936157226562, 410.7999267578125, 138.84095764160156, 347.21734619140625, 39.47296142578125, 160.3421630859375, 280.86090087890625, 872.210693359375, 152.65338134765625, 481.8140869140625, 470.8057556152344, -496.8615417480469, -84.79853820800781, -217.27529907226562, 313.0880126953125, -53.46519470214844, 761.2088012695312, 12.927011489868164, -80.02449035644531, 64.73150634765625, -165.7947998046875, 22.002920150756836, 955.0869140625, 702.8712158203125, 244.3605499267578, 200.5799560546875, 458.45025634765625, 174.00123596191406, 434.0527648925781, 449.6715393066406, 66.36009979248047, 94.3532485961914, -37.836734771728516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000395.npy"} +{"epoch": 0.580029368575624, "step": 396, "batch_size": 64, "mean": 219.32781982421875, "std": 238.59107971191406, "min": -385.80340576171875, "p10": -33.53731002807613, "median": 173.92937469482422, "p90": 548.0099395751954, "max": 972.5863037109375, "pos_frac": 0.890625, "sample": [221.22178649902344, 563.4695434570312, 69.59371185302734, 196.71597290039062, 101.94422912597656, 568.208984375, 142.50531005859375, 179.254638671875, 142.8275146484375, 218.24453735351562, 335.66156005859375, 260.2418212890625, 224.21694946289062, 361.16015625, -125.13238525390625, 173.61204528808594, 188.97442626953125, 56.10508728027344, 59.82801818847656, 164.55889892578125, 124.2289810180664, 412.3569030761719, 829.9434204101562, 308.79779052734375, 195.74935913085938, 442.4659729003906, 263.0675048828125, 135.9444580078125, 159.42535400390625, 657.6717529296875, -81.50898742675781, 344.84051513671875, 112.3075180053711, 255.26522827148438, 63.43888473510742, 161.3639373779297, 415.448974609375, -159.6043701171875, 395.888671875, 636.5933837890625, 356.9255676269531, 13.72113037109375, -94.135498046875, 511.9375305175781, 42.5462646484375, -52.869754791259766, 438.17547607421875, 11.571723937988281, 70.11196899414062, 217.09796142578125, 776.5877075195312, 119.97377014160156, 74.2930908203125, -104.04922485351562, 67.22673034667969, 272.7848205566406, 311.2088317871094, 125.5271224975586, 77.33509063720703, -385.80340576171875, 113.91558837890625, 972.5863037109375, 174.2467041015625, 149.16726684570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000396.npy"} +{"epoch": 0.5814977973568282, "step": 397, "batch_size": 64, "mean": 277.65472412109375, "std": 267.56640625, "min": -291.74761962890625, "p10": -8.544419479370106, "median": 228.4979248046875, "p90": 626.6746704101563, "max": 922.9066162109375, "pos_frac": 0.890625, "sample": [34.36093521118164, 521.9442749023438, 38.470672607421875, 361.8022155761719, 141.19589233398438, 92.23374938964844, 230.91424560546875, 264.61737060546875, 448.14605712890625, 174.9214324951172, 632.9766845703125, 3.6406097412109375, 561.9362182617188, -49.19415283203125, 146.69161987304688, 459.3058166503906, 226.08160400390625, -179.24594116210938, 409.037109375, 588.9052124023438, 253.13446044921875, 611.969970703125, 307.137939453125, 405.0778503417969, 922.9066162109375, 53.06037902832031, 652.018310546875, 440.05126953125, -143.5919189453125, 143.6556854248047, 540.2466430664062, 217.1309356689453, 93.5123291015625, 784.2882080078125, 488.450439453125, 252.60684204101562, 19.523944854736328, 342.09906005859375, 165.913330078125, 168.21392822265625, -291.74761962890625, -127.5208969116211, 88.94615936279297, -193.47308349609375, 568.180908203125, 182.76100158691406, 42.50189971923828, 157.36541748046875, -13.76657485961914, 498.943603515625, 51.06888961791992, 675.9547119140625, 205.34292602539062, 922.0821533203125, 168.43963623046875, 805.9459228515625, 115.57734680175781, 211.94305419921875, 518.434814453125, 164.94158935546875, 232.5841064453125, 325.66607666015625, 362.2184143066406, 271.36444091796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000397.npy"} +{"epoch": 0.5829662261380323, "step": 398, "batch_size": 64, "mean": 246.69529724121094, "std": 314.204833984375, "min": -380.7127990722656, "p10": -54.72208938598632, "median": 137.99402618408203, "p90": 593.7578735351562, "max": 1188.8519287109375, "pos_frac": 0.78125, "sample": [126.97991943359375, 60.82121276855469, 391.6595764160156, 699.5371704101562, -104.58863830566406, 91.21802520751953, 316.66656494140625, -290.75079345703125, 63.37730407714844, -25.29448699951172, 553.7286987304688, -318.49920654296875, 559.5128784179688, 391.90838623046875, 446.8818054199219, 967.131103515625, 163.61570739746094, 126.91735076904297, 557.7762451171875, 283.58148193359375, 479.079833984375, 129.61480712890625, 593.3515625, 405.7499084472656, 1054.3330078125, -10.92138671875, 171.62738037109375, 131.64088439941406, 584.9846801757812, 371.70062255859375, 33.736328125, 170.47982788085938, -66.51806640625, -5.8603668212890625, 248.55349731445312, 94.86683654785156, 42.315757751464844, 346.1251220703125, 60.194679260253906, 378.69793701171875, 116.99586486816406, 430.0843811035156, 129.69375610351562, 48.742408752441406, 133.94241333007812, -29.12482452392578, 387.77691650390625, -30.089385986328125, -51.75459289550781, 101.72140502929688, 142.04563903808594, 1188.8519287109375, 593.9320068359375, 750.4918212890625, -380.7127990722656, 491.0898132324219, 420.94305419921875, 479.7835388183594, 729.832275390625, 117.60028076171875, -50.00975799560547, 7.13818359375, -55.993873596191406, -160.41497802734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000398.npy"} +{"epoch": 0.5844346549192364, "step": 399, "batch_size": 64, "mean": 234.14266967773438, "std": 300.6256103515625, "min": -384.1827392578125, "p10": -186.7093444824218, "median": 213.98451232910156, "p90": 614.1228271484376, "max": 1017.738037109375, "pos_frac": 0.75, "sample": [177.08413696289062, 281.86651611328125, 94.01847076416016, -219.06549072265625, 586.3829345703125, -65.67317962646484, 626.0113525390625, 42.69950485229492, 200.3450927734375, 8.503570556640625, 247.88229370117188, -245.03445434570312, 639.4328002929688, -51.22540283203125, 553.9163818359375, 190.4430694580078, 350.06610107421875, -130.17477416992188, -19.67706871032715, 53.95802688598633, 666.8758544921875, 174.28883361816406, 435.4332275390625, 228.56492614746094, 525.40576171875, 731.900390625, 197.8099365234375, 407.14764404296875, -253.9033660888672, 227.62393188476562, -14.870773315429688, 358.8004455566406, 231.66519165039062, 154.61398315429688, 431.2527770996094, -210.93844604492188, 1017.738037109375, 508.7999572753906, 577.92919921875, -238.2652130126953, 411.5655212402344, 490.8896789550781, 189.9995880126953, -384.1827392578125, -34.932247161865234, 395.8345947265625, 69.352783203125, 657.3112182617188, 98.31133270263672, -110.37228393554688, 425.3397521972656, 885.214111328125, 462.8818359375, 188.10618591308594, 559.4597778320312, 261.76678466796875, 179.93161010742188, 270.0162658691406, -300.9745178222656, 265.2753601074219, 480.34173583984375, -74.91801452636719, 193.37884521484375, -74.09920501708984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000399.npy"} +{"epoch": 0.5859030837004405, "step": 400, "batch_size": 64, "mean": 330.199951171875, "std": 287.2948303222656, "min": -422.671875, "p10": -50.32951965332031, "median": 312.2267303466797, "p90": 681.0405700683594, "max": 1103.0087890625, "pos_frac": 0.828125, "sample": [266.1824645996094, 433.49029541015625, 385.14556884765625, 242.00137329101562, 597.3814697265625, -51.251190185546875, 429.7236328125, 236.70697021484375, -67.49529266357422, 561.7078247070312, 182.09725952148438, 147.06939697265625, 493.2684020996094, 250.59959411621094, -422.671875, 661.9140014648438, 703.9367065429688, 221.66954040527344, 316.279052734375, 409.8841552734375, 782.1991577148438, 540.3095092773438, 145.13882446289062, 395.5696105957031, 132.1061248779297, 286.37908935546875, 115.68772888183594, 534.3336181640625, 322.23095703125, 382.82427978515625, 364.1266174316406, 149.29315185546875, -92.65424346923828, 356.687255859375, 308.1744079589844, -195.59104919433594, 34.018089294433594, 731.3153076171875, 330.1536560058594, 502.31610107421875, -22.606521606445312, 246.2520751953125, 298.6690979003906, 984.3052978515625, -52.60882568359375, 558.3208618164062, 297.3296203613281, 187.2488555908203, -10.084117889404297, 1103.0087890625, 607.2607421875, -44.919525146484375, 285.680908203125, 463.05767822265625, 291.59735107421875, 620.0782470703125, -161.8167266845703, 689.2376708984375, 602.22998046875, 458.5211181640625, 267.25775146484375, 633.1085205078125, 757.5899658203125, -48.178955078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000400.npy"} +{"epoch": 0.5873715124816447, "step": 401, "batch_size": 64, "mean": 185.60690307617188, "std": 362.07354736328125, "min": -759.6788940429688, "p10": -187.3199981689453, "median": 119.85631561279297, "p90": 728.4331420898437, "max": 1052.100830078125, "pos_frac": 0.703125, "sample": [122.42433166503906, 426.596923828125, -62.97471237182617, 216.0941162109375, 549.6589965820312, 33.079673767089844, -382.8280029296875, -372.5995788574219, -759.6788940429688, -123.93833923339844, 25.74974822998047, 211.75418090820312, -192.460693359375, 73.7049331665039, -26.60601806640625, -17.280065536499023, 375.5141906738281, 263.4794006347656, 117.28829956054688, 107.59681701660156, -75.62680053710938, -151.66636657714844, 6.271903991699219, 927.0657958984375, 7.782079696655273, 582.6846923828125, 829.1704711914062, 421.3172607421875, 731.7002563476562, 214.52793884277344, 89.80944061279297, 287.0821228027344, -59.993892669677734, 67.96411895751953, -55.857643127441406, -438.7889099121094, 900.8348388671875, -38.05317306518555, 221.17112731933594, 530.0728759765625, 197.7008819580078, 554.561767578125, 319.2820129394531, -55.761356353759766, 277.12615966796875, 155.14358520507812, 177.30287170410156, -131.10472106933594, 1052.100830078125, -393.03240966796875, 666.6944580078125, 458.70672607421875, 310.3629150390625, 68.30330657958984, 845.7947998046875, 806.5533447265625, 39.290992736816406, 312.3817443847656, 81.28176879882812, 51.27889633178711, 186.2821044921875, 720.8098754882812, -228.93685913085938, -175.32504272460938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000401.npy"} +{"epoch": 0.5888399412628488, "step": 402, "batch_size": 64, "mean": 344.6689758300781, "std": 306.7999267578125, "min": -296.12060546875, "p10": 23.73477172851563, "median": 298.8147277832031, "p90": 797.8014831542971, "max": 1425.2257080078125, "pos_frac": 0.90625, "sample": [420.5735168457031, 28.736204147338867, -17.554786682128906, 452.4129638671875, 242.6776123046875, 429.0967102050781, -4.72607421875, 226.59725952148438, -51.62267303466797, 586.3504638671875, 399.68841552734375, 558.722900390625, 649.3762817382812, 275.2235412597656, 58.99591064453125, 570.4627685546875, 21.898910522460938, 306.55987548828125, 213.17431640625, 404.1646423339844, 563.8169555664062, 265.22454833984375, 574.1791381835938, -296.12060546875, 330.0476989746094, 468.42340087890625, 44.45904541015625, 920.3260498046875, 38.99903869628906, 839.4384765625, 179.30062866210938, 382.14288330078125, 193.6550750732422, 441.13165283203125, 133.06329345703125, 333.2005310058594, 817.2056274414062, 123.04043579101562, 186.1825408935547, 665.9972534179688, 28.018447875976562, -70.45692443847656, 872.0255737304688, 900.0133056640625, 282.9833068847656, 521.8359375, 752.525146484375, 144.9830780029297, 290.2745666503906, 1425.2257080078125, 424.8388671875, 549.3941650390625, 50.52943420410156, 876.1983642578125, 183.79244995117188, 310.57464599609375, 338.43634033203125, 257.53131103515625, 142.15664672851562, -286.4552307128906, 374.86248779296875, 202.05667114257812, 221.87844848632812, 291.069580078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000402.npy"} +{"epoch": 0.5903083700440529, "step": 403, "batch_size": 64, "mean": 266.7139892578125, "std": 303.91729736328125, "min": -344.39691162109375, "p10": -107.18658981323242, "median": 249.41559600830078, "p90": 675.3305480957033, "max": 1097.064453125, "pos_frac": 0.796875, "sample": [254.99627685546875, 75.6183853149414, 81.96903228759766, 217.09954833984375, 399.9902648925781, 60.11825942993164, 256.87298583984375, 391.14019775390625, -100.42557525634766, -201.4083709716797, 164.11697387695312, -26.006744384765625, -344.39691162109375, 558.2152099609375, 206.11500549316406, 863.1846923828125, 243.7853546142578, 25.6674861907959, 438.600341796875, 243.8349151611328, 368.31207275390625, 69.91729736328125, 701.457763671875, -58.566322326660156, -171.73092651367188, 576.7864379882812, 494.40814208984375, 697.8551025390625, 258.1300964355469, 993.5582275390625, -111.47496032714844, 426.9736633300781, 596.25732421875, 38.45321273803711, 10.942680358886719, 534.8706665039062, -122.90435028076172, 285.4770812988281, 622.7732543945312, 495.95904541015625, -178.63389587402344, 306.1585388183594, 1097.064453125, -1.8028507232666016, 495.5356140136719, 122.40370178222656, 320.5196838378906, 733.6937255859375, 341.7314147949219, 952.96923828125, 306.7456970214844, 390.7463684082031, 319.8320617675781, 146.27015686035156, -48.961639404296875, 44.23976135253906, -31.799020767211914, 351.87762451171875, 154.4459686279297, 114.37399291992188, 347.5402526855469, 204.52667236328125, -110.08416748046875, 173.75833129882812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000403.npy"} +{"epoch": 0.591776798825257, "step": 404, "batch_size": 64, "mean": 262.11383056640625, "std": 349.90411376953125, "min": -547.3900756835938, "p10": -194.50329437255854, "median": 262.8769836425781, "p90": 713.6739379882814, "max": 1174.9381103515625, "pos_frac": 0.75, "sample": [-289.57489013671875, 303.5926208496094, 43.114585876464844, 634.7318115234375, 578.815185546875, 164.4515380859375, 168.14723205566406, 946.4501953125, 166.0474853515625, 729.5732421875, 1174.9381103515625, 222.4044952392578, 284.213623046875, 811.1722412109375, -85.42841339111328, -352.2374572753906, 828.262451171875, 108.59941864013672, -282.0796203613281, 188.7718048095703, -115.96145629882812, 20.17969512939453, -80.64131164550781, 426.56610107421875, 251.59893798828125, 300.7406311035156, -215.86093139648438, 434.6244201660156, 274.155029296875, 404.3627624511719, 548.6298217773438, -240.34132385253906, 422.14788818359375, 156.90109252929688, 563.3414306640625, 126.54180145263672, -144.66880798339844, 175.9204559326172, 310.0307312011719, 569.3364868164062, -547.3900756835938, 418.331298828125, 69.882080078125, 644.8351440429688, 201.51576232910156, -272.658203125, -110.46705627441406, 482.3351745605469, 604.56689453125, -75.33769226074219, 436.7607727050781, -3.3356094360351562, 401.87353515625, 386.0853576660156, 518.75390625, 760.6259155273438, 325.4029235839844, 56.01600646972656, -8.400720596313477, -80.18984985351562, 903.4926147460938, 110.07897186279297, 676.5755615234375, 344.36309814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000404.npy"} +{"epoch": 0.593245227606461, "step": 405, "batch_size": 64, "mean": 207.8790283203125, "std": 378.0, "min": -528.8992919921875, "p10": -257.65058593749995, "median": 148.39952087402344, "p90": 770.0597290039065, "max": 1127.365478515625, "pos_frac": 0.6875, "sample": [823.5910034179688, 787.7806396484375, 246.33984375, -208.8866729736328, -434.9656982421875, -41.004425048828125, -465.8363037109375, 132.90298461914062, 413.5960998535156, -268.49151611328125, 52.837623596191406, 1025.86962890625, 547.3926391601562, 126.5279541015625, 53.985816955566406, 1127.365478515625, -14.568300247192383, -63.078895568847656, 51.16436767578125, -147.9488983154297, 67.6180191040039, 185.18202209472656, 109.37562561035156, 521.1520385742188, 163.89605712890625, 108.6697769165039, -134.2729034423828, 492.8330993652344, -347.22283935546875, -74.40596771240234, 419.20556640625, 99.43994140625, 434.0411071777344, -11.554819107055664, 408.498291015625, 77.34473419189453, 290.9015197753906, -268.0087890625, -133.18746948242188, 78.71817016601562, 458.2021179199219, 235.26296997070312, 714.9805908203125, 360.2840881347656, -36.65719985961914, 183.0227508544922, 567.869140625, -208.22933959960938, 245.57931518554688, 276.85296630859375, 374.61761474609375, -434.98443603515625, 286.28594970703125, 877.662109375, 114.1159439086914, -528.8992919921875, 485.490966796875, 404.01947021484375, -50.3726806640625, 924.742919921875, 819.9360961914062, -233.4814453125, 506.45025634765625, 728.7109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000405.npy"} +{"epoch": 0.5947136563876652, "step": 406, "batch_size": 64, "mean": 286.5443115234375, "std": 348.5628662109375, "min": -380.06597900390625, "p10": -122.05048370361325, "median": 225.94449615478516, "p90": 751.2952148437502, "max": 1239.347900390625, "pos_frac": 0.78125, "sample": [524.7979125976562, 188.17953491210938, 288.9114685058594, 55.172119140625, 157.88934326171875, 241.5570068359375, 163.02761840820312, -340.94891357421875, 1211.2938232421875, 207.0816650390625, 439.19854736328125, 270.006103515625, 1239.347900390625, 584.3969116210938, 299.58538818359375, -6.299060821533203, 560.3114624023438, 302.7090148925781, 609.2334594726562, -141.7648468017578, 890.8314208984375, 348.5958557128906, 336.1812744140625, -149.2030792236328, 99.21472930908203, 168.60890197753906, 71.11128234863281, -96.83511352539062, 518.1046752929688, -1.568450927734375, -132.85707092285156, 224.8101348876953, 549.9912109375, -296.4940185546875, -195.285400390625, 517.2726440429688, 784.6986083984375, 227.078857421875, 173.1481475830078, -14.457263946533203, 137.77700805664062, 208.82925415039062, 469.5520324707031, -38.976932525634766, -12.553802490234375, 66.7713394165039, 770.4906005859375, 345.9555358886719, 35.3843994140625, 224.2529296875, 639.0303344726562, 706.5059814453125, 427.1767883300781, 37.84965515136719, -380.06597900390625, 10.044998168945312, 811.0794677734375, 567.0906372070312, 1067.723876953125, 380.0668640136719, 279.1964111328125, 626.3270874023438, 96.90946960449219, -14.216453552246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000406.npy"} +{"epoch": 0.5961820851688693, "step": 407, "batch_size": 64, "mean": 172.09341430664062, "std": 333.45184326171875, "min": -889.6163330078125, "p10": -229.21056213378907, "median": 163.82671356201172, "p90": 616.2310363769532, "max": 872.138671875, "pos_frac": 0.734375, "sample": [-289.0836181640625, 544.32861328125, 521.444580078125, -347.5446472167969, -295.2379150390625, -76.7998275756836, 255.88462829589844, -175.30661010742188, -330.8525695800781, 261.72857666015625, 369.33489990234375, 97.03123474121094, 391.9783935546875, 710.37548828125, 684.7861328125, 14.807878494262695, -127.19046020507812, 494.80438232421875, 85.03052520751953, -110.68345642089844, 375.8236083984375, 128.60589599609375, 45.53028869628906, 87.82162475585938, 100.23289489746094, 503.6856689453125, 215.18870544433594, 254.279296875, 90.28865051269531, 206.38441467285156, -232.16213989257812, 171.63296508789062, 14.108901977539062, -101.78900909423828, 27.230552673339844, 156.0204620361328, 488.7734069824219, 737.0816040039062, 78.74031066894531, -889.6163330078125, 738.1088256835938, 329.63128662109375, 432.96966552734375, 13.515602111816406, 424.55291748046875, 210.20875549316406, 209.8179931640625, 872.138671875, 400.2421569824219, 323.54608154296875, 625.6902465820312, -222.32354736328125, -127.66893005371094, 594.1595458984375, -39.86042022705078, 89.99238586425781, 128.6365203857422, 505.7142028808594, -206.5877227783203, -398.05938720703125, 642.595458984375, 250.5893096923828, -204.0049591064453, 283.675537109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000407.npy"} +{"epoch": 0.5976505139500734, "step": 408, "batch_size": 64, "mean": 206.1318359375, "std": 272.343505859375, "min": -338.236572265625, "p10": -157.5821823120117, "median": 216.36167907714844, "p90": 609.567810058594, "max": 776.9927368164062, "pos_frac": 0.78125, "sample": [84.62394714355469, 703.1080932617188, 776.9927368164062, 0.616455078125, 399.52044677734375, 362.8232727050781, -107.38909912109375, 145.51040649414062, -141.54168701171875, -164.45668029785156, -20.278364181518555, -282.6334533691406, 114.19596099853516, 12.8433837890625, -47.784873962402344, -338.236572265625, 91.25199890136719, 313.54107666015625, 384.90478515625, 60.38975524902344, 176.39674377441406, 382.33880615234375, -120.80078125, 414.00653076171875, -130.2196502685547, 259.1346435546875, 106.81061553955078, 253.08816528320312, 80.19170379638672, 699.5315551757812, 327.50665283203125, 210.16452026367188, 545.2916259765625, -15.470603942871094, -274.0296630859375, -229.23477172851562, 267.2834777832031, 236.8773651123047, 64.73954772949219, 212.65914916992188, 382.5459899902344, 444.9490661621094, 373.41839599609375, 451.61663818359375, 194.28778076171875, -241.41986083984375, 711.8436279296875, 35.67156219482422, 11.598407745361328, 93.08016967773438, 220.064208984375, 642.2205200195312, 271.1863098144531, -168.25479125976562, 364.98419189453125, 308.4371643066406, 73.01642608642578, 475.44580078125, 226.9364471435547, 308.38470458984375, 707.1353149414062, 637.11474609375, 494.150390625, 359.7564697265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000408.npy"} +{"epoch": 0.5991189427312775, "step": 409, "batch_size": 64, "mean": 200.32015991210938, "std": 282.85504150390625, "min": -443.8636169433594, "p10": -92.58041305541992, "median": 170.23763275146484, "p90": 579.9234008789064, "max": 913.8094482421875, "pos_frac": 0.75, "sample": [-38.632293701171875, -322.543701171875, 274.4745788574219, 221.98643493652344, 200.2401123046875, 364.53826904296875, -85.8919448852539, -95.56964111328125, -12.119232177734375, 180.4687957763672, 523.4789428710938, 554.2958984375, -292.35003662109375, 160.10333251953125, 406.572265625, 144.11611938476562, 219.23846435546875, 279.1930847167969, 181.53927612304688, 757.5123291015625, 331.2365417480469, -91.37105560302734, 357.40362548828125, -12.915241241455078, 657.685546875, -160.02105712890625, -4.372077941894531, 52.73236846923828, -7.029632568359375, 153.40907287597656, 323.4896240234375, 741.8665161132812, 499.2967529296875, -443.8636169433594, 78.36566925048828, 913.8094482421875, -3.0776596069335938, 97.75459289550781, -336.0921630859375, 95.77820587158203, 517.72314453125, 10.432411193847656, 590.9066162109375, 36.8195915222168, 249.25640869140625, 256.25079345703125, 80.35540008544922, 129.45753479003906, 815.0452270507812, 400.4791564941406, -16.479074478149414, 180.37193298339844, -93.09870910644531, 199.50755310058594, 185.97572326660156, 449.45440673828125, 360.0773620605469, 314.737060546875, 130.02340698242188, 101.53306579589844, 787.330322265625, 24.055435180664062, 94.88348388671875, 150.65509033203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000409.npy"} +{"epoch": 0.6005873715124816, "step": 410, "batch_size": 64, "mean": 236.67666625976562, "std": 284.5502624511719, "min": -554.3952026367188, "p10": -95.41731109619138, "median": 255.5237045288086, "p90": 599.3366516113282, "max": 846.8779296875, "pos_frac": 0.828125, "sample": [219.21405029296875, 261.59271240234375, -123.21365356445312, 60.957679748535156, 486.1907958984375, 335.4609069824219, 537.9090576171875, 440.4433288574219, 516.8099975585938, 19.46406364440918, 116.64741516113281, 235.87646484375, 12.022994995117188, 397.3802490234375, -107.72013092041016, -128.26202392578125, 618.853515625, -396.49029541015625, -47.74454116821289, 395.3350830078125, 452.0826416015625, 750.93603515625, -38.85719299316406, 95.71650695800781, 609.1461791992188, 695.9754028320312, 373.19708251953125, 68.425537109375, 338.69366455078125, 846.8779296875, 305.48565673828125, 29.267169952392578, 347.4608459472656, 33.72705078125, 428.7637939453125, 7.959747314453125, -66.71073150634766, 7.732263565063477, 166.960205078125, -235.8477783203125, 216.65956115722656, 509.9922790527344, 165.78729248046875, 340.5814208984375, 203.85214233398438, -554.3952026367188, 299.0588684082031, 253.42140197753906, 717.5150756835938, 347.405517578125, 109.17718505859375, 52.37397003173828, 414.45904541015625, 517.4411010742188, 257.6260070800781, 756.2919311523438, 189.8509063720703, 404.37481689453125, -334.7213134765625, 126.51445007324219, 264.8761901855469, -16.021371841430664, 576.44775390625, 261.0179138183594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000410.npy"} +{"epoch": 0.6020558002936858, "step": 411, "batch_size": 64, "mean": 230.87094116210938, "std": 334.8406982421875, "min": -559.5396728515625, "p10": -69.69572219848632, "median": 166.80126190185547, "p90": 643.3861938476563, "max": 1348.868408203125, "pos_frac": 0.734375, "sample": [247.93914794921875, 8.993461608886719, -218.20684814453125, 112.8073959350586, 517.9063720703125, 226.08367919921875, 15.516849517822266, 241.46728515625, 397.76220703125, 418.194091796875, 125.6522445678711, -29.55076026916504, 196.52557373046875, -44.37661361694336, 68.7391586303711, -20.070802688598633, 464.3623962402344, 259.5745544433594, 92.83438110351562, 417.0675048828125, 895.1936645507812, 335.4056701660156, -146.99542236328125, -22.482606887817383, 171.14181518554688, 158.88026428222656, -59.33580017089844, 230.52561950683594, 103.71353149414062, 1036.479248046875, 129.76205444335938, 449.0039367675781, 879.9249877929688, -74.13568878173828, 505.91827392578125, -0.5345611572265625, 368.61083984375, 588.5142822265625, 739.2559204101562, 153.70510864257812, 35.24175262451172, 336.0103759765625, 140.7845458984375, 281.8592529296875, 159.104248046875, 358.63446044921875, -19.956798553466797, -559.5396728515625, 1348.868408203125, 283.6556396484375, 162.46070861816406, 352.9821472167969, 588.283203125, 649.819091796875, 628.3760986328125, -188.62255859375, 11.281822204589844, -442.7227783203125, 172.366455078125, -32.282867431640625, 777.1708984375, -152.99041748046875, -7.2823638916015625, -49.53385543823242], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000411.npy"} +{"epoch": 0.6035242290748899, "step": 412, "batch_size": 64, "mean": 234.52589416503906, "std": 320.2996520996094, "min": -509.74853515625, "p10": -126.91835021972656, "median": 211.89244079589844, "p90": 607.8744567871095, "max": 1166.2406005859375, "pos_frac": 0.78125, "sample": [-95.91915130615234, 182.41453552246094, 300.14739990234375, 473.17059326171875, 321.71484375, -169.13931274414062, 723.2026977539062, -48.576942443847656, 210.1231689453125, 568.5206298828125, 135.40106201171875, 311.8865661621094, 67.3135986328125, 338.4691162109375, 0.9617748260498047, 368.2921142578125, 331.5546875, 123.5209732055664, -509.74853515625, 256.57977294921875, 36.72132110595703, 35.226417541503906, 630.2716064453125, 196.7877655029297, 30.583629608154297, 422.69158935546875, 18.721038818359375, 327.3212585449219, -198.19961547851562, 190.00198364257812, -56.783653259277344, 112.4975357055664, 213.66171264648438, 237.27557373046875, 143.8665313720703, -49.23210144042969, -308.5535888671875, -128.4796905517578, 173.36834716796875, 907.7093505859375, 21.655029296875, -505.4300537109375, 340.55181884765625, 552.0294189453125, 732.0068969726562, -123.27522277832031, 503.57373046875, 787.013427734375, -0.6942138671875, 559.515869140625, 342.5115966796875, -43.576446533203125, 103.44093322753906, 613.2656860351562, 564.1802978515625, 1166.2406005859375, 4.582668304443359, 451.83587646484375, 394.06243896484375, -225.88131713867188, 481.269775390625, 398.6999816894531, 595.294921875, 471.4361877441406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000412.npy"} +{"epoch": 0.604992657856094, "step": 413, "batch_size": 64, "mean": 238.4629364013672, "std": 313.2305908203125, "min": -821.4711303710938, "p10": -108.511351776123, "median": 247.18698120117188, "p90": 607.8877990722657, "max": 958.7742309570312, "pos_frac": 0.859375, "sample": [70.81484985351562, -2.752391815185547, 174.96270751953125, 405.7555847167969, 164.97100830078125, 147.4010467529297, 142.23486328125, 267.248779296875, 34.258155822753906, 225.52249145507812, 653.9488525390625, 924.8370361328125, -74.56287384033203, 275.27923583984375, 18.754310607910156, 5.81273078918457, 334.72967529296875, 111.78125, 1.8789825439453125, 888.9212646484375, 98.61985778808594, 419.2412109375, 378.3013000488281, -821.4711303710938, 76.4488525390625, 294.23681640625, 296.9761962890625, 227.12518310546875, 600.093505859375, 958.7742309570312, 374.22509765625, 350.9228210449219, 617.235595703125, 375.21490478515625, -123.06069946289062, 445.96832275390625, 326.5572509765625, 159.94647216796875, 282.9566345214844, 307.44317626953125, 143.15965270996094, 155.4206085205078, 526.2476806640625, -301.9345397949219, 611.2282104492188, 389.2705078125, 173.01205444335938, -262.8427429199219, -270.15185546875, -553.5607299804688, 221.9008331298828, 482.67791748046875, 476.80694580078125, 745.9276123046875, 28.206066131591797, 517.3670654296875, 328.8359069824219, 452.1640319824219, 206.05709838867188, 308.530029296875, 115.65640258789062, 209.81138610839844, 350.56536865234375, -210.2806396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000413.npy"} +{"epoch": 0.6064610866372981, "step": 414, "batch_size": 64, "mean": 256.247802734375, "std": 316.9881591796875, "min": -697.1141357421875, "p10": -60.89537353515624, "median": 254.1136016845703, "p90": 641.8205810546876, "max": 1013.56640625, "pos_frac": 0.8125, "sample": [338.4967041015625, 216.76280212402344, 496.3238525390625, 313.4790954589844, 23.80337905883789, 924.5217895507812, 172.78485107421875, -697.1141357421875, 29.263656616210938, 200.55615234375, 292.14312744140625, 161.31640625, 652.3599853515625, 855.8004150390625, 251.67367553710938, -79.87037658691406, 403.02801513671875, 70.06784057617188, -109.21566009521484, 153.8828887939453, 210.83941650390625, 528.0642700195312, -66.12223815917969, 145.91482543945312, 540.365966796875, 276.11724853515625, -228.27163696289062, -21.437273025512695, 367.60760498046875, 56.26289367675781, 500.7178649902344, 533.1609497070312, 20.418170928955078, 378.66064453125, 719.9299926757812, -38.368717193603516, 330.8829345703125, -48.69935607910156, -418.77752685546875, 274.1803894042969, 310.518310546875, 256.55352783203125, 612.33251953125, -199.58953857421875, 43.34855651855469, 529.7698974609375, 31.116668701171875, 617.2286376953125, 156.16830444335938, -39.7426643371582, 296.962158203125, 102.61003112792969, 153.283203125, 90.32408905029297, 270.1675109863281, 332.3358154296875, 790.1046142578125, -30.747955322265625, 441.56903076171875, 976.5620727539062, 1013.56640625, 407.2273254394531, 136.790283203125, 369.89080810546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000414.npy"} +{"epoch": 0.6079295154185022, "step": 415, "batch_size": 64, "mean": 190.50540161132812, "std": 293.50885009765625, "min": -763.3541870117188, "p10": -103.60505905151368, "median": 159.98695373535156, "p90": 606.3653808593751, "max": 894.903564453125, "pos_frac": 0.78125, "sample": [213.70382690429688, -102.88851928710938, 376.24981689453125, 800.130859375, 28.037612915039062, 763.6449584960938, 578.20849609375, 87.87960052490234, -9.169639587402344, 187.20765686035156, 512.217529296875, 383.2449951171875, 427.4273681640625, 533.115234375, -140.39297485351562, -74.83827209472656, 208.40994262695312, -245.60763549804688, 336.28533935546875, 149.11412048339844, 7.259010314941406, 37.4996337890625, 5.62969970703125, 618.4326171875, -149.34127807617188, 123.8633804321289, 627.6090087890625, -103.91214752197266, 95.14875030517578, 161.47698974609375, -32.1729736328125, 391.0691833496094, 229.19607543945312, 51.7449951171875, 894.903564453125, -318.2059326171875, -91.71784973144531, 113.90235900878906, 410.33489990234375, 651.162109375, 86.41851806640625, 363.0756530761719, 175.22076416015625, 85.06231689453125, 136.0499267578125, 219.4099578857422, 24.70049476623535, 217.30470275878906, 382.93133544921875, -100.65430450439453, 232.1282958984375, 445.5760803222656, 270.04833984375, 7.44122314453125, 358.8507080078125, -48.62339782714844, 158.49691772460938, 6.886383056640625, 509.214599609375, -263.7743225097656, 629.4111328125, 63.97907638549805, -763.3541870117188, 260.68316650390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000415.npy"} +{"epoch": 0.6093979441997063, "step": 416, "batch_size": 64, "mean": 315.087646484375, "std": 313.6957702636719, "min": -238.64993286132812, "p10": -55.38152046203613, "median": 271.4444885253906, "p90": 757.6973266601562, "max": 1028.4927978515625, "pos_frac": 0.84375, "sample": [848.2700805664062, 113.63660430908203, 161.18914794921875, 443.09228515625, -48.416343688964844, 584.75830078125, 157.0867919921875, 25.68834686279297, 460.28363037109375, 770.596435546875, -238.64993286132812, -216.19778442382812, 17.616552352905273, 273.7378234863281, 242.76766967773438, 530.4274291992188, 605.78857421875, 6.207847595214844, 590.2731323242188, 212.45408630371094, 403.71331787109375, 492.6459045410156, 43.90812301635742, 159.43136596679688, 757.482421875, 241.73672485351562, 285.8492431640625, 746.2385864257812, 150.92886352539062, 757.7894287109375, 201.28367614746094, 549.8897705078125, 968.6072998046875, -164.73797607421875, -134.04534912109375, 84.75316619873047, 574.2783813476562, 485.47503662109375, -22.06879425048828, 1028.4927978515625, 22.67601776123047, 899.4675903320312, 273.43157958984375, 269.4573974609375, 508.67974853515625, 629.9017333984375, 294.49761962890625, 644.4122314453125, 235.06846618652344, 107.50440216064453, 54.64139938354492, 215.1903839111328, -160.79440307617188, 786.2015991210938, 403.4244384765625, -48.67755126953125, 351.4315185546875, -58.2546501159668, 277.987060546875, -86.87547302246094, 60.821739196777344, 187.2962646484375, 436.34466552734375, 709.514404296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000416.npy"} +{"epoch": 0.6108663729809104, "step": 417, "batch_size": 64, "mean": 269.8897705078125, "std": 287.2241516113281, "min": -387.4964599609375, "p10": -93.15904235839842, "median": 244.02118682861328, "p90": 693.7992004394531, "max": 889.2012939453125, "pos_frac": 0.828125, "sample": [-5.623773574829102, 13.516311645507812, -15.501958847045898, 334.02935791015625, 9.873409271240234, 507.7548828125, 38.55427551269531, 416.47186279296875, -142.97329711914062, 694.0288696289062, 58.58957290649414, 626.251220703125, 391.7980651855469, 106.10185241699219, 239.8341522216797, -118.61936950683594, -107.67021179199219, 740.8314819335938, 671.9328002929688, 52.652374267578125, 629.7701416015625, 171.06820678710938, 864.9051513671875, 270.59942626953125, 815.7142944335938, 889.2012939453125, 219.98565673828125, 253.71951293945312, 114.5654525756836, 357.4626159667969, 131.61068725585938, 162.91831970214844, -163.21517944335938, 83.23169708251953, 399.34039306640625, 693.2633056640625, 31.20834732055664, 404.2015380859375, 443.9245910644531, 239.8086700439453, 130.75869750976562, 348.6584167480469, 248.02980041503906, 346.4839172363281, -153.5384979248047, 302.87286376953125, 83.76659393310547, 177.80947875976562, 493.812744140625, 246.00816345214844, 442.39453125, -387.4964599609375, 331.4983825683594, 394.5009460449219, -97.9016342163086, -82.0929946899414, 242.03421020507812, 117.31505584716797, 706.411376953125, 480.97314453125, 668.7597045898438, 80.67558288574219, 697.71533203125, -71.6208267211914], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000417.npy"} +{"epoch": 0.6123348017621145, "step": 418, "batch_size": 64, "mean": 268.3392028808594, "std": 395.8111572265625, "min": -416.396728515625, "p10": -190.13241729736325, "median": 180.15247344970703, "p90": 775.0614135742189, "max": 1389.4530029296875, "pos_frac": 0.75, "sample": [278.32476806640625, -161.4162139892578, 67.012939453125, -60.26816177368164, 74.98786926269531, 966.8666381835938, 67.20381927490234, 71.10675048828125, 733.07568359375, 623.1310424804688, -43.71417236328125, -40.404052734375, 557.7646484375, -134.9583740234375, 1273.2119140625, 590.7816162109375, 308.41766357421875, -111.82801818847656, 465.4379577636719, 5.8104248046875, -339.1910400390625, 793.0552978515625, -236.62918090820312, 548.3961181640625, 230.25576782226562, 25.609085083007812, 289.8881530761719, 659.8163452148438, 562.2503662109375, 212.62283325195312, 574.645751953125, 68.45112609863281, 117.98348236083984, 924.2542724609375, -416.396728515625, -232.5936279296875, -202.43936157226562, 382.98651123046875, 240.56809997558594, 533.6060791015625, 1389.4530029296875, -54.44910430908203, 100.08831787109375, 106.55734252929688, 313.108154296875, 147.68211364746094, 139.04345703125, 103.07723999023438, -338.9849853515625, -315.2327880859375, 371.70855712890625, 1.8387908935546875, 577.6204833984375, -40.04441833496094, 561.7546997070312, 266.8636169433594, 628.2733154296875, 1097.567138671875, 138.8753662109375, 47.63682174682617, 491.6875305175781, 941.4741821289062, 301.8420715332031, -71.4169921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000418.npy"} +{"epoch": 0.6138032305433186, "step": 419, "batch_size": 64, "mean": 306.9680480957031, "std": 346.4142761230469, "min": -503.8890075683594, "p10": -122.50778350830073, "median": 322.06036376953125, "p90": 716.2543579101563, "max": 1144.1309814453125, "pos_frac": 0.859375, "sample": [725.86083984375, -503.8890075683594, 365.2898864746094, 590.2022705078125, 59.84095001220703, 921.7814331054688, 81.23739624023438, -70.40882873535156, 26.69091796875, 719.9807739257812, 1144.1309814453125, 183.87637329101562, 345.888916015625, 568.8671875, 472.9674072265625, 546.6812744140625, 193.97720336914062, 156.93260192871094, -491.52587890625, 82.31085968017578, -182.89971923828125, 756.0157470703125, -252.71885681152344, 294.3611755371094, 571.4577026367188, -303.17047119140625, 179.66958618164062, 204.70884704589844, 483.400634765625, 898.5186767578125, 473.32464599609375, 54.7149658203125, 546.2218627929688, 350.01495361328125, 464.7120361328125, 107.48729705810547, 121.85383605957031, -10.870819091796875, 517.5028076171875, 354.99603271484375, 686.43359375, 191.10421752929688, 668.7987060546875, 83.96482849121094, 97.20256805419922, 60.47159957885742, 1.5234394073486328, -144.83590698242188, 678.540771484375, 605.7756958007812, 573.9544677734375, 414.6121520996094, 55.10467529296875, 963.1964111328125, 489.41583251953125, 442.8005676269531, 557.0684204101562, 298.2318115234375, 148.8433837890625, 707.5593872070312, -262.3377685546875, 10.3001708984375, 165.68798828125, 402.5440368652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000419.npy"} +{"epoch": 0.6152716593245228, "step": 420, "batch_size": 64, "mean": 198.10260009765625, "std": 415.9445495605469, "min": -776.9608154296875, "p10": -281.2812561035155, "median": 151.37195587158203, "p90": 615.8023315429688, "max": 1353.8707275390625, "pos_frac": 0.671875, "sample": [-142.56634521484375, 229.10623168945312, -32.787330627441406, -18.40420913696289, 578.900390625, -119.2820053100586, -334.3971862792969, -20.0113525390625, 182.6934051513672, 577.4363403320312, 36.86247253417969, -52.84954071044922, -33.33403396606445, 180.8997039794922, -20.4007568359375, 137.103515625, 568.2664794921875, 401.0361328125, 80.65769958496094, 282.0758972167969, 119.6444091796875, 282.2446594238281, -142.82247924804688, 498.6133728027344, -100.10162353515625, 178.27322387695312, 8.062606811523438, 433.2879638671875, 167.182861328125, -38.46929931640625, 549.9373779296875, 512.4237060546875, 517.9429321289062, -398.48736572265625, 121.94380950927734, 178.86428833007812, -345.2996826171875, -4.401268005371094, 749.181640625, 740.42724609375, -52.309593200683594, -776.9608154296875, 681.7282104492188, 461.9122009277344, 619.497802734375, 147.94674682617188, 607.1795654296875, 484.2186279296875, 395.9228515625, 321.37420654296875, 2.9763736724853516, -512.383544921875, 361.89373779296875, 39.90646743774414, 13.393688201904297, 1277.2791748046875, -738.9537353515625, 87.13600158691406, 154.7971649169922, 484.96185302734375, 1321.35986328125, -157.34408569335938, -410.2900695800781, 1353.8707275390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000420.npy"} +{"epoch": 0.6167400881057269, "step": 421, "batch_size": 64, "mean": 218.4813232421875, "std": 316.6380615234375, "min": -867.9801025390625, "p10": -180.60339508056637, "median": 271.03016662597656, "p90": 597.6869018554688, "max": 885.879150390625, "pos_frac": 0.796875, "sample": [331.4568786621094, 78.11134338378906, 131.54624938964844, -153.62640380859375, 283.193359375, 470.22515869140625, 885.879150390625, -67.95791625976562, 216.4302520751953, 62.07460403442383, 368.51959228515625, 30.48232650756836, 594.633544921875, 680.1818237304688, 498.307373046875, 578.4228515625, 317.264404296875, -206.89222717285156, 120.84679412841797, 154.888916015625, 762.845458984375, 532.704833984375, 281.15924072265625, -274.9870300292969, 278.521484375, 237.0494842529297, 435.268798828125, 291.1630859375, 2.3680801391601562, 246.32058715820312, 598.9954833984375, 429.4632568359375, 652.9467163085938, 285.4478454589844, 373.727783203125, -429.28497314453125, 501.7618408203125, 401.39044189453125, -351.5361633300781, 604.6154174804688, 217.8701171875, -19.703359603881836, 374.387451171875, 805.2901611328125, 334.3617248535156, 255.5504913330078, 161.177490234375, -13.93681526184082, 11.940591812133789, 460.3486328125, 102.94729614257812, 31.59186553955078, 294.78277587890625, -867.9801025390625, -192.1649627685547, 63.37229537963867, 115.45050048828125, -141.5242919921875, -144.7425537109375, 272.83538818359375, 296.385986328125, -325.4464416503906, 269.2249450683594, 386.8555908203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000421.npy"} +{"epoch": 0.618208516886931, "step": 422, "batch_size": 64, "mean": 251.21853637695312, "std": 372.6368103027344, "min": -741.4208374023438, "p10": -114.95525360107418, "median": 241.5098648071289, "p90": 689.6527099609376, "max": 1382.27490234375, "pos_frac": 0.75, "sample": [268.769287109375, 664.992919921875, 887.8555297851562, 700.22119140625, 344.49200439453125, -237.9217071533203, -741.4208374023438, 721.365234375, -68.0967025756836, 1382.27490234375, -68.99772644042969, 162.50486755371094, 246.7913055419922, 45.04901123046875, 389.01129150390625, 187.83953857421875, 68.37374114990234, -138.10244750976562, -10.49294662475586, 229.91212463378906, 8.008513450622559, 774.7116088867188, 329.7757568359375, 177.836669921875, 445.30438232421875, 206.504150390625, 187.66897583007812, 632.4097900390625, 474.1167907714844, 639.7916870117188, 761.01123046875, 353.81146240234375, 345.2632141113281, 178.3197479248047, 408.5777587890625, 318.738037109375, 324.988525390625, -335.6454162597656, 412.1337585449219, -5.65423583984375, -32.0965461730957, 437.3912658691406, 236.22842407226562, -735.302978515625, -25.43416976928711, 625.1944580078125, 520.4981689453125, -6.06694221496582, 712.48681640625, -39.24143981933594, 426.19720458984375, 191.26490783691406, 102.08502197265625, 126.58766174316406, 634.6071166992188, 515.626220703125, 278.44000244140625, -57.36347961425781, 490.4604187011719, 488.7629699707031, -134.65133666992188, 165.44349670410156, 211.30679321289062, -726.531494140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000422.npy"} +{"epoch": 0.6196769456681351, "step": 423, "batch_size": 64, "mean": 275.04901123046875, "std": 324.23388671875, "min": -502.6209716796875, "p10": -122.69287414550777, "median": 238.06320190429688, "p90": 745.9862915039065, "max": 1158.902587890625, "pos_frac": 0.796875, "sample": [265.5863342285156, 129.78970336914062, 203.46205139160156, 22.61566162109375, -137.18798828125, -310.0942077636719, 770.2950439453125, 233.47463989257812, 617.433349609375, 546.9510498046875, 832.144775390625, -214.89010620117188, 890.174560546875, 242.65176391601562, 544.2166137695312, -21.37259292602539, 821.431396484375, 894.77001953125, 785.475341796875, 689.265869140625, -34.5123291015625, -62.60052490234375, 384.13507080078125, 164.50973510742188, 594.6923828125, 193.35809326171875, 172.7744140625, 59.4326171875, -502.6209716796875, -207.46542358398438, 377.62091064453125, -148.80946350097656, 299.3636169433594, 357.8899230957031, 555.170654296875, 270.25982666015625, 34.59635925292969, 432.96734619140625, -184.39697265625, 114.80328369140625, 266.3457336425781, 635.416015625, 198.30917358398438, 396.1353454589844, 1158.902587890625, 144.36158752441406, 425.5011901855469, -9.866317749023438, 402.82342529296875, 79.62318420410156, 96.91609191894531, 274.304931640625, 549.5123291015625, 446.1009521484375, 160.07244873046875, 227.4498748779297, -50.205810546875, 277.2446594238281, 165.7503662109375, -88.87094116210938, 577.8175659179688, 91.74381256103516, 165.85214233398438, 334.5344543457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000423.npy"} +{"epoch": 0.6211453744493393, "step": 424, "batch_size": 64, "mean": 255.7906494140625, "std": 350.1225280761719, "min": -358.9618225097656, "p10": -166.3182830810547, "median": 224.63406372070312, "p90": 642.5917602539063, "max": 1330.3076171875, "pos_frac": 0.796875, "sample": [-333.2054443359375, 350.74151611328125, 24.643234252929688, -67.78887939453125, 90.694091796875, 543.838623046875, 396.1813049316406, 323.4677429199219, 103.1436538696289, -240.89105224609375, 225.814453125, 246.24368286132812, -41.91987609863281, -243.1193084716797, 275.1821594238281, 706.843017578125, 430.2738037109375, 1055.4176025390625, 150.9765167236328, 414.5971984863281, 475.9136657714844, 154.4439239501953, 632.35400390625, 630.9882202148438, 310.3750305175781, -127.45331573486328, 223.45367431640625, 154.76705932617188, -177.84576416015625, -163.62481689453125, 62.19731903076172, 1277.496826171875, -167.47262573242188, 318.74542236328125, 350.7064208984375, -96.21484375, 315.11138916015625, -233.6290740966797, 217.14892578125, 47.162261962890625, -97.05107879638672, 136.84164428710938, -358.9618225097656, 93.1524658203125, 313.98602294921875, 289.3847351074219, 262.1235046386719, 85.42996978759766, 254.89303588867188, 1330.3076171875, 417.74420166015625, 646.9793701171875, 348.0236511230469, 178.59854125976562, 382.22784423828125, 86.63656616210938, 842.8200073242188, 185.91424560546875, 174.57432556152344, 1081.8243408203125, 554.11474609375, 274.7124938964844, 121.77769470214844, 148.7886962890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000424.npy"} +{"epoch": 0.6226138032305433, "step": 425, "batch_size": 64, "mean": 184.33047485351562, "std": 322.9360656738281, "min": -796.1300048828125, "p10": -171.31147308349608, "median": 196.6532440185547, "p90": 573.5288391113282, "max": 804.9915771484375, "pos_frac": 0.78125, "sample": [-105.05657196044922, -41.00214385986328, 374.9977722167969, -338.55670166015625, -796.1300048828125, 192.6485595703125, 140.5489959716797, 272.68536376953125, 87.80105590820312, 140.1853485107422, 4.614234924316406, 513.1893920898438, -649.0679931640625, 12.305538177490234, 267.6059265136719, 57.33782958984375, 45.555702209472656, 95.0966796875, 618.2643432617188, -5.9330596923828125, 328.8247375488281, 158.11322021484375, -14.01821517944336, 279.12030029296875, 716.9580688476562, 12.704597473144531, 504.96881103515625, 35.98121643066406, -32.82097244262695, 804.9915771484375, 121.56317901611328, 589.3785400390625, 641.2557983398438, -174.3012237548828, 185.3971710205078, 249.21469116210938, 535.810546875, 213.3375701904297, 107.03228759765625, 741.3231201171875, -554.1869506835938, 391.1307373046875, 133.93238830566406, -419.3385925292969, 536.5462036132812, 358.490966796875, 436.75567626953125, 227.23312377929688, 6.613433837890625, 394.3261413574219, 189.2918701171875, 234.5, -180.94654846191406, -164.33538818359375, 208.53622436523438, 308.92779541015625, 475.2249755859375, 732.7373657226562, 499.3802185058594, 273.85052490234375, 477.7335205078125, -140.66830444335938, 278.8327331542969, 200.65792846679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000425.npy"} +{"epoch": 0.6240822320117474, "step": 426, "batch_size": 64, "mean": 245.7498321533203, "std": 297.3548889160156, "min": -530.449462890625, "p10": -30.846286010742183, "median": 182.24010467529297, "p90": 665.4152099609377, "max": 843.7924194335938, "pos_frac": 0.8125, "sample": [254.24334716796875, 92.95276641845703, 109.1092300415039, 153.09365844726562, 237.7742919921875, 125.54676818847656, 89.65130615234375, 215.5817108154297, 731.5994873046875, 82.54145812988281, 183.5367889404297, 205.00177001953125, 2.9821739196777344, 1.8091983795166016, 637.990966796875, -222.42416381835938, 55.54957580566406, 176.49655151367188, 569.306640625, 180.94342041015625, -8.927772521972656, 785.0390625, -66.5857162475586, 201.28118896484375, 121.47584533691406, 117.17819213867188, -127.89408874511719, 349.8909912109375, 757.7293090820312, 428.8576354980469, 238.11402893066406, 615.630126953125, 69.0816879272461, 396.8086853027344, 162.37684631347656, 810.516357421875, 36.41778564453125, -335.27130126953125, 139.02865600585938, 526.442138671875, -32.177764892578125, 354.51556396484375, 843.7924194335938, 86.59081268310547, 438.813720703125, 49.585166931152344, 331.1209411621094, 572.6619262695312, -27.739501953125, 94.39753723144531, -286.15777587890625, 366.6439208984375, -9.921939849853516, 427.31585693359375, 471.42828369140625, 329.86175537109375, -14.589776992797852, 509.5562744140625, 785.5135498046875, -530.449462890625, 621.4613647460938, 677.16845703125, -0.694854736328125, 568.8160400390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000426.npy"} +{"epoch": 0.6255506607929515, "step": 427, "batch_size": 64, "mean": 185.90200805664062, "std": 281.4072570800781, "min": -421.413818359375, "p10": -148.4451690673828, "median": 169.51895904541016, "p90": 467.663265991211, "max": 1042.5074462890625, "pos_frac": 0.75, "sample": [349.9049377441406, 28.484722137451172, 363.9999694824219, 405.0696105957031, 774.477783203125, 62.94359588623047, -42.562767028808594, -40.63727569580078, 382.1312561035156, 251.16375732421875, -23.019683837890625, 79.10935974121094, 151.38894653320312, 170.268798828125, 168.7691192626953, 300.64434814453125, 450.1302185058594, 774.5703735351562, 29.198524475097656, -224.06585693359375, 46.233909606933594, 55.39654541015625, 333.4993896484375, 372.406005859375, 159.38272094726562, 358.317138671875, 247.75827026367188, 860.6683959960938, 30.208267211914062, 179.39096069335938, -144.91571044921875, 535.902099609375, 375.05377197265625, -149.95779418945312, -112.34236145019531, 397.60626220703125, 302.69171142578125, 51.197757720947266, 508.682861328125, 203.3148193359375, 244.343994140625, 139.6956329345703, 64.94783782958984, 111.3804931640625, -421.413818359375, 315.4088439941406, 386.7760009765625, 394.387451171875, -316.3697509765625, -320.8958435058594, 475.17742919921875, 211.25827026367188, 377.0274353027344, -3.9998245239257812, 1042.5074462890625, -105.68560791015625, 62.83343505859375, 308.4796447753906, 354.1452941894531, 131.4286346435547, -113.40129852294922, -208.3425750732422, -56.708946228027344, -197.71531677246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000427.npy"} +{"epoch": 0.6270190895741556, "step": 428, "batch_size": 64, "mean": 253.0968017578125, "std": 374.240234375, "min": -819.1317749023438, "p10": -145.88828659057614, "median": 270.65966796875, "p90": 706.1392028808594, "max": 1109.6221923828125, "pos_frac": 0.75, "sample": [23.003131866455078, -172.41610717773438, 28.817590713500977, 459.9493713378906, 817.6632080078125, 413.6422119140625, 123.48356628417969, 564.7587890625, 389.73760986328125, 1109.6221923828125, -263.4686584472656, 89.39776611328125, 510.9228210449219, 193.93740844726562, 488.51348876953125, -90.99810791015625, -534.595703125, 708.75634765625, 281.751953125, 538.4373779296875, 465.9582214355469, 496.4085693359375, -60.060577392578125, -28.30099868774414, 512.0064697265625, 782.52685546875, 374.1292724609375, 263.2830810546875, -91.81304931640625, 323.1671142578125, -79.2174301147461, 316.10723876953125, 811.278564453125, -58.59874725341797, 278.0362548828125, -10.561689376831055, 618.35595703125, 492.82354736328125, 47.73711013793945, 81.92608642578125, 88.478515625, 154.76004028320312, 20.46404266357422, -111.67423248291016, -819.1317749023438, -42.04917907714844, 23.532089233398438, 107.02315521240234, 932.6008911132812, 181.55471801757812, 676.0609741210938, 494.7478332519531, 647.1513671875, -650.2805786132812, 547.3152465820312, 279.6988525390625, 639.7879638671875, 790.8579711914062, 386.9725341796875, 181.22413635253906, 700.0325317382812, -199.42555236816406, 112.93827819824219, -160.55145263671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000428.npy"} +{"epoch": 0.6284875183553598, "step": 429, "batch_size": 64, "mean": 198.12725830078125, "std": 362.75445556640625, "min": -637.475341796875, "p10": -246.07462310791016, "median": 212.0558853149414, "p90": 644.0177001953125, "max": 983.1301879882812, "pos_frac": 0.71875, "sample": [32.146202087402344, 134.57077026367188, -425.5489501953125, 330.0134582519531, -231.36114501953125, 681.8502807617188, -86.79686737060547, 82.33464813232422, 182.25140380859375, 365.64007568359375, -535.9957275390625, 181.36050415039062, 46.140037536621094, 128.6626434326172, -249.9003143310547, 391.6319580078125, 125.10433959960938, -243.2250518798828, 820.96240234375, -169.0949249267578, 983.1301879882812, 910.901611328125, -67.598876953125, 340.2665710449219, 196.8607177734375, 23.142963409423828, 328.9468994140625, 10.503459930419922, 646.1622314453125, 597.0194091796875, 291.9491882324219, 453.49261474609375, 396.1309814453125, -637.475341796875, 71.0698471069336, -209.8615264892578, -254.63827514648438, 227.2510528564453, 276.13397216796875, -82.2938003540039, 639.0137939453125, 354.335205078125, 261.5499267578125, -2.134857177734375, 780.4268798828125, 360.6642761230469, 556.8487548828125, -223.25979614257812, 231.8055877685547, -247.29586791992188, 395.1821594238281, 159.30982971191406, 544.88427734375, 317.18359375, -149.2324981689453, -549.6044921875, -36.646949768066406, 266.67706298828125, 93.19693756103516, 581.9908447265625, 385.0739440917969, 960.277587890625, 503.0855712890625, 434.97222900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000429.npy"} +{"epoch": 0.6299559471365639, "step": 430, "batch_size": 64, "mean": 208.2819061279297, "std": 284.055419921875, "min": -420.93231201171875, "p10": -104.74919128417966, "median": 199.57675170898438, "p90": 611.7931396484375, "max": 999.9906616210938, "pos_frac": 0.75, "sample": [223.51394653320312, 190.72129821777344, -78.8980941772461, -114.10354614257812, 721.350830078125, 384.8516540527344, 70.64425659179688, 254.3102569580078, 220.619873046875, 552.5320434570312, 21.666091918945312, -32.09819793701172, 63.16614532470703, -175.35433959960938, 177.93846130371094, 605.9734497070312, -247.065673828125, 433.96197509765625, 79.00642395019531, 91.84707641601562, 208.4322052001953, 83.8149185180664, 167.6824493408203, 208.79067993164062, -50.988014221191406, 364.3613586425781, 330.8359069824219, 344.8700866699219, -82.92236328125, 188.2580108642578, 130.09645080566406, -43.33293914794922, 999.9906616210938, -16.543350219726562, 109.2795181274414, 716.1568603515625, -4.464729309082031, -420.93231201171875, 23.101655960083008, 229.1543731689453, 461.9931640625, 803.7589111328125, 442.1341857910156, 140.12200927734375, 262.3509521484375, 723.4730224609375, -136.20619201660156, -250.08242797851562, 451.2641906738281, 230.1800079345703, 19.211742401123047, -24.019664764404297, 445.1887512207031, 246.5485076904297, -7.072750091552734, 296.38970947265625, 303.14178466796875, -260.0447998046875, 383.11871337890625, 778.4923095703125, 614.2872924804688, 219.70361328125, 32.671180725097656, 223.21226501464844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000430.npy"} +{"epoch": 0.631424375917768, "step": 431, "batch_size": 64, "mean": 297.705322265625, "std": 312.3294677734375, "min": -420.41729736328125, "p10": -115.75538711547846, "median": 305.82411193847656, "p90": 649.1554504394531, "max": 978.351318359375, "pos_frac": 0.828125, "sample": [-259.44049072265625, 138.5708770751953, -165.6572723388672, 506.1511535644531, 194.72369384765625, 76.8482437133789, 798.30517578125, 169.37530517578125, 414.5133972167969, 511.7869873046875, 290.4737243652344, 457.5894470214844, 65.09793090820312, -55.1163330078125, -158.94192504882812, 116.06242370605469, -22.981353759765625, -310.7244873046875, 599.3147583007812, -240.3898468017578, 599.583740234375, 723.6409301757812, 971.5162963867188, 978.351318359375, 134.56129455566406, 586.5657958984375, 420.9203796386719, 465.12603759765625, 244.3074188232422, 268.32574462890625, 651.6800537109375, 374.02606201171875, 235.952880859375, 98.02857971191406, 458.49951171875, -420.41729736328125, 914.431884765625, 22.558883666992188, 323.0494689941406, -58.87871551513672, 465.3869323730469, 321.17449951171875, 924.6417846679688, -12.320674896240234, 140.61012268066406, 239.93478393554688, 413.5775146484375, 84.70453643798828, 331.03887939453125, 354.7755126953125, 343.4733581542969, 607.939208984375, 643.2647094726562, 364.73822021484375, 550.7738037109375, 441.1236877441406, 178.36549377441406, 74.56513214111328, -140.131103515625, 225.3455352783203, 528.9330444335938, 142.31842041015625, 188.64126586914062, 522.8736572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000431.npy"} +{"epoch": 0.6328928046989721, "step": 432, "batch_size": 64, "mean": 281.7933654785156, "std": 295.2173767089844, "min": -245.5845184326172, "p10": -29.607712936401356, "median": 244.45230865478516, "p90": 692.6251037597658, "max": 1254.131103515625, "pos_frac": 0.84375, "sample": [62.77934265136719, 181.91976928710938, 208.3452606201172, 243.64205932617188, 199.7314910888672, 250.1775360107422, 136.80560302734375, -34.12131881713867, 198.24542236328125, 17.604934692382812, 42.98952102661133, 12.333492279052734, 654.9979248046875, 430.6217956542969, -54.11488723754883, 1254.131103515625, 708.7510375976562, 254.68887329101562, 8.450790405273438, 194.21734619140625, 131.122314453125, -18.852203369140625, 288.3616638183594, 73.29058837890625, 448.78399658203125, 258.6202697753906, 645.630126953125, 245.26255798339844, 435.5328369140625, 17.345703125, 223.88092041015625, -19.075965881347656, -245.5845184326172, 299.873046875, -154.72000122070312, 610.8773803710938, 141.5752410888672, -122.94319152832031, 315.9256591796875, 157.65098571777344, 240.7332763671875, -148.62130737304688, 269.2842712402344, 481.09857177734375, 287.3680725097656, 407.7163391113281, 572.7228393554688, 431.5440673828125, 450.2279968261719, 412.8936767578125, 152.95289611816406, 512.4633178710938, -1.356597900390625, 802.8782348632812, 292.7435607910156, 91.148193359375, 765.05078125, 496.1450500488281, 652.5006713867188, 893.3463134765625, -228.16964721679688, 761.7288818359375, 6.027046203613281, 727.5946044921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000432.npy"} +{"epoch": 0.6343612334801763, "step": 433, "batch_size": 64, "mean": 185.99977111816406, "std": 332.2810974121094, "min": -870.1160278320312, "p10": -159.42413024902342, "median": 225.5430450439453, "p90": 544.8085510253907, "max": 689.577392578125, "pos_frac": 0.75, "sample": [32.113983154296875, 513.4409790039062, 518.914794921875, 524.4435424804688, 428.8578796386719, -85.16145324707031, 333.19476318359375, -794.4338989257812, 507.8857116699219, 547.9700317382812, -870.1160278320312, 51.95082092285156, 635.2515869140625, 379.7098083496094, 118.5003662109375, -18.685291290283203, 163.89198303222656, 377.8313903808594, 499.6016845703125, -756.1724853515625, -18.203285217285156, 594.0956420898438, 598.875732421875, 244.8837890625, 144.86964416503906, 200.3527374267578, 77.91365051269531, 149.23171997070312, -96.41404724121094, 449.74224853515625, -2.34033203125, 689.577392578125, -285.5760803222656, 431.8011474609375, 268.38568115234375, 414.84503173828125, 537.4317626953125, -283.71539306640625, 294.17987060546875, -13.423271179199219, 659.14697265625, 531.03564453125, 265.5895690917969, -130.65869140625, 177.6552734375, 66.1164779663086, 295.5849304199219, 4.310249328613281, -170.23167419433594, -223.74530029296875, 612.9545288085938, 392.9911193847656, 222.37615966796875, 49.46541213989258, 228.70993041992188, -134.20652770996094, 108.7994155883789, 373.0517272949219, -50.6865234375, 35.513328552246094, 276.2538146972656, 427.7916259765625, 140.7113494873047, 239.95298767089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000433.npy"} +{"epoch": 0.6358296622613803, "step": 434, "batch_size": 64, "mean": 233.06314086914062, "std": 358.6510009765625, "min": -248.54006958007812, "p10": -114.56968994140624, "median": 157.5232696533203, "p90": 669.1182983398437, "max": 1989.0224609375, "pos_frac": 0.734375, "sample": [489.2241516113281, 170.65756225585938, 1989.0224609375, 116.72401428222656, -117.46726989746094, 94.21713256835938, 188.61541748046875, 425.4303283691406, 26.25907325744629, -56.24638366699219, 257.8086242675781, -35.51560974121094, 2.746175765991211, 667.047607421875, -107.80867004394531, 99.01956176757812, -121.23723602294922, -248.54006958007812, 493.7417907714844, 619.0296020507812, 770.1114501953125, -173.49081420898438, 120.11505126953125, 251.58633422851562, 486.03485107421875, 203.2478485107422, -36.720726013183594, -25.006471633911133, 244.75025939941406, 260.9799499511719, 28.851715087890625, 141.6641845703125, 455.1419372558594, -130.3790740966797, -18.926254272460938, 474.6333923339844, 120.81619262695312, 185.62615966796875, 203.322998046875, 236.68475341796875, -32.93341064453125, 395.3016662597656, 670.0057373046875, 68.55882263183594, 163.10629272460938, 267.5563049316406, 469.6773376464844, 150.98802185058594, 151.94024658203125, 807.2129516601562, 273.9322509765625, -57.826568603515625, 787.802490234375, -74.08570861816406, 131.37306213378906, 61.814971923828125, 813.2565307617188, 230.010009765625, -102.97482299804688, -237.73019409179688, 908.6242065429688, 514.1632690429688, 4.6364898681640625, -200.14126586914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000434.npy"} +{"epoch": 0.6372980910425844, "step": 435, "batch_size": 64, "mean": 252.46389770507812, "std": 311.1219787597656, "min": -632.4189453125, "p10": -70.20884246826171, "median": 224.2421646118164, "p90": 613.5241271972657, "max": 955.7738037109375, "pos_frac": 0.828125, "sample": [949.3775634765625, 442.95660400390625, -632.4189453125, 372.1544189453125, 588.2313232421875, 209.85635375976562, 342.9266357421875, 509.0615234375, 363.5501708984375, 119.67539978027344, 570.6320190429688, 201.5507354736328, 64.93278503417969, 236.514404296875, -74.34098815917969, 607.0278930664062, 955.7738037109375, 821.0048828125, 247.3958740234375, -119.45475769042969, 15.299530029296875, 6.682281494140625, -20.58770751953125, 92.21086120605469, -113.53643798828125, 128.9557342529297, 236.9504852294922, 154.67855834960938, -129.750732421875, 213.637451171875, 276.7253723144531, 38.76206970214844, 149.3446044921875, 508.8583679199219, 519.3616943359375, 344.6097106933594, 376.174072265625, -449.560302734375, 484.2784729003906, -356.7249755859375, 200.93612670898438, 294.9646911621094, 616.3082275390625, 11.2154541015625, 70.57999420166016, 2.191333770751953, 603.4501953125, 592.3364868164062, -3.3080711364746094, -37.94268798828125, 824.1190185546875, 45.19799041748047, 295.2870178222656, 89.67608642578125, 169.02810668945312, 692.2100219726562, 691.9356689453125, 281.2513122558594, 223.60411071777344, 520.1619262695312, 164.5609588623047, -60.567169189453125, 392.83624267578125, 224.88021850585938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000435.npy"} +{"epoch": 0.6387665198237885, "step": 436, "batch_size": 64, "mean": 220.94302368164062, "std": 299.0478515625, "min": -617.2203369140625, "p10": -127.47192306518555, "median": 200.33131408691406, "p90": 573.9732116699219, "max": 944.840576171875, "pos_frac": 0.796875, "sample": [-131.9987335205078, 140.27027893066406, -126.55838012695312, 57.722068786621094, 575.3416137695312, 272.1943054199219, 352.9288330078125, 816.1925048828125, 632.5405883789062, -119.9120101928711, 153.15843200683594, 827.2952880859375, -35.597808837890625, -139.5027313232422, 276.6951904296875, 287.81158447265625, -170.5148468017578, 44.859161376953125, 909.87158203125, -301.95721435546875, 450.15423583984375, 474.3766174316406, 259.53302001953125, 406.67706298828125, 570.7802734375, -6.39227294921875, 373.92919921875, 512.349853515625, 19.701766967773438, 73.19783020019531, 944.840576171875, 57.53504180908203, 510.9104309082031, 176.8969268798828, 304.3393249511719, 198.95899963378906, -2.4909210205078125, 18.028968811035156, 268.0826110839844, 130.893798828125, 419.05206298828125, 73.37875366210938, 221.50416564941406, 139.94287109375, 182.3658905029297, 283.6896057128906, 232.2282257080078, -169.2109375, 173.69700622558594, 207.6802978515625, 261.6090087890625, 213.015380859375, 413.8766174316406, 497.98028564453125, 910.0955810546875, 27.284194946289062, 86.44095611572266, 25.065261840820312, -617.2203369140625, 201.70362854003906, -127.86344146728516, 51.06888198852539, 449.63287353515625, -79.80636596679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000436.npy"} +{"epoch": 0.6402349486049926, "step": 437, "batch_size": 64, "mean": 228.09780883789062, "std": 288.26373291015625, "min": -512.4094848632812, "p10": -86.24424438476562, "median": 220.6804428100586, "p90": 554.0132934570313, "max": 949.8761596679688, "pos_frac": 0.75, "sample": [104.36186218261719, -240.9345703125, 296.4149169921875, 544.01513671875, 386.5162658691406, -43.19672393798828, 604.0639038085938, 176.02102661132812, 318.72369384765625, -25.408294677734375, -512.4094848632812, 269.4919738769531, -296.920654296875, 489.7416076660156, -17.3480224609375, 192.9224853515625, 171.90829467773438, -268.48858642578125, 154.38873291015625, 519.9219970703125, 43.152000427246094, 464.63482666015625, 216.75559997558594, -32.98717498779297, -211.76718139648438, 621.6400146484375, 200.44815063476562, 114.93587493896484, 398.15472412109375, 467.94354248046875, 812.1870727539062, 488.15045166015625, 780.10986328125, -128.8769989013672, 949.8761596679688, -83.75325012207031, -60.38762664794922, 0.7323436737060547, 260.68780517578125, 175.87991333007812, 1.7345809936523438, -15.952728271484375, 276.1766052246094, 314.6235046386719, 426.107666015625, 120.88577270507812, 436.75042724609375, 434.3238220214844, 532.7644653320312, 558.2982177734375, 202.84877014160156, 385.8053283691406, 428.44110107421875, -49.8505859375, -79.7052230834961, 687.47265625, 330.4751892089844, 404.44140625, -87.31181335449219, 273.2451171875, 68.50030517578125, 417.7865295410156, 224.60528564453125, 4.491676330566406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000437.npy"} +{"epoch": 0.6417033773861968, "step": 438, "batch_size": 64, "mean": 261.7484436035156, "std": 299.0746154785156, "min": -361.3745422363281, "p10": -100.07209167480467, "median": 232.61322021484375, "p90": 666.1348327636721, "max": 986.5458374023438, "pos_frac": 0.78125, "sample": [136.14260864257812, 47.15064239501953, 386.32666015625, -125.77070617675781, 229.3409423828125, 123.701171875, 845.695556640625, -12.993288040161133, 175.39639282226562, -48.6922607421875, 232.38912963867188, 382.9748229980469, -16.97281265258789, 193.2406005859375, 427.37225341796875, 447.64263916015625, 298.268310546875, 986.5458374023438, 23.304454803466797, 345.35455322265625, 182.24423217773438, 614.5640258789062, 373.45782470703125, 610.3643798828125, 161.34613037109375, 232.83731079101562, 26.32758331298828, 343.9331359863281, -361.3745422363281, -60.523712158203125, -106.82620239257812, -3.1175365447998047, 414.5685729980469, 770.4844360351562, 628.7613525390625, 358.20654296875, 278.4711608886719, -353.08050537109375, 557.1932373046875, 282.0198059082031, 764.6885986328125, 161.736572265625, -158.72796630859375, 757.4070434570312, -84.3125, 357.7826232910156, 560.79736328125, 385.2347717285156, 544.8390502929688, 112.21868133544922, 95.76760864257812, 327.8071594238281, 763.425537109375, 78.26588439941406, -53.660308837890625, 146.4598846435547, 151.28799438476562, -313.01531982421875, -132.93109130859375, 584.697265625, 199.6272735595703, 682.1520385742188, 340.07257080078125, 454.0050354003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000438.npy"} +{"epoch": 0.6431718061674009, "step": 439, "batch_size": 64, "mean": 206.4326171875, "std": 340.36627197265625, "min": -482.96636962890625, "p10": -119.53141174316403, "median": 160.7615509033203, "p90": 733.8090209960938, "max": 1045.6624755859375, "pos_frac": 0.75, "sample": [234.05654907226562, 738.0316162109375, 110.35421752929688, 337.6349182128906, 105.40264892578125, -84.74911499023438, 755.7940673828125, 12.594833374023438, 165.88880920410156, 40.17607879638672, -347.82464599609375, 791.9661254882812, 88.66529846191406, -61.379539489746094, -77.49971771240234, 322.3207702636719, 302.00823974609375, 596.849365234375, 677.5601806640625, 205.46804809570312, 109.11109924316406, 938.1536865234375, 223.38430786132812, -62.590728759765625, 206.261962890625, -214.1781005859375, 155.63429260253906, -90.07548522949219, -3.7972984313964844, 35.37053680419922, 342.5507507324219, -321.272216796875, 247.68128967285156, 490.1032409667969, 1045.6624755859375, 344.7078552246094, 537.3986206054688, 192.28384399414062, 43.85560607910156, 247.3760986328125, 129.30419921875, -55.216514587402344, -424.8365478515625, -71.56298828125, 0.9792251586914062, 262.0507507324219, 133.3880615234375, 257.35760498046875, -422.53387451171875, 596.390625, 801.2875366210938, 13.66970443725586, 234.8938751220703, 723.956298828125, 111.18590545654297, -132.15538024902344, 480.5947265625, 363.0372619628906, 84.85043334960938, 88.39607238769531, 984.1797485351562, 239.1424560546875, -482.96636962890625, -84.64643859863281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000439.npy"} +{"epoch": 0.644640234948605, "step": 440, "batch_size": 64, "mean": 245.0926513671875, "std": 342.2526550292969, "min": -229.68072509765625, "p10": -87.17445907592771, "median": 150.9848861694336, "p90": 694.5493408203125, "max": 1496.3935546875, "pos_frac": 0.796875, "sample": [240.79322814941406, 333.6954040527344, 390.7434387207031, 372.8759765625, -147.211181640625, 31.79540252685547, 604.8438110351562, 149.4189453125, 73.44163513183594, 829.7902221679688, 53.74437713623047, 49.338470458984375, 185.562744140625, 124.65093994140625, 698.6417846679688, -206.12924194335938, 147.61978149414062, -215.5004425048828, 27.891693115234375, 6.320762634277344, 170.06228637695312, 645.4834594726562, 65.12680053710938, 56.67786407470703, 371.70587158203125, 232.6699676513672, -103.54183197021484, 248.700927734375, 398.33233642578125, 199.3798370361328, 144.53570556640625, 765.3434448242188, -53.220977783203125, 267.86083984375, 12.087038040161133, -4.309906005859375, 1244.5335693359375, -198.37344360351562, 242.564453125, 22.18305015563965, 832.799072265625, 267.8344421386719, 975.792236328125, 497.486572265625, 216.52279663085938, 54.52564239501953, -97.57235717773438, 129.96971130371094, 455.6232604980469, 494.8544921875, 110.72552490234375, 685.0003051757812, -28.21570587158203, 152.5508270263672, 18.542766571044922, 489.980712890625, 40.75322341918945, -40.53235626220703, -62.912696838378906, 221.74526977539062, 1496.3935546875, 535.761474609375, -229.68072509765625, -12.148193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000440.npy"} +{"epoch": 0.6461086637298091, "step": 441, "batch_size": 64, "mean": 243.21163940429688, "std": 430.8845520019531, "min": -753.8775024414062, "p10": -209.32743988037106, "median": 199.3617401123047, "p90": 780.7288330078127, "max": 1950.1273193359375, "pos_frac": 0.734375, "sample": [239.32237243652344, 143.38870239257812, 286.298583984375, 524.1126098632812, 405.855224609375, 798.3037719726562, -58.32476806640625, 27.348114013671875, 143.77804565429688, -380.9221496582031, -187.76365661621094, -78.3254623413086, 744.421630859375, 906.7129516601562, 311.0107421875, -508.7322692871094, 112.24909973144531, 798.3784790039062, 147.87103271484375, 16.961395263671875, 40.76615905761719, 102.72386169433594, 31.565582275390625, 801.110107421875, 796.2890625, 123.13546752929688, 214.6226348876953, 433.53619384765625, 369.58465576171875, 1950.1273193359375, 14.130743026733398, -488.8711853027344, 360.339599609375, 547.454833984375, -450.2642822265625, -112.50558471679688, 135.29013061523438, 253.22531127929688, 612.729248046875, -403.5002746582031, -66.14590454101562, 490.76025390625, -107.05436706542969, -753.8775024414062, 477.44036865234375, 429.611328125, 401.6566162109375, 452.538818359375, 308.9387512207031, -84.32022094726562, 184.10084533691406, -218.56906127929688, -94.719970703125, 464.3233642578125, 335.611572265625, 127.27044677734375, 646.1520385742188, 381.62957763671875, 580.6121215820312, 1115.30322265625, -47.385746002197266, -41.646934509277344, 134.56793212890625, 725.3139038085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000441.npy"} +{"epoch": 0.6475770925110133, "step": 442, "batch_size": 64, "mean": 283.212890625, "std": 388.65576171875, "min": -473.48419189453125, "p10": -64.58719902038573, "median": 168.62572479248047, "p90": 858.7568115234377, "max": 1219.9248046875, "pos_frac": 0.765625, "sample": [-68.8858871459961, 185.80667114257812, 547.611572265625, 20.8648681640625, 277.21038818359375, 145.76687622070312, -30.424949645996094, 240.43687438964844, 65.6083984375, -68.93939208984375, -21.055744171142578, 412.4182434082031, 1.7636032104492188, 516.6063842773438, 357.04754638671875, 672.3080444335938, 997.6198120117188, 389.26318359375, -11.810211181640625, 88.09007263183594, 96.36994171142578, 560.4081420898438, 22.9990234375, 256.2899475097656, -242.7748260498047, 220.94036865234375, 785.0003051757812, 306.3907470703125, 874.4857177734375, 161.8604736328125, 77.29840087890625, -473.48419189453125, 112.80569458007812, -54.55692672729492, -5.882087707519531, 581.6426391601562, -48.04738998413086, 475.8664245605469, 43.97820281982422, 1125.102783203125, -260.8731689453125, 392.9988708496094, 822.0560302734375, 73.58118438720703, 44.865966796875, 1219.9248046875, 786.04736328125, 294.2650451660156, 638.18212890625, 170.94253540039062, -8.34381103515625, -369.2793884277344, 58.9014892578125, 42.78321838378906, -26.634719848632812, -230.5489044189453, 312.7720947265625, 439.069091796875, 1172.710693359375, 719.5206909179688, 1141.273681640625, 893.9212036132812, 166.3089141845703, 37.18055725097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000442.npy"} +{"epoch": 0.6490455212922174, "step": 443, "batch_size": 64, "mean": 179.22032165527344, "std": 401.8019104003906, "min": -866.1666259765625, "p10": -272.58109283447266, "median": 172.8091583251953, "p90": 640.9895019531252, "max": 1328.835693359375, "pos_frac": 0.734375, "sample": [1175.9376220703125, 86.01154327392578, 878.4112548828125, 19.340072631835938, 321.05047607421875, 113.45500183105469, -80.57990264892578, -105.95234680175781, -379.26885986328125, 136.54603576660156, 109.28013610839844, -6.7306976318359375, 174.03567504882812, 171.5826416015625, 77.27603149414062, -647.6952514648438, 119.00807189941406, -94.52687072753906, -50.056785583496094, -51.0101318359375, 1175.1429443359375, 335.183349609375, 555.4535522460938, 316.1357116699219, -693.7158813476562, -603.8688354492188, 225.45474243164062, 207.65196228027344, 664.6578979492188, -866.1666259765625, 410.27520751953125, 354.01483154296875, 271.9346923828125, 128.7482147216797, 167.4314727783203, -104.55982208251953, 181.04620361328125, 418.106689453125, 440.2757263183594, 200.9684295654297, -250.8867645263672, -45.57139587402344, 415.7846374511719, -290.3294372558594, 329.33251953125, 97.32994079589844, 298.8077087402344, 1328.835693359375, 272.22528076171875, 143.318115234375, 585.7632446289062, 82.66848754882812, 176.48570251464844, 139.25282287597656, -183.8013458251953, 256.1836853027344, -281.878662109375, 276.65887451171875, 114.49085235595703, 242.27639770507812, 840.8306884765625, 776.4405517578125, 190.81179809570312, 204.78662109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000443.npy"} +{"epoch": 0.6505139500734214, "step": 444, "batch_size": 64, "mean": 301.04541015625, "std": 393.59844970703125, "min": -669.7508544921875, "p10": -111.00416564941405, "median": 290.135009765625, "p90": 793.7643066406256, "max": 1449.796630859375, "pos_frac": 0.78125, "sample": [395.95611572265625, -296.73895263671875, 88.94434356689453, 576.90380859375, 43.35521697998047, 361.826171875, -181.4425811767578, 1449.796630859375, 877.2991943359375, -43.02031326293945, -117.64041137695312, 289.4505920410156, 263.31787109375, 572.3695678710938, 631.0460205078125, 221.35491943359375, 855.735107421875, 445.2900390625, -137.18661499023438, 92.97663879394531, -3.2544498443603516, -35.986900329589844, 415.9743347167969, 975.383544921875, 29.233230590820312, 393.74334716796875, 323.6542663574219, -36.42156982421875, 290.8194274902344, -400.3988037109375, -24.99444007873535, 1407.1771240234375, 304.93988037109375, 294.7945861816406, 317.583984375, 460.66058349609375, 1185.095703125, 440.54168701171875, 536.4442138671875, 237.40151977539062, -95.51959228515625, 971.8356323242188, 625.6414794921875, 54.958396911621094, 551.88427734375, 551.8642578125, 575.6810913085938, 5.027872085571289, 182.89356994628906, 151.5952911376953, 292.85302734375, 211.4827117919922, 327.5085144042969, 172.4254150390625, 474.89007568359375, 231.2315673828125, 28.033109664916992, -51.56593322753906, -259.6168518066406, 69.08486938476562, 649.165771484375, -669.7508544921875, 198.40896606445312, 514.9091186523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000444.npy"} +{"epoch": 0.6519823788546255, "step": 445, "batch_size": 64, "mean": 274.0172119140625, "std": 353.38446044921875, "min": -444.21087646484375, "p10": -128.76868896484373, "median": 211.5077667236328, "p90": 689.347216796875, "max": 1214.563232421875, "pos_frac": 0.78125, "sample": [219.33541870117188, 46.740814208984375, 16.420066833496094, 838.7125244140625, 41.62849426269531, 593.373291015625, 489.47357177734375, 148.95208740234375, 68.572265625, 171.77732849121094, 132.652099609375, 557.8429565429688, -11.002182006835938, 415.9329528808594, 202.77078247070312, -444.21087646484375, 118.60333251953125, 466.4228210449219, 464.642333984375, 186.5458221435547, 413.2547302246094, 356.3747863769531, 514.5828857421875, 690.1990966796875, 76.02287292480469, 144.98236083984375, 1214.563232421875, 462.7839050292969, 494.56793212890625, 622.4317016601562, 767.5201416015625, 619.9415893554688, 167.93626403808594, 687.3594970703125, 28.008041381835938, -192.810302734375, 266.0340881347656, 303.1108093261719, -139.8323516845703, 522.28662109375, 4.737571716308594, 1066.2796630859375, 530.6826171875, -285.41949462890625, 787.089599609375, 366.22332763671875, 165.77734375, -223.247314453125, -69.04064178466797, -66.31608581542969, 294.3300476074219, -74.0174331665039, -102.95347595214844, 427.9530334472656, 185.9471435546875, 248.5425567626953, -32.079376220703125, -242.39852905273438, 1192.1490478515625, -84.40580749511719, 598.095703125, 203.68011474609375, 268.784423828125, -367.7956848144531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000445.npy"} +{"epoch": 0.6534508076358296, "step": 446, "batch_size": 64, "mean": 297.6376037597656, "std": 309.880126953125, "min": -593.6709594726562, "p10": -35.325327301025375, "median": 273.12139892578125, "p90": 748.6624389648439, "max": 1075.8548583984375, "pos_frac": 0.84375, "sample": [-20.53679656982422, 301.5478515625, 36.05915832519531, 786.8770751953125, 831.6087036132812, 349.5799255371094, 31.543170928955078, -593.6709594726562, 201.25819396972656, 649.4022827148438, 826.9451293945312, 403.0108642578125, 0.7388076782226562, 723.9465942382812, 546.0313720703125, 576.8560180664062, 573.3487548828125, 347.1002197265625, 160.88461303710938, 389.686279296875, -41.66326904296875, 137.2191162109375, 145.21864318847656, 240.3112030029297, 203.7122344970703, -334.84478759765625, 120.91078186035156, 206.34194946289062, 485.54376220703125, 261.2872314453125, 568.239013671875, -48.46452331542969, 57.257476806640625, 194.29208374023438, 18.51661491394043, 227.08401489257812, -13.182647705078125, 515.510986328125, 296.5710144042969, 833.267822265625, 368.74810791015625, 156.35862731933594, 759.2549438476562, 1075.8548583984375, 471.2088317871094, -48.333518981933594, 794.783203125, 457.0231018066406, 304.656005859375, 299.7851257324219, 434.9461975097656, -0.2670440673828125, -96.08270263671875, 173.95132446289062, 88.61101531982422, 163.87103271484375, 192.2509765625, 378.45269775390625, 683.2182006835938, 209.94390869140625, -253.62588500976562, 661.7161254882812, 284.95556640625, 292.1795349121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000446.npy"} +{"epoch": 0.6549192364170338, "step": 447, "batch_size": 64, "mean": 303.20330810546875, "std": 364.894775390625, "min": -418.2218017578125, "p10": -116.30771484374999, "median": 250.1058349609375, "p90": 710.4942382812502, "max": 1873.76904296875, "pos_frac": 0.84375, "sample": [215.55630493164062, 126.0027847290039, 432.3634033203125, 218.8224334716797, 38.870513916015625, -418.2218017578125, 483.8578186035156, 274.4742736816406, 333.5957946777344, -66.864013671875, 62.30849075317383, 763.2919311523438, 266.693359375, 188.2503204345703, 666.29443359375, 531.5831909179688, 814.9196166992188, 73.22447204589844, 1873.76904296875, 787.8364868164062, 193.64230346679688, 1016.609375, 487.40789794921875, 440.5338134765625, -234.50636291503906, 233.518310546875, 576.67919921875, -151.1158447265625, 300.4760437011719, 187.9807891845703, -142.0040283203125, 21.561264038085938, 2.9873428344726562, -121.47610473632812, 389.99945068359375, -104.24813842773438, 195.48193359375, 572.1256103515625, 94.3609619140625, 32.76923370361328, 558.9758911132812, 73.68590545654297, 130.6330108642578, -309.4825439453125, 729.43701171875, 481.50860595703125, 427.7666320800781, -158.89752197265625, 197.6397705078125, 28.032480239868164, 431.606689453125, 406.1818542480469, 386.0101318359375, 347.9574890136719, 152.8968505859375, 586.2245483398438, 303.2273254394531, 122.86494445800781, 1002.062744140625, 605.2034912109375, 90.01677703857422, 614.5289916992188, 568.5787963867188, -31.060943603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000447.npy"} +{"epoch": 0.6563876651982379, "step": 448, "batch_size": 64, "mean": 281.41815185546875, "std": 342.5064392089844, "min": -540.8357543945312, "p10": -127.09532318115232, "median": 229.603515625, "p90": 646.697802734375, "max": 1328.23291015625, "pos_frac": 0.828125, "sample": [-1.7606887817382812, 63.27480697631836, -341.4258117675781, 164.02801513671875, 629.4683227539062, 22.44805908203125, 183.11708068847656, -98.55985260009766, 295.35467529296875, -540.8357543945312, 483.3043212890625, 63.71613311767578, 620.0299072265625, 62.59393310546875, 235.83383178710938, 505.113525390625, 16.851491928100586, -37.37644958496094, 634.7620849609375, -141.52822875976562, -136.83412170410156, 111.74652099609375, 748.2483520507812, 444.9189453125, 223.37319946289062, 263.6141052246094, 186.47891235351562, 166.3388671875, 485.1251220703125, 808.4852294921875, 307.2384033203125, 566.910400390625, 42.035911560058594, 413.70965576171875, 1328.23291015625, 12.90211296081543, 80.75788879394531, 195.1587371826172, 114.4371109008789, 312.3333740234375, 620.2391357421875, 600.1906127929688, 241.41368103027344, -155.8321533203125, 1017.2139282226562, 329.16986083984375, 183.488525390625, -104.3714599609375, 448.3132019042969, 82.90249633789062, 321.0017395019531, -234.66477966308594, 1001.872314453125, -152.0938720703125, 590.0833129882812, 870.6939086914062, 45.890899658203125, 383.66314697265625, 123.50617218017578, 306.3545837402344, 574.7169799804688, 651.8131103515625, 550.3069458007812, 221.26910400390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000448.npy"} +{"epoch": 0.657856093979442, "step": 449, "batch_size": 64, "mean": 294.76324462890625, "std": 428.3898010253906, "min": -799.3037109375, "p10": -165.43976440429685, "median": 231.09127807617188, "p90": 782.8262207031252, "max": 2247.921142578125, "pos_frac": 0.765625, "sample": [191.9852752685547, -91.1227035522461, 325.96185302734375, 13.192794799804688, 755.5758666992188, 728.260498046875, 282.86370849609375, 50.59941864013672, 167.57525634765625, 246.69601440429688, 892.3289794921875, -179.5172882080078, -34.57147979736328, 215.48654174804688, 664.1136474609375, 25.458019256591797, 470.00177001953125, 180.86285400390625, 115.24330139160156, -193.5096435546875, 796.0096435546875, 282.19970703125, 558.9775390625, 680.9542236328125, 291.73211669921875, 683.6380615234375, -223.60794067382812, 34.59910583496094, 418.077880859375, 415.5574035644531, 198.22174072265625, 209.43325805664062, 45.4285888671875, 589.8502197265625, 501.40509033203125, 753.2056274414062, -117.51677703857422, 152.57713317871094, -120.01544189453125, -140.31211853027344, 2247.921142578125, 589.142578125, -208.1127471923828, 327.0726013183594, 388.25616455078125, 165.98345947265625, -62.65191650390625, -799.3037109375, -37.58511734008789, 979.7570190429688, 275.2571716308594, 827.0078125, 794.5049438476562, -210.34742736816406, 462.223876953125, 169.42959594726562, 427.54925537109375, 977.727294921875, 303.78778076171875, -49.60072326660156, -176.20875549316406, 29.734130859375, 167.63417053222656, 437.7698059082031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000449.npy"} +{"epoch": 0.6593245227606461, "step": 450, "batch_size": 64, "mean": 312.71868896484375, "std": 311.4277038574219, "min": -757.5628662109375, "p10": -10.591577911376948, "median": 267.7369384765625, "p90": 718.2314270019532, "max": 1172.798583984375, "pos_frac": 0.875, "sample": [348.1812744140625, 613.1373291015625, 724.4178466796875, 426.3196716308594, 101.546142578125, 686.4505615234375, 1172.798583984375, -68.89591979980469, 340.2535705566406, -5.598197937011719, 648.5928344726562, 498.6483154296875, 703.7964477539062, 254.67691040039062, 406.8782653808594, 779.0587158203125, 91.34552001953125, 116.9725341796875, 229.31732177734375, 94.09051513671875, 375.78277587890625, 609.310546875, 480.0472412109375, 751.798095703125, 193.55516052246094, -41.24209213256836, 99.75382995605469, 878.421875, 368.92584228515625, 645.9657592773438, 108.19718933105469, 370.5986328125, 267.62054443359375, 212.1907501220703, 305.32073974609375, 31.20273208618164, 209.9459686279297, 437.5004577636719, 248.38449096679688, 126.72590637207031, 876.1090698242188, -181.28660583496094, 194.24420166015625, 415.6561279296875, 135.76145935058594, 344.63861083984375, 56.282798767089844, 499.2958679199219, 223.1623992919922, 59.8719596862793, 470.5423583984375, -12.731597900390625, 553.8605346679688, 275.0001220703125, 23.652196884155273, -757.5628662109375, 315.802734375, 267.85333251953125, -18.30370330810547, 179.28407287597656, 134.97402954101562, 983.2805786132812, -16.337486267089844, 148.94903564453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000450.npy"} +{"epoch": 0.6607929515418502, "step": 451, "batch_size": 64, "mean": 232.50131225585938, "std": 412.1432800292969, "min": -1165.723388671875, "p10": -227.123355102539, "median": 239.6020965576172, "p90": 691.4697021484378, "max": 1166.313232421875, "pos_frac": 0.796875, "sample": [85.69540405273438, -84.85678100585938, 43.86846923828125, 258.86187744140625, 546.5524291992188, 298.5528259277344, 348.4818115234375, 725.33740234375, -40.82862091064453, 276.63555908203125, 519.529052734375, 1166.313232421875, 522.059814453125, 815.417236328125, 129.2720947265625, 220.34231567382812, 130.8875732421875, -1165.723388671875, -593.2817993164062, -250.16326904296875, 118.24947357177734, 84.9197769165039, 491.3979187011719, 535.6549682617188, 540.090087890625, 302.0939636230469, 472.33538818359375, 174.37306213378906, 209.00509643554688, 611.6678466796875, 121.66030883789062, 1095.671630859375, 922.4078369140625, 349.3274841308594, 495.0689392089844, 557.016845703125, 435.85894775390625, -385.73968505859375, 287.3533630371094, 281.7757568359375, -43.664276123046875, -110.48117065429688, 428.6990661621094, -971.3984375, 150.44883728027344, 196.51760864257812, 195.1599884033203, 195.47030639648438, 307.7550964355469, 41.775230407714844, 612.445068359375, -253.6688690185547, -173.36355590820312, 788.3679809570312, 523.0881958007812, 135.36001586914062, 396.1820068359375, -445.4695129394531, 202.96267700195312, 9.3680419921875, 745.105712890625, -171.1631622314453, 314.902587890625, 152.54153442382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000451.npy"} +{"epoch": 0.6622613803230544, "step": 452, "batch_size": 64, "mean": 277.29498291015625, "std": 276.45880126953125, "min": -332.5181579589844, "p10": -82.34412078857422, "median": 244.67642211914062, "p90": 672.3895935058595, "max": 899.6095581054688, "pos_frac": 0.8125, "sample": [752.71484375, -174.64901733398438, 554.2007446289062, 612.150634765625, -84.0781478881836, 486.38409423828125, 395.9810791015625, 518.211669921875, 505.1620178222656, -78.29805755615234, 542.75244140625, 260.9425964355469, 142.7157745361328, 153.69046020507812, 325.16986083984375, 292.7492370605469, 622.3286743164062, 292.12762451171875, 461.78668212890625, 80.69883728027344, 336.7069396972656, 0.9663772583007812, 405.2802734375, 87.53583526611328, 320.82159423828125, 689.5593872070312, 81.13726806640625, -247.00485229492188, 510.9151306152344, 225.33786010742188, -58.39137268066406, 465.3050537109375, 84.7498779296875, 129.69097900390625, 729.9004516601562, 115.26795959472656, 196.2852325439453, 662.4931640625, 121.88024139404297, 197.8128662109375, 228.41024780273438, -99.42138671875, 206.92303466796875, 690.379638671875, 208.3358154296875, 394.335693359375, 130.78460693359375, 740.26171875, 899.6095581054688, 91.35087585449219, 165.6472930908203, 447.92938232421875, 384.777587890625, -12.787418365478516, -332.5181579589844, 341.7579345703125, -127.35565185546875, 519.9290771484375, 429.09503173828125, 676.6309204101562, -122.53124237060547, 193.59524536132812, -24.182907104492188, -3.0696468353271484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000452.npy"} +{"epoch": 0.6637298091042585, "step": 453, "batch_size": 64, "mean": 258.44171142578125, "std": 342.95660400390625, "min": -347.0584411621094, "p10": -127.43044357299803, "median": 188.82168579101562, "p90": 691.2347900390627, "max": 1570.59912109375, "pos_frac": 0.734375, "sample": [568.5540771484375, 536.0327758789062, -115.37065887451172, 120.85491180419922, 230.99639892578125, 166.9409942626953, 261.816650390625, 99.69645690917969, 209.77322387695312, 62.94818115234375, 260.7158508300781, 515.3457641601562, 497.0696105957031, 569.4307861328125, 42.852603912353516, 88.49183654785156, 323.38897705078125, 169.29119873046875, -132.5989227294922, 56.10023498535156, 565.5731201171875, -174.5524444580078, 233.53726196289062, 310.34429931640625, 644.8399658203125, -211.00787353515625, -72.29991149902344, 711.1182861328125, -347.0584411621094, 360.027587890625, -61.47257995605469, 200.3211669921875, 527.5142211914062, 816.6832275390625, 791.8721313476562, -16.037120819091797, -64.71049499511719, 144.45706176757812, 1570.59912109375, 799.6514282226562, 403.6321105957031, -91.9367904663086, 774.9608764648438, -16.931617736816406, 270.29638671875, 103.43414306640625, 571.6650390625, 177.32220458984375, 401.19677734375, 391.748291015625, -249.50379943847656, 504.75042724609375, -166.01165771484375, 542.7260131835938, 60.38330078125, -67.94901275634766, -153.24818420410156, 165.3006591796875, 961.005615234375, 468.8268737792969, -30.658435821533203, -40.35639190673828, 138.39666748046875, 159.4888916015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000453.npy"} +{"epoch": 0.6651982378854625, "step": 454, "batch_size": 64, "mean": 294.9285888671875, "std": 351.8968811035156, "min": -627.368896484375, "p10": -27.241162109374976, "median": 236.39071655273438, "p90": 657.9208740234376, "max": 1385.812744140625, "pos_frac": 0.875, "sample": [236.78094482421875, 56.9005126953125, 971.6472778320312, 231.1817169189453, 579.0236206054688, 87.99217224121094, 634.00927734375, 306.31634521484375, 22.68698501586914, 173.56565856933594, -555.6647338867188, 582.0557250976562, 317.1202087402344, 331.383544921875, 236.00048828125, 274.8299560546875, 392.9197082519531, 371.4425048828125, 14.528633117675781, -627.368896484375, 30.61149787902832, 585.4260864257812, -51.38128662109375, 780.7939453125, -260.7757873535156, -74.04498291015625, 621.816650390625, 103.47098541259766, 56.95802307128906, 229.5926513671875, 528.538330078125, 140.71641540527344, 347.9437255859375, 116.9946517944336, 462.73095703125, 125.4852523803711, 226.940185546875, 120.7197265625, 301.806884765625, 196.69004821777344, 442.567138671875, 459.9066467285156, 108.38777160644531, 353.8476867675781, -5.5473785400390625, 668.168701171875, 1385.812744140625, 374.8086853027344, 54.58106231689453, 160.53515625, 1341.9161376953125, 1019.638916015625, 287.8486633300781, 521.0732421875, 70.2166976928711, 456.3302917480469, 550.62158203125, -81.84927368164062, 407.4430847167969, -36.53849792480469, 201.42323303222656, 104.73532104492188, 724.5967407226562, 76.52006530761719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000454.npy"} +{"epoch": 0.6666666666666666, "step": 455, "batch_size": 64, "mean": 339.64556884765625, "std": 382.1095275878906, "min": -548.8935546875, "p10": -69.55535087585447, "median": 236.27769470214844, "p90": 926.1752075195316, "max": 1297.5086669921875, "pos_frac": 0.875, "sample": [115.06135559082031, 165.11785888671875, 729.989501953125, 953.7691650390625, -361.302978515625, 350.7195129394531, 664.9130859375, -79.83805847167969, 1077.556640625, -205.5781707763672, 1049.897705078125, 220.0259552001953, 861.789306640625, -289.3708801269531, 1060.4803466796875, -112.46520233154297, 224.81201171875, 677.4317016601562, 234.44216918945312, 171.66773986816406, 105.97601318359375, 459.2610168457031, 217.63584899902344, 23.242136001586914, 374.4409484863281, 1101.396240234375, 648.436279296875, -136.31640625, 292.12091064453125, 402.20135498046875, 237.97393798828125, 545.4495849609375, 226.96337890625, 180.95639038085938, 205.3789520263672, 209.99957275390625, 272.29132080078125, 329.9908142089844, 92.7034912109375, -548.8935546875, 333.10797119140625, 84.79519653320312, 325.0887756347656, 565.1731567382812, 45.48931884765625, 74.33606719970703, 41.375396728515625, 451.88800048828125, 802.5907592773438, 234.58145141601562, 654.2088623046875, 138.36410522460938, 343.1568603515625, 382.7613830566406, 84.93744659423828, 112.02401733398438, 5.722797393798828, 810.6263427734375, 488.72265625, 409.5385437011719, 1153.175537109375, 193.37655639648438, -45.5623664855957, 1297.5086669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000455.npy"} +{"epoch": 0.6681350954478708, "step": 456, "batch_size": 64, "mean": 334.0071716308594, "std": 336.36090087890625, "min": -188.68081665039062, "p10": -5.992239379882801, "median": 260.4949188232422, "p90": 838.3354553222657, "max": 1408.7548828125, "pos_frac": 0.890625, "sample": [123.05511474609375, 181.74571228027344, 764.9349975585938, 41.355987548828125, 4.9578857421875, 303.4558410644531, 71.90554809570312, -10.685150146484375, 213.23419189453125, 737.5368041992188, 420.94635009765625, 32.827484130859375, 126.60896301269531, 1408.7548828125, 294.3787841796875, -74.42060089111328, 59.68867492675781, 290.5590515136719, 336.5692138671875, 365.9922790527344, 264.2513427734375, 65.06321716308594, -63.8524169921875, 1000.3232421875, -36.727298736572266, 411.6693115234375, 1247.98193359375, 448.6028747558594, 539.0425415039062, 58.1461181640625, 329.08892822265625, 519.7657470703125, 661.1502685546875, 148.982177734375, 162.3839111328125, 256.7384948730469, 23.595293045043945, 467.09368896484375, 922.2448120117188, 95.79368591308594, 816.7769165039062, 377.0015869140625, 268.82232666015625, 65.06233978271484, -188.68081665039062, 370.68768310546875, 68.75562286376953, -18.145416259765625, 847.5748291015625, 74.10323333740234, 880.6139526367188, 1012.6958618164062, 339.1785583496094, -12.846466064453125, 238.04367065429688, 131.4518280029297, 398.0694885253906, 98.87350463867188, 473.17919921875, 171.00933837890625, 241.13522338867188, 564.4842529296875, 717.264892578125, 226.60794067382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000456.npy"} +{"epoch": 0.6696035242290749, "step": 457, "batch_size": 64, "mean": 339.88275146484375, "std": 333.91827392578125, "min": -355.375244140625, "p10": -84.51013603210444, "median": 382.4580383300781, "p90": 721.2239624023439, "max": 1191.8912353515625, "pos_frac": 0.828125, "sample": [-10.054618835449219, 446.6233825683594, 438.35809326171875, 359.1328125, 617.8945922851562, 682.8246459960938, 758.3623657226562, 730.2921142578125, 78.48973846435547, 53.586578369140625, 1072.7998046875, 455.7867431640625, 42.356204986572266, 240.64625549316406, 97.83387756347656, 230.7224578857422, 614.531494140625, 265.5733337402344, -0.7750473022460938, 538.4541015625, 700.06494140625, -13.46145248413086, 185.32862854003906, 562.329833984375, 382.17962646484375, 955.4198608398438, -355.375244140625, -172.08457946777344, 658.5136108398438, 582.5377197265625, 115.76329040527344, 535.78369140625, 464.9507751464844, 547.442138671875, 297.0589599609375, -109.99800109863281, 621.2576904296875, 443.6102600097656, 109.87088012695312, 11.9632568359375, 419.4337158203125, 833.8289794921875, 530.418212890625, 144.9351348876953, 35.61637496948242, 113.00904846191406, 617.1265258789062, -102.90807342529297, 539.4296264648438, 431.5575256347656, -242.6298828125, 633.9177856445312, 281.2812805175781, 809.31884765625, -284.27655029296875, 382.7364501953125, 104.83558654785156, -153.72259521484375, 394.66595458984375, -41.58161544799805, 300.47698974609375, 549.0900268554688, 27.449119567871094, 1191.8912353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000457.npy"} +{"epoch": 0.671071953010279, "step": 458, "batch_size": 64, "mean": 302.67303466796875, "std": 383.2219543457031, "min": -549.4540405273438, "p10": -121.08548202514648, "median": 301.4980773925781, "p90": 687.1515014648439, "max": 1794.8275146484375, "pos_frac": 0.796875, "sample": [-549.4540405273438, 718.1093139648438, 479.3380126953125, 298.08984375, 591.2870483398438, -110.61412048339844, 1794.8275146484375, -83.17326354980469, 306.81219482421875, 545.699951171875, 157.44424438476562, 238.20697021484375, 481.14776611328125, 330.2706604003906, 733.2754516601562, -115.39190673828125, -121.89759826660156, 9.778661727905273, 219.39950561523438, 580.156982421875, 465.8650817871094, 504.8871765136719, 47.698081970214844, 541.4370727539062, 389.65478515625, 375.7314453125, 341.1472473144531, -18.41583251953125, 359.6619873046875, 548.4781494140625, 304.90631103515625, 711.0347900390625, 561.6968994140625, 823.6109619140625, 77.97283935546875, -5.74566650390625, 348.44805908203125, 74.15519714355469, 389.44012451171875, 262.26739501953125, 709.609130859375, 362.5413513183594, -129.58523559570312, -357.9420471191406, 209.74267578125, -262.34588623046875, 171.68267822265625, -119.19054412841797, 185.80221557617188, 1625.16796875, 542.1123657226562, 192.72329711914062, 250.0129852294922, 76.38752746582031, 172.07821655273438, 634.7503662109375, 324.9320983886719, 446.1982116699219, 252.9034881591797, 244.57928466796875, -164.9967803955078, 424.5519714355469, -276.20458984375, 248.3191680908203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000458.npy"} +{"epoch": 0.6725403817914831, "step": 459, "batch_size": 64, "mean": 331.5897216796875, "std": 413.1249084472656, "min": -760.7987060546875, "p10": -179.01899719238276, "median": 287.12890625, "p90": 892.5488098144532, "max": 1464.4971923828125, "pos_frac": 0.828125, "sample": [71.36039733886719, 29.61529541015625, 277.8040771484375, 212.91546630859375, 286.85125732421875, 413.7723693847656, -232.6925048828125, 92.52268981933594, 586.4515380859375, -272.0907287597656, 565.0306396484375, 389.5584716796875, 473.0850524902344, 534.3076171875, 4.66187858581543, 480.0126953125, -92.30459594726562, 683.212890625, 322.6678466796875, 930.0194091796875, 25.323516845703125, 956.3989868164062, 599.2147216796875, -59.5462532043457, 318.9421081542969, -238.47557067871094, 886.4002685546875, -137.59129333496094, 169.2809600830078, 15.030706405639648, 249.7304229736328, -20.49462127685547, 858.3787841796875, -760.7987060546875, 1464.4971923828125, 287.40655517578125, 1197.3067626953125, 561.5000610351562, 244.8547821044922, -290.167724609375, 150.09619140625, 245.7781982421875, 483.7030029296875, 165.59161376953125, 45.133636474609375, 265.7309875488281, -370.9389953613281, 189.81338500976562, 447.1884460449219, 600.4581298828125, 768.4961547851562, 90.63829040527344, 330.21636962890625, 1192.0313720703125, 301.2215576171875, 102.84786224365234, 912.6755981445312, -196.7737274169922, 751.7171020507812, 757.542236328125, 895.1838989257812, 349.8087158203125, 64.99737548828125, 594.63330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000459.npy"} +{"epoch": 0.6740088105726872, "step": 460, "batch_size": 64, "mean": 362.849609375, "std": 397.34759521484375, "min": -326.1785888671875, "p10": -141.39386596679688, "median": 317.8660583496094, "p90": 907.5962341308594, "max": 1532.1075439453125, "pos_frac": 0.828125, "sample": [903.4779663085938, 780.5442504882812, 547.5216064453125, -159.75216674804688, 109.09916687011719, 961.9359741210938, 951.41748046875, 281.95330810546875, 57.978294372558594, 218.5826873779297, -140.70779418945312, 693.8606567382812, 183.27586364746094, 530.96826171875, 189.9540252685547, 246.65301513671875, 251.98483276367188, -326.1785888671875, 161.56849670410156, 1275.425048828125, 1175.5186767578125, 234.8031005859375, 430.7835998535156, 27.33001708984375, 1001.993408203125, 260.9777526855469, 1532.1075439453125, 446.0526123046875, -141.68789672851562, 24.584442138671875, 330.8033752441406, -132.89950561523438, 582.8099975585938, -181.1868438720703, 545.0400390625, 592.2777099609375, -104.6793441772461, 466.8930358886719, 304.9287414550781, -43.21758270263672, 860.173583984375, 621.08349609375, 352.4653015136719, 410.03704833984375, 512.068359375, 168.57562255859375, 86.01097106933594, 165.52574157714844, 361.8500671386719, 253.6863250732422, -255.47618103027344, 115.42440032958984, 141.591552734375, 387.6798400878906, -265.6163330078125, 684.0255126953125, 909.3612060546875, 546.1105346679688, -227.94607543945312, 857.7670288085938, 563.3692626953125, 428.3769836425781, 30.548912048339844, 442.88671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000460.npy"} +{"epoch": 0.6754772393538914, "step": 461, "batch_size": 64, "mean": 287.71759033203125, "std": 411.5902404785156, "min": -579.1143798828125, "p10": -165.7444396972656, "median": 223.40584564208984, "p90": 782.7369201660157, "max": 1580.951904296875, "pos_frac": 0.734375, "sample": [112.3499755859375, -19.174331665039062, 784.470947265625, 394.4082336425781, -179.90264892578125, 500.5976867675781, -579.1143798828125, 1372.28466796875, 88.055419921875, 602.0429077148438, 129.81558227539062, -102.9870376586914, 307.0028076171875, 624.0115356445312, 1016.4620361328125, 448.16839599609375, 633.7069702148438, -106.35943603515625, 249.1953887939453, 400.42279052734375, 338.7325439453125, 778.6908569335938, 701.3958129882812, 973.9652099609375, -209.99732971191406, -62.691497802734375, 1152.292236328125, 1580.951904296875, 209.22586059570312, 304.663818359375, 477.3047180175781, 189.16566467285156, 224.03131103515625, 44.76544189453125, 156.23480224609375, 309.39776611328125, -232.32427978515625, -17.49072265625, -101.66201782226562, -35.137298583984375, 845.9384155273438, -85.94568634033203, 723.8848266601562, 675.8502197265625, -249.5846710205078, 272.9963073730469, 222.78038024902344, 611.7818603515625, 17.612091064453125, -132.7086181640625, 598.5179443359375, 72.4813461303711, 133.11720275878906, 135.157958984375, 501.2723388671875, -85.53620147705078, -191.90304565429688, 87.62149047851562, 279.0994873046875, 304.67254638671875, 144.29803466796875, -379.13330078125, 277.28515625, 177.3980712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000461.npy"} +{"epoch": 0.6769456681350955, "step": 462, "batch_size": 64, "mean": 230.55760192871094, "std": 418.24053955078125, "min": -586.2332763671875, "p10": -279.3302322387695, "median": 180.06812286376953, "p90": 676.3773193359377, "max": 1373.8746337890625, "pos_frac": 0.703125, "sample": [332.05352783203125, 44.852943420410156, 5.313758850097656, -231.45375061035156, 419.9739074707031, 504.9482421875, 29.103792190551758, 502.7032165527344, 565.283447265625, 881.2637939453125, 487.3441162109375, -218.3994140625, -375.383056640625, 30.66982650756836, 162.28988647460938, 495.3380126953125, 554.9339599609375, 152.01480102539062, -225.25921630859375, 703.6810913085938, -299.8487243652344, 154.76901245117188, 391.6253356933594, -586.2332763671875, -160.81297302246094, 56.159820556640625, 612.6685180664062, -454.84320068359375, 23.104969024658203, -32.22785568237305, 1238.98876953125, 420.23822021484375, -319.1748352050781, 258.759033203125, 550.495849609375, 1062.781005859375, -222.2434844970703, -80.4703140258789, 1311.2041015625, 228.64256286621094, 170.32936096191406, -142.23155212402344, -187.2995147705078, 189.806884765625, -323.6409606933594, -42.79920959472656, 374.1672058105469, 544.7291870117188, -321.5359802246094, 151.6905517578125, -56.839569091796875, 447.536376953125, 1373.8746337890625, 542.1305541992188, 393.2474365234375, -141.9288330078125, 274.96258544921875, 552.320068359375, 776.665771484375, 480.19317626953125, 106.59114074707031, 137.554931640625, 239.51393127441406, 241.79251098632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000462.npy"} +{"epoch": 0.6784140969162996, "step": 463, "batch_size": 64, "mean": 303.08685302734375, "std": 436.7508544921875, "min": -501.42236328125, "p10": -258.9184097290039, "median": 267.85850524902344, "p90": 823.0969055175782, "max": 1440.370849609375, "pos_frac": 0.734375, "sample": [58.11750793457031, 608.9901123046875, -42.64490509033203, 264.753173828125, -208.6466064453125, 389.4869689941406, -165.03546142578125, 16.04741668701172, 456.87713623046875, -32.59196472167969, 598.9309692382812, -265.7428283691406, 580.50830078125, 675.4722900390625, 157.31619262695312, 296.1273498535156, 364.3187561035156, 622.0865478515625, 807.06787109375, -71.53190612792969, 25.088350296020508, 1440.370849609375, 394.3097839355469, 305.5984802246094, 1166.3819580078125, 670.8638916015625, 184.50485229492188, 243.88430786132812, 29.934661865234375, 226.87930297851562, -344.9307556152344, -46.522315979003906, -501.42236328125, 176.4937744140625, 829.9664916992188, -351.6321716308594, -36.10261535644531, -172.5997314453125, 392.4482116699219, -242.99476623535156, 664.685791015625, 314.827392578125, 605.9363403320312, 265.1563720703125, 793.5699462890625, 1040.7554931640625, -42.035491943359375, 270.5606384277344, 1013.9063110351562, -347.0289306640625, 184.5446014404297, 1296.0714111328125, 259.8598327636719, 698.4227294921875, -266.15228271484375, 53.815643310546875, 398.91461181640625, 271.8999938964844, -461.0357666015625, 424.83514404296875, 505.7195129394531, 553.96142578125, 1215.5146484375, 180.4265899658203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000463.npy"} +{"epoch": 0.6798825256975036, "step": 464, "batch_size": 64, "mean": 478.4560546875, "std": 476.39617919921875, "min": -523.4840087890625, "p10": -37.8988906860351, "median": 450.35365295410156, "p90": 996.2517028808595, "max": 2099.872802734375, "pos_frac": 0.890625, "sample": [-282.86627197265625, 960.3135986328125, 369.6253662109375, 233.85903930664062, 36.62396240234375, 1084.27783203125, 695.7884521484375, 132.2771759033203, 120.26235961914062, 434.99505615234375, 689.4169311523438, 634.260986328125, -523.4840087890625, 782.762939453125, 295.96783447265625, 596.9351196289062, 706.3499755859375, 215.3824920654297, 456.39361572265625, 789.262939453125, 759.0296630859375, 469.6868896484375, 864.8212890625, -191.9900665283203, 1011.6537475585938, 266.2537536621094, 635.5098876953125, 493.596923828125, -450.9042053222656, 167.43682861328125, 889.22705078125, 222.03515625, 547.7277221679688, 57.79104995727539, 444.3136901855469, 596.740966796875, 805.6597900390625, 227.80055236816406, 370.5892333984375, 301.42333984375, 246.52862548828125, 38.474857330322266, 883.2686767578125, 618.2319946289062, 110.76216125488281, 436.0268249511719, -119.95670318603516, 276.57440185546875, 16.53510284423828, 670.4801025390625, 576.8836059570312, 1496.6475830078125, 2099.872802734375, 1173.887939453125, 432.3331298828125, 1084.5439453125, 758.8408813476562, -61.227745056152344, 1712.0157470703125, 736.8572998046875, 520.2791748046875, -438.3421936035156, 344.8526611328125, 90.0081558227539], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000464.npy"} +{"epoch": 0.6813509544787077, "step": 465, "batch_size": 64, "mean": 380.36016845703125, "std": 493.7154235839844, "min": -1397.0531005859375, "p10": -162.67533416748046, "median": 438.0199737548828, "p90": 852.6529113769532, "max": 1853.6395263671875, "pos_frac": 0.859375, "sample": [684.6209716796875, 1380.4942626953125, 513.0576171875, 242.5097198486328, -268.51251220703125, 35.310089111328125, 579.1141967773438, 385.401611328125, 49.847694396972656, 41.180694580078125, 816.1670532226562, 282.302490234375, 382.9154052734375, 82.47087860107422, 635.046142578125, 559.3484497070312, 467.2933349609375, 425.3450012207031, 278.7674560546875, 72.12889862060547, -570.1525268554688, 975.7545776367188, 507.69488525390625, 218.15036010742188, 1853.6395263671875, -109.16957092285156, 453.84869384765625, 146.14849853515625, 368.76263427734375, 524.2957763671875, 710.9073486328125, 506.9530029296875, -169.40725708007812, 179.61984252929688, 249.50155639648438, 542.9988403320312, 18.750160217285156, 245.84375, -335.4336242675781, 658.4844970703125, 955.6554565429688, 225.23355102539062, 549.074462890625, -859.4469604492188, 693.3245849609375, 707.4691162109375, 165.85923767089844, 760.791015625, 111.50636291503906, 839.85693359375, 809.763916015625, -180.12852478027344, 1123.8280029296875, -1397.0531005859375, 911.5091552734375, 858.1369018554688, -146.96751403808594, 367.919921875, 682.7645874023438, 490.6353454589844, 57.78086853027344, 450.6949462890625, 833.8763427734375, 708.9654541015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000465.npy"} +{"epoch": 0.6828193832599119, "step": 466, "batch_size": 64, "mean": 335.2580261230469, "std": 494.2394714355469, "min": -1126.9891357421875, "p10": -328.97020874023434, "median": 286.6752166748047, "p90": 962.3815185546875, "max": 1475.7724609375, "pos_frac": 0.78125, "sample": [596.3165893554688, -348.90447998046875, 200.33636474609375, -399.3563537597656, 825.074951171875, 346.69146728515625, 503.1434020996094, 186.1515350341797, 653.515625, 71.77079772949219, 128.35455322265625, 1113.5257568359375, -393.7833251953125, 861.0147705078125, -26.01504135131836, 152.73046875, 319.68096923828125, 12.643573760986328, 455.3266906738281, 159.4959716796875, 540.1888427734375, 184.76007080078125, 534.24267578125, 377.94903564453125, 849.5247192382812, 160.78717041015625, -4.713768005371094, 803.0225830078125, 279.63726806640625, 550.8772583007812, 374.12628173828125, 172.64535522460938, 845.3543090820312, 1475.7724609375, -1126.9891357421875, 967.0209350585938, 412.0393371582031, 719.1597900390625, 1232.6458740234375, 485.67999267578125, 949.1513061523438, 4.840557098388672, 216.33111572265625, 202.23117065429688, 538.5855712890625, 1240.0753173828125, -203.25033569335938, -435.7266540527344, -66.79349517822266, 90.36700439453125, 1146.24609375, 293.7131652832031, 818.0253295898438, -20.233314514160156, 970.1986083984375, 442.00103759765625, -574.5069580078125, 0.1990509033203125, 951.5562133789062, -282.4569091796875, 270.0356750488281, -132.06988525390625, -409.9389343261719, 196.48782348632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000466.npy"} +{"epoch": 0.684287812041116, "step": 467, "batch_size": 64, "mean": 354.17047119140625, "std": 672.411865234375, "min": -1327.302001953125, "p10": -471.54986877441405, "median": 397.33689880371094, "p90": 1170.4131591796875, "max": 1939.212890625, "pos_frac": 0.71875, "sample": [854.9246215820312, 1588.4547119140625, 693.6704711914062, 1316.96435546875, -746.4620971679688, -1327.302001953125, 646.797607421875, 456.7754821777344, 499.2306213378906, 356.9459228515625, 320.3083190917969, 431.50244140625, 1767.41259765625, 158.03611755371094, 541.8643188476562, 451.25286865234375, 117.74329376220703, 1149.8155517578125, 1480.9881591796875, 492.58221435546875, 406.110107421875, -66.66822814941406, 71.31854248046875, 809.9696044921875, -354.41741943359375, -437.1165466308594, -692.532470703125, 554.6102294921875, 1172.218505859375, 416.24835205078125, 1061.412841796875, 170.56932067871094, 451.64398193359375, -539.4066772460938, 1043.7950439453125, 7.368865966796875, 1166.20068359375, 227.22991943359375, -74.4345703125, 867.6819458007812, 290.44677734375, -1176.4366455078125, 200.68157958984375, -274.67657470703125, 61.79344177246094, 449.76593017578125, -2.6053466796875, -348.9725341796875, -322.92864990234375, 761.8635864257812, 905.9116821289062, 821.7080078125, 1259.2362060546875, 388.5636901855469, 917.6121826171875, -19.34790802001953, 635.6116333007812, -29.368194580078125, -486.3070068359375, 1939.212890625, -986.1392211914062, -32.85995864868164, 42.88054656982422, 157.95654296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000467.npy"} +{"epoch": 0.6857562408223201, "step": 468, "batch_size": 64, "mean": 444.5817565917969, "std": 578.7382202148438, "min": -1028.61572265625, "p10": -119.88535919189451, "median": 370.33538818359375, "p90": 1413.2689819335942, "max": 2184.939208984375, "pos_frac": 0.875, "sample": [444.3622741699219, 38.51738739013672, 1477.0849609375, 567.6026611328125, 1489.4366455078125, 934.1784057617188, 551.8944702148438, 679.109619140625, 488.0071716308594, 584.6311645507812, 357.6864929199219, 782.7451782226562, 142.88833618164062, 454.8307189941406, 581.1719970703125, 106.5030517578125, 226.2911834716797, -185.27491760253906, 351.8741149902344, 239.09487915039062, 189.52838134765625, 13.862071990966797, 42.68578338623047, 382.9842834472656, 423.5818786621094, 1468.546142578125, 346.2132568359375, 1571.99169921875, 1043.5386962890625, 155.60665893554688, -400.1169128417969, 8.347244262695312, 567.9784545898438, 292.4871826171875, 406.1673278808594, 557.9375610351562, 116.91382598876953, 105.2335205078125, 579.6394653320312, -1028.61572265625, 1815.428466796875, 88.64530944824219, 444.05670166015625, 525.7970581054688, 417.6376953125, 165.73739624023438, -261.7005615234375, 309.5242004394531, 9.524642944335938, 1284.2889404296875, 200.62631225585938, 2087.018310546875, 276.3050231933594, 774.451171875, 3.3681106567382812, 462.4010314941406, 417.5222473144531, 2184.939208984375, 566.793212890625, -126.8304443359375, 320.41748046875, -417.64007568359375, -103.68016052246094, -148.54522705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000468.npy"} +{"epoch": 0.6872246696035242, "step": 469, "batch_size": 64, "mean": 443.07672119140625, "std": 526.7197265625, "min": -741.7655639648438, "p10": -147.63352279663079, "median": 464.9999542236328, "p90": 1175.1594970703125, "max": 1907.375244140625, "pos_frac": 0.828125, "sample": [1066.3826904296875, 655.668212890625, 103.72628021240234, 548.2422485351562, 1907.375244140625, 557.4761962890625, 1240.700927734375, 45.98998260498047, -34.10508728027344, 156.7445068359375, 646.8450927734375, 129.8148651123047, -678.7996826171875, 33.452423095703125, 1178.2574462890625, -13.014350891113281, 488.9692077636719, 584.0140380859375, 462.0714111328125, 558.06884765625, 1152.6048583984375, 219.57351684570312, 119.06462097167969, 132.45970153808594, 1170.420654296875, 2.27313232421875, 841.7667236328125, 505.9339294433594, 1199.938232421875, -345.854736328125, 720.9924926757812, -179.9459991455078, 791.147705078125, 199.74891662597656, 704.3853149414062, 289.32818603515625, -59.728675842285156, -176.74276733398438, 10.90789794921875, -481.76531982421875, 467.9284973144531, 1005.7301635742188, 1215.1143798828125, 62.54475021362305, -364.16748046875, 197.99969482421875, 251.60330200195312, 1082.2672119140625, 383.71331787109375, 81.78748321533203, 1042.4407958984375, 945.3317260742188, 541.4976806640625, 1177.1904296875, 1258.308349609375, 773.4035034179688, 529.5413208007812, 346.332763671875, 89.89488220214844, -79.71195220947266, 184.34585571289062, 683.0897827148438, 768.09814453125, -741.7655639648438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000469.npy"} +{"epoch": 0.6886930983847284, "step": 470, "batch_size": 64, "mean": 386.737548828125, "std": 534.6903076171875, "min": -1197.7696533203125, "p10": -291.75819396972656, "median": 453.46177673339844, "p90": 1002.4114990234376, "max": 1697.283203125, "pos_frac": 0.75, "sample": [459.8858642578125, 504.85809326171875, -154.23193359375, 107.16153717041016, 198.63284301757812, -116.35909271240234, 327.715576171875, 111.4994125366211, -57.697174072265625, -673.9273071289062, 579.6582641601562, 736.5143432617188, 466.0272521972656, 447.0376892089844, 631.7828979492188, 1086.31787109375, -289.77374267578125, 718.4432373046875, -1197.7696533203125, 691.6676635742188, -193.79940795898438, 777.1790771484375, 188.34707641601562, -432.7843933105469, 254.2052001953125, 195.96377563476562, -292.6086730957031, 313.8109130859375, 485.14300537109375, 394.6091003417969, 690.693359375, -297.76019287109375, 1394.610107421875, 165.3403778076172, -26.57977867126465, 830.361572265625, 390.93951416015625, 626.5772094726562, 1697.283203125, 691.105712890625, 1266.7156982421875, 96.40890502929688, 797.2806396484375, 977.990966796875, -99.65724182128906, 1439.744384765625, 545.9013671875, 545.5595703125, 688.253662109375, 958.04443359375, 468.86773681640625, -295.5037841796875, -263.9732666015625, 263.8129577636719, 551.0673217773438, 788.9224853515625, -396.68133544921875, 1349.561279296875, 786.454345703125, -110.28889465332031, 1012.87744140625, 98.32809448242188, 328.0477600097656, 523.3875732421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000470.npy"} +{"epoch": 0.6901615271659325, "step": 471, "batch_size": 64, "mean": 321.73797607421875, "std": 542.1455078125, "min": -861.8800048828125, "p10": -278.3611755371094, "median": 204.81277465820312, "p90": 894.6962951660156, "max": 2061.860595703125, "pos_frac": 0.734375, "sample": [1235.3045654296875, -22.56372833251953, -66.4278564453125, 663.6156005859375, 363.836181640625, 727.0870361328125, 706.4476318359375, 2061.860595703125, 922.8099365234375, 264.23626708984375, 53.48484802246094, 325.9404296875, -170.2067413330078, 69.51065063476562, 385.9380187988281, -80.41839599609375, -250.94464111328125, -261.453125, 148.1458740234375, -75.23934936523438, 1611.93408203125, -376.56805419921875, 299.07684326171875, 378.9546203613281, 726.1666870117188, -113.40070343017578, -350.68084716796875, -64.86050415039062, -490.071533203125, 472.1000671386719, -253.99745178222656, 1075.1005859375, 760.0474243164062, 897.9450073242188, 496.3918762207031, 750.9248657226562, 860.4688720703125, 92.00575256347656, 147.9279022216797, 487.64190673828125, 192.71389770507812, 168.70217895507812, 95.63927459716797, 58.26740646362305, 841.5166015625, 1825.28857421875, 45.07745361328125, -861.8800048828125, 814.429443359375, 33.01304626464844, -354.6627197265625, 175.4317626953125, 70.67920684814453, 401.35418701171875, 74.39216613769531, 569.6373901367188, 21.08963966369629, -285.60748291015625, 441.7994384765625, 216.91165161132812, 683.1478271484375, 887.115966796875, -405.7317199707031, 474.83331298828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000471.npy"} +{"epoch": 0.6916299559471366, "step": 472, "batch_size": 64, "mean": 400.1733703613281, "std": 664.3963012695312, "min": -1389.5599365234375, "p10": -246.91765594482416, "median": 273.3364562988281, "p90": 1334.9525024414065, "max": 2335.417236328125, "pos_frac": 0.796875, "sample": [-1389.5599365234375, -456.90869140625, 861.1632080078125, -491.0302429199219, 198.2109832763672, 190.33242797851562, 625.2001342773438, -164.5762939453125, -53.22918701171875, -799.0219116210938, 315.5245056152344, 63.555240631103516, 142.1158905029297, 580.9274291992188, 58.965545654296875, 935.1412353515625, -39.67158508300781, 671.656494140625, 268.3338623046875, 18.14453125, 1253.24365234375, 46.96717834472656, 59.017333984375, 14.386215209960938, 177.7652587890625, 539.7618408203125, 306.9996032714844, -273.0387878417969, 306.7474365234375, 933.1177978515625, 35.228729248046875, 1027.5809326171875, 201.93246459960938, 981.6805419921875, 2125.079345703125, 1742.76171875, 187.4851531982422, 1468.478515625, 278.33905029296875, 545.8367919921875, 490.70831298828125, 1076.218994140625, 100.48713684082031, 304.149169921875, -204.2616729736328, 1369.9705810546875, 1628.479736328125, 106.01252746582031, 1545.5767822265625, 18.421035766601562, 2335.417236328125, -161.48838806152344, -265.19879150390625, -87.33850860595703, 1177.682861328125, 678.8079833984375, 714.4815063476562, 206.74435424804688, 395.4341125488281, -461.70135498046875, 307.1767883300781, 404.9815979003906, 285.6316223144531, 150.0569305419922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000472.npy"} +{"epoch": 0.6930983847283406, "step": 473, "batch_size": 64, "mean": 182.16502380371094, "std": 836.7240600585938, "min": -2009.171630859375, "p10": -535.9990478515625, "median": 194.9854507446289, "p90": 991.7993408203125, "max": 3822.724609375, "pos_frac": 0.65625, "sample": [-245.94198608398438, 912.10546875, 242.26316833496094, -341.935546875, -544.1724243164062, 1356.776123046875, -125.14910888671875, 193.99838256835938, 213.82313537597656, 355.9470520019531, 1231.29541015625, 648.6763916015625, -77.00111389160156, 142.00189208984375, -590.7205810546875, -468.2296142578125, -2.619232177734375, -2.7709808349609375, 195.97251892089844, 1264.364013671875, -368.5178527832031, -129.93360900878906, 737.7296142578125, 325.47503662109375, 90.09075927734375, 94.60077667236328, 258.178955078125, 343.942138671875, 189.50433349609375, 85.68496704101562, -2009.171630859375, 49.517982482910156, -535.7789306640625, 237.01734924316406, -1766.3311767578125, -1946.9339599609375, 3822.724609375, 305.05328369140625, 439.62548828125, 1296.0654296875, 286.33282470703125, 279.60626220703125, 760.1732177734375, 815.1094970703125, 187.86448669433594, 640.0145263671875, -536.0933837890625, -334.4399108886719, 210.2746124267578, 981.736328125, 606.05029296875, 996.112060546875, -414.01617431640625, -218.9727783203125, 70.66848754882812, -105.91788482666016, 902.2459716796875, -1525.8443603515625, 503.7132263183594, 158.33078002929688, -374.95855712890625, 312.52789306640625, 219.09494018554688, 1361.72265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000473.npy"} +{"epoch": 0.6945668135095447, "step": 474, "batch_size": 64, "mean": 327.47601318359375, "std": 611.2088012695312, "min": -1557.2181396484375, "p10": -259.3656311035156, "median": 316.39991760253906, "p90": 1021.6516967773439, "max": 2072.72119140625, "pos_frac": 0.765625, "sample": [-1089.825927734375, 81.24324798583984, 240.8306884765625, 1034.5848388671875, 789.211669921875, -635.8684692382812, 991.474365234375, 123.43534088134766, 280.3124084472656, 541.7780151367188, 1161.04248046875, 2072.72119140625, 29.425085067749023, 241.91519165039062, 100.79109954833984, 865.113525390625, 264.8580627441406, -9.443962097167969, -430.65093994140625, 368.15234375, 37.36701965332031, 395.2774353027344, -195.30987548828125, 163.60494995117188, 380.57159423828125, 763.4398193359375, 812.86376953125, 174.99383544921875, 575.9154663085938, 353.97894287109375, 697.93896484375, -101.0177001953125, 13.796257019042969, 338.186279296875, 567.6626586914062, 288.789794921875, 655.296142578125, 114.1468505859375, -235.79283142089844, -267.8988037109375, 729.99072265625, -99.83587646484375, 596.4761962890625, 531.0382690429688, 176.3914794921875, 294.6135559082031, 1402.4859619140625, 1083.044921875, -106.37400817871094, -700.2863159179688, -239.45489501953125, 919.4673461914062, 423.8819274902344, 646.2998657226562, 569.6793212890625, 220.41639709472656, 383.34326171875, -856.8641967773438, 734.67431640625, -147.84732055664062, 364.9609375, 1300.825927734375, -1557.2181396484375, 1733.843505859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000474.npy"} +{"epoch": 0.6960352422907489, "step": 475, "batch_size": 64, "mean": 434.46240234375, "std": 461.8392639160156, "min": -388.8052062988281, "p10": -185.18487396240232, "median": 375.6757354736328, "p90": 1028.2608886718756, "max": 1516.798583984375, "pos_frac": 0.796875, "sample": [108.1932373046875, -70.06721496582031, 341.8962707519531, 178.19436645507812, -312.46405029296875, 641.931396484375, 1382.176025390625, 373.15771484375, 1201.184814453125, 1091.9312744140625, 301.8227844238281, 307.3518371582031, 417.922119140625, -1.274688720703125, 633.1920166015625, -388.8052062988281, 682.9686279296875, 58.51618957519531, 808.0448608398438, -193.88003540039062, 782.1061401367188, 872.4382934570312, 772.008544921875, 246.28887939453125, 51.83889389038086, 319.90771484375, 377.01690673828125, 406.32366943359375, 55.6474723815918, 513.855712890625, 280.43463134765625, 528.1634521484375, -96.55079650878906, 276.2572937011719, 818.0231323242188, 865.6014404296875, 451.3049621582031, 752.1555786132812, -206.23878479003906, 1349.6048583984375, 526.1148071289062, -19.55207061767578, 284.7730407714844, -252.8257293701172, 855.6729736328125, 86.42308044433594, 550.6158447265625, -164.8961639404297, 337.88031005859375, 849.131591796875, -148.92465209960938, 847.261474609375, 526.2855224609375, 100.20480346679688, 1495.640625, -219.7935333251953, 320.14849853515625, 1516.798583984375, 708.08740234375, 1190.290771484375, 374.3345642089844, 879.6966552734375, -226.53335571289062, 410.5787353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000475.npy"} +{"epoch": 0.697503671071953, "step": 476, "batch_size": 64, "mean": 336.776611328125, "std": 480.3819580078125, "min": -563.6574096679688, "p10": -178.10211944580078, "median": 261.4314270019531, "p90": 908.9715576171877, "max": 2213.591064453125, "pos_frac": 0.796875, "sample": [838.9903564453125, 199.1207733154297, -128.90740966796875, 726.640869140625, 261.89239501953125, 509.6947937011719, 1425.2269287109375, 190.70944213867188, 145.28372192382812, -130.98358154296875, 1024.964111328125, 43.88593292236328, 124.62760925292969, 407.2076416015625, 767.36572265625, 651.994384765625, 705.02587890625, 152.79135131835938, 255.38137817382812, 139.37033081054688, 551.1633911132812, 243.69677734375, -36.93733215332031, 1248.319580078125, 550.974853515625, 576.00927734375, -513.9200439453125, -330.15301513671875, 636.9468994140625, 83.2458724975586, 9.05645751953125, 275.2379455566406, -322.3226013183594, -258.8496398925781, 520.1727294921875, -175.06370544433594, -179.404296875, -528.8225708007812, 708.656494140625, 203.45513916015625, 132.22955322265625, 604.9512939453125, 287.8789978027344, -126.56221008300781, 124.83934783935547, 260.970458984375, 693.8964233398438, 1040.32421875, 2213.591064453125, 938.9635009765625, 348.73187255859375, -59.09593200683594, 353.7924499511719, 250.349609375, 107.57017517089844, 106.71975708007812, 501.3670349121094, 471.962890625, 101.78529357910156, 366.6363525390625, 325.739990234375, 1131.4024658203125, 367.5712890625, -563.6574096679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000476.npy"} +{"epoch": 0.6989720998531571, "step": 477, "batch_size": 64, "mean": 349.0743408203125, "std": 614.7630615234375, "min": -1731.089599609375, "p10": -272.4799835205078, "median": 272.94847106933594, "p90": 996.4054809570313, "max": 2323.5859375, "pos_frac": 0.734375, "sample": [-562.0038452148438, 474.8106689453125, 957.8908081054688, -670.5684204101562, 1595.2630615234375, 249.27415466308594, 248.48641967773438, 939.0074462890625, 841.6089477539062, 1003.68798828125, 70.5741958618164, 7.469457626342773, 256.81982421875, 935.279296875, 1006.4005126953125, -135.24354553222656, 678.4672241210938, 523.2718505859375, 1229.2645263671875, -280.43988037109375, 594.7213745117188, 170.34490966796875, -9.924118041992188, 173.14495849609375, -253.90689086914062, 880.169189453125, 359.4475402832031, 145.56527709960938, -61.139312744140625, -67.80474853515625, -654.4085083007812, 564.1602783203125, -1731.089599609375, -694.73486328125, 1419.08642578125, 135.63961791992188, 275.5606384277344, 216.66162109375, 541.730712890625, 104.02658081054688, 565.05810546875, -2.0883750915527344, 621.108642578125, 979.4129638671875, 383.1446228027344, 412.38507080078125, 281.3359375, 869.0591430664062, 425.4598083496094, 240.63897705078125, 1194.334228515625, -251.53573608398438, -63.246055603027344, 270.3363037109375, -4.8077545166015625, 880.8868408203125, 676.16796875, -143.66818237304688, 35.931488037109375, -386.72088623046875, 171.33248901367188, 695.0338745117188, 2323.5859375, 691.041259765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000477.npy"} +{"epoch": 0.7004405286343612, "step": 478, "batch_size": 64, "mean": 402.06536865234375, "std": 531.4916381835938, "min": -909.4766845703125, "p10": -237.871109008789, "median": 391.9664001464844, "p90": 1230.1108886718753, "max": 1822.8262939453125, "pos_frac": 0.8125, "sample": [-267.90960693359375, 129.3504180908203, 823.2716064453125, 221.9998779296875, 44.917991638183594, 473.133544921875, 309.3858947753906, -418.15301513671875, 543.6575317382812, 7.9093017578125, 393.804443359375, 549.8588256835938, -527.5925903320312, -494.5533447265625, 309.0885314941406, 838.0570678710938, 847.52490234375, 1822.8262939453125, 547.571533203125, 131.93099975585938, -373.799072265625, -167.78128051757812, -422.07135009765625, 159.82943725585938, 488.33367919921875, 1022.4261474609375, 465.218994140625, 547.9259033203125, -56.251251220703125, 184.20106506347656, 1472.2869873046875, 1137.5367431640625, 390.12835693359375, 262.3650207519531, 407.0419921875, 170.18905639648438, 1336.3121337890625, 1408.2191162109375, 694.006591796875, -160.23179626464844, 627.5472412109375, -67.04534149169922, 121.36137390136719, -78.22270965576172, 742.8739624023438, 281.8611145019531, 168.17259216308594, 1269.7855224609375, 207.85752868652344, 423.053466796875, 567.9523315429688, 79.67842864990234, 362.521484375, 857.635009765625, 470.007080078125, 291.856689453125, 1384.6151123046875, 656.62451171875, 1324.5460205078125, -909.4766845703125, 629.5707397460938, 443.8079833984375, 616.2488403320312, 7.3830718994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000478.npy"} +{"epoch": 0.7019089574155654, "step": 479, "batch_size": 64, "mean": 385.1879577636719, "std": 683.02978515625, "min": -1585.702880859375, "p10": -272.3506195068359, "median": 302.2447204589844, "p90": 1179.8433837890625, "max": 3148.469970703125, "pos_frac": 0.71875, "sample": [41.898536682128906, 60.735252380371094, 1181.1251220703125, 640.8128662109375, 1240.0050048828125, 1215.873291015625, -501.89361572265625, -252.67160034179688, 294.13543701171875, 35.3193359375, 847.7824096679688, -213.31143188476562, -201.2686767578125, 311.73095703125, 851.4384765625, 240.64581298828125, 201.3901824951172, -104.31378173828125, 1114.6434326171875, 454.8898620605469, 252.56265258789062, 482.007080078125, -112.60231018066406, 535.8450317382812, 3148.469970703125, 1292.6087646484375, 686.0008544921875, 737.5625, 718.0064697265625, 553.2302856445312, 1176.8526611328125, 253.53952026367188, 176.12664794921875, 830.7113037109375, -701.536865234375, 1902.22802734375, -25.042816162109375, 706.6262817382812, 454.3531188964844, 138.49497985839844, 421.4971008300781, 1139.5123291015625, 772.0289306640625, 462.07098388671875, -157.00120544433594, -1585.702880859375, 459.56463623046875, -396.5084228515625, -280.78448486328125, -111.49530029296875, 138.44468688964844, -141.17105102539062, -505.78375244140625, -568.6898193359375, 198.62794494628906, -81.7075424194336, 544.0030517578125, -37.859375, 53.0560302734375, 1304.0198974609375, 310.35400390625, 1081.08642578125, 145.29429626464844, 824.161865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000479.npy"} +{"epoch": 0.7033773861967695, "step": 480, "batch_size": 64, "mean": 302.38421630859375, "std": 581.4031372070312, "min": -854.6018676757812, "p10": -443.15312194824213, "median": 337.36314392089844, "p90": 982.6928649902344, "max": 2243.948974609375, "pos_frac": 0.6875, "sample": [-468.7287902832031, 402.8211669921875, -155.79354858398438, 128.32733154296875, 570.7271728515625, 26.996604919433594, 261.8414611816406, 982.07177734375, 634.1441650390625, 862.9854125976562, 612.7802734375, 1305.000732421875, 555.8751220703125, -371.07757568359375, -601.9192504882812, -530.3399658203125, 802.6204223632812, 822.1863403320312, -148.73764038085938, 367.995849609375, 30.267967224121094, -203.25950622558594, 22.725204467773438, 456.2933349609375, 358.8371276855469, 190.84642028808594, 2243.948974609375, 332.8582763671875, 589.67724609375, -253.05433654785156, 813.447509765625, 982.9590454101562, 949.4605712890625, -240.38414001464844, -528.7647705078125, 341.8680114746094, 449.0115966796875, -49.42308044433594, 508.66632080078125, -854.6018676757812, -836.8958740234375, -644.7738037109375, 616.1947631835938, 1124.9776611328125, 1440.940673828125, -143.19973754882812, 364.1752014160156, 433.15692138671875, -47.18081283569336, 469.5676574707031, -8.982406616210938, -383.4765625, 167.242431640625, 216.00820922851562, 122.6883544921875, 7.949268341064453, 155.5164794921875, -16.57046890258789, 586.8745727539062, 1086.596923828125, 528.148193359375, -194.5505828857422, 1186.25244140625, 920.7728881835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000480.npy"} +{"epoch": 0.7048458149779736, "step": 481, "batch_size": 64, "mean": 464.44964599609375, "std": 578.2244873046875, "min": -933.6080322265625, "p10": -184.3487594604492, "median": 399.38323974609375, "p90": 1047.98486328125, "max": 2273.047607421875, "pos_frac": 0.765625, "sample": [2273.047607421875, -933.6080322265625, 585.9940795898438, 397.01593017578125, -226.4154815673828, -102.10087585449219, -106.33584594726562, 357.1153869628906, 485.59405517578125, 1587.35693359375, 91.31743621826172, -224.47476196289062, 195.1874542236328, -238.1116943359375, 522.7977905273438, 868.231201171875, -180.61624145507812, 697.80908203125, 1437.0994873046875, -2.9187164306640625, 37.32598876953125, 810.5626831054688, 331.7951965332031, 795.5780639648438, 460.4345703125, 199.35430908203125, 213.66854858398438, 1027.6376953125, 869.12060546875, -402.5368957519531, 111.111328125, 787.1478881835938, 401.75054931640625, -259.750244140625, 793.9627685546875, 892.0930786132812, -150.89215087890625, -44.73537063598633, 1056.705078125, 504.75579833984375, -148.24151611328125, 146.76927185058594, 953.355224609375, 895.9677124023438, 85.8248062133789, 131.6470947265625, 1790.5257568359375, 339.2485046386719, 1605.570068359375, 1024.752197265625, 236.13922119140625, -7.9669342041015625, 406.0653991699219, 460.4981384277344, 572.8683471679688, 270.815673828125, 218.51992797851562, 863.6683349609375, 808.5281372070312, 738.086181640625, 1546.6522216796875, -185.9484100341797, 365.75213623046875, 686.6028442382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000481.npy"} +{"epoch": 0.7063142437591777, "step": 482, "batch_size": 64, "mean": 205.19125366210938, "std": 626.0250854492188, "min": -2106.2060546875, "p10": -507.3819427490234, "median": 249.48712158203125, "p90": 869.1270568847658, "max": 1915.220947265625, "pos_frac": 0.703125, "sample": [161.83877563476562, -55.86778259277344, 333.6095275878906, -815.3740234375, 445.38763427734375, 518.8282470703125, -1060.6470947265625, 589.6497192382812, 593.2986450195312, 428.3805847167969, 394.3355712890625, 680.6575317382812, -10.392318725585938, 353.9107666015625, 679.5034790039062, 1179.057373046875, 234.8714599609375, -369.63531494140625, 379.12139892578125, 372.72589111328125, 10.349498748779297, 347.2068176269531, 386.6942138671875, 898.0764770507812, -1050.2305908203125, 355.3441162109375, 81.00237274169922, 891.9121704101562, 264.102783203125, 513.7989501953125, 661.8587036132812, 654.569580078125, 760.0235595703125, 234.74520874023438, -316.5642395019531, 178.04257202148438, 1098.681396484375, -466.5219421386719, 1299.004150390625, -524.8933715820312, 619.2120361328125, -50.82946014404297, -213.73184204101562, 79.4889907836914, 200.4663848876953, 79.52835083007812, -2106.2060546875, 335.50421142578125, 1915.220947265625, -4.503822326660156, 22.00079345703125, -104.72279357910156, 4.1376953125, 465.8355712890625, -1029.900634765625, 320.75970458984375, 142.18853759765625, 1325.2000732421875, -162.9581298828125, 87.76853942871094, -214.8148193359375, -127.42398834228516, 815.9617919921875, -576.4041137695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000482.npy"} +{"epoch": 0.7077826725403817, "step": 483, "batch_size": 64, "mean": 475.9374084472656, "std": 584.6889038085938, "min": -499.2483215332031, "p10": -233.14519500732422, "median": 391.9900207519531, "p90": 1176.90869140625, "max": 3026.2529296875, "pos_frac": 0.8125, "sample": [-149.2707061767578, 119.63076782226562, 1317.5067138671875, 786.5553588867188, -306.72479248046875, 767.2975463867188, 413.3074951171875, 312.2796325683594, 753.909912109375, 630.6764526367188, 389.35540771484375, 796.4913940429688, 502.517333984375, 248.38038635253906, 275.1817932128906, 335.6748962402344, 983.5366821289062, 957.2939453125, -408.0692138671875, 538.6163330078125, 323.70904541015625, -62.83934783935547, 763.5557861328125, 822.921875, 50.76530456542969, -234.75430297851562, 1327.9736328125, 729.5625, 25.62310791015625, 1024.3876953125, -350.7261657714844, 1351.023681640625, 67.2546615600586, 924.5882568359375, 45.54039001464844, 1185.0513916015625, 272.7676696777344, 855.3275756835938, 195.76937866210938, 1099.052978515625, 782.4778442382812, 685.8578491210938, 478.7056579589844, -355.8399658203125, 112.95587921142578, 394.6246337890625, 469.3875427246094, 284.1922607421875, 1157.9090576171875, -20.004898071289062, 156.2374267578125, -499.2483215332031, 170.60891723632812, 3026.2529296875, -229.39060974121094, 184.85240173339844, 1340.4815673828125, 287.32073974609375, 484.2313232421875, -283.63885498046875, 762.20458984375, 1308.6988525390625, 276.65728759765625, -196.24224853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000483.npy"} +{"epoch": 0.7092511013215859, "step": 484, "batch_size": 64, "mean": 421.85662841796875, "std": 512.5430297851562, "min": -693.7930908203125, "p10": -117.09284744262695, "median": 366.9254608154297, "p90": 1092.231286621094, "max": 1766.6778564453125, "pos_frac": 0.78125, "sample": [423.31024169921875, -131.73765563964844, 1732.32080078125, 140.1591339111328, -111.96241760253906, -265.882568359375, 599.2843627929688, 122.23021697998047, 1766.6778564453125, 194.66192626953125, 94.77285766601562, 489.0340270996094, 544.5758666992188, 557.8406982421875, 985.3890380859375, 561.9710083007812, 354.2278137207031, 64.62152099609375, -199.62777709960938, -693.7930908203125, 398.2295837402344, -42.1187858581543, 992.5439453125, 965.7744140625, 1593.927734375, 41.6427001953125, 389.15057373046875, 752.6026611328125, 11.119075775146484, -74.62521362304688, -31.93465805053711, -76.78314208984375, 831.7276611328125, 1189.0462646484375, 37.092262268066406, 220.43777465820312, 229.3297119140625, 106.54073333740234, 186.38548278808594, -149.12217712402344, -20.902305603027344, 314.2519836425781, 752.621337890625, 379.62310791015625, 1254.97802734375, 23.222900390625, 696.9157104492188, 1038.2059326171875, 422.1078796386719, -210.62332153320312, -50.46717834472656, 746.9757080078125, 1115.385009765625, 830.3090209960938, 61.92808532714844, 1150.0966796875, 926.973876953125, 432.88531494140625, 442.92266845703125, 127.9742202758789, 839.5999755859375, 56.99281311035156, -119.2916030883789, 987.0978393554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000484.npy"} +{"epoch": 0.71071953010279, "step": 485, "batch_size": 64, "mean": 398.8216247558594, "std": 565.0696411132812, "min": -932.9967651367188, "p10": -112.24151153564449, "median": 278.9529266357422, "p90": 1065.2291503906251, "max": 2133.734375, "pos_frac": 0.78125, "sample": [667.080078125, 697.73779296875, 10.739456176757812, 260.9463806152344, 398.5987854003906, 113.09574127197266, 1157.6044921875, 176.11386108398438, 434.18487548828125, 463.4560241699219, 186.89398193359375, -30.60779571533203, 1404.8846435546875, 1081.5909423828125, 188.20074462890625, 978.3767700195312, -631.1881713867188, -40.56043243408203, 994.8173828125, 965.60205078125, 961.4091796875, 1199.8822021484375, 81.66204833984375, 471.9967041015625, 2133.734375, 1094.9969482421875, -36.95642852783203, 33.79814910888672, 770.8831176757812, -77.2104721069336, -54.603736877441406, -48.481170654296875, -521.305419921875, 160.3665771484375, 264.9581298828125, 1019.0418090820312, 525.42919921875, 120.64850616455078, 141.09124755859375, 707.5782470703125, -53.12579345703125, 139.44900512695312, 1921.718017578125, 467.4591064453125, -127.25481414794922, 349.1877746582031, 391.4768371582031, 632.807861328125, 292.9477233886719, -594.65185546875, 1027.0516357421875, 1014.474853515625, 57.104068756103516, -268.5751037597656, 50.10676574707031, -186.57777404785156, 856.6331787109375, 226.2178192138672, 352.4122619628906, 198.54568481445312, 20.68738555908203, -932.9967651367188, 881.9030151367188, 381.0964050292969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000485.npy"} +{"epoch": 0.7121879588839941, "step": 486, "batch_size": 64, "mean": 457.0060119628906, "std": 689.4300537109375, "min": -1209.8760986328125, "p10": -435.8781555175781, "median": 384.7865753173828, "p90": 1305.7870239257813, "max": 3059.820068359375, "pos_frac": 0.796875, "sample": [904.0462646484375, 552.7471313476562, 107.09950256347656, 532.3720092773438, 325.58697509765625, -754.0299072265625, 178.45809936523438, 59.048133850097656, 919.4938354492188, -153.77255249023438, 462.6169128417969, 1432.57275390625, 637.5949096679688, -784.3512573242188, -452.6630859375, 1530.6358642578125, 859.9552001953125, 906.5379638671875, -238.6913299560547, 983.166748046875, 1319.6158447265625, -396.71331787109375, 1640.393310546875, 1273.519775390625, 267.6634521484375, -111.15596771240234, 556.6444702148438, 327.4966125488281, 85.60177612304688, 1484.6334228515625, 1223.736083984375, 396.066650390625, -514.1049194335938, -757.7700805664062, 344.105224609375, 339.6468505859375, 135.52496337890625, 1319.9884033203125, 842.6619262695312, 813.6002197265625, 358.4942626953125, 669.238525390625, 498.62725830078125, -62.1673583984375, 563.5013427734375, 903.83349609375, 870.4549560546875, 596.4776000976562, 888.4850463867188, 1225.986083984375, 113.7955551147461, 347.9296875, 101.99124145507812, 373.5065002441406, 600.0582885742188, 140.7569122314453, 28.64533042907715, 584.4877319335938, -1209.8760986328125, 145.63014221191406, 369.3855895996094, -46.78948974609375, -473.465576171875, 3059.820068359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000486.npy"} +{"epoch": 0.7136563876651982, "step": 487, "batch_size": 64, "mean": 339.7569580078125, "std": 534.8098754882812, "min": -1681.393310546875, "p10": -236.00971679687493, "median": 272.87123107910156, "p90": 1042.0217407226564, "max": 1723.0501708984375, "pos_frac": 0.78125, "sample": [251.89833068847656, 1158.4779052734375, 202.86228942871094, -263.1175842285156, 1016.37646484375, 1113.1488037109375, 694.7139892578125, 459.2277526855469, 1632.57421875, -12.983757019042969, 237.64495849609375, -500.5565185546875, 1096.6968994140625, -172.75802612304688, 163.3328857421875, 1723.0501708984375, 276.88067626953125, -320.784423828125, 761.1009521484375, 773.9622192382812, 457.72088623046875, 603.8684692382812, -1681.393310546875, 166.44036865234375, -48.517112731933594, 652.1586303710938, 45.318626403808594, 904.7689208984375, 1264.5419921875, 21.89589500427246, 179.9427947998047, 300.5406494140625, 339.1565246582031, 556.9960327148438, 196.51513671875, -563.243896484375, 232.69552612304688, -507.8086853027344, 549.5036010742188, 268.8617858886719, 396.062255859375, 36.89862060546875, 602.5169677734375, -158.8836212158203, 561.3717651367188, 190.51641845703125, 126.24919128417969, 292.0010681152344, 424.296142578125, 245.664306640625, 100.65470886230469, 531.1882934570312, 260.9610595703125, 457.53631591796875, -24.014389038085938, -270.105224609375, 193.36691284179688, 1053.0125732421875, 710.9877319335938, 699.56103515625, -2.837066650390625, 578.828369140625, -133.07217407226562, 639.9716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000487.npy"} +{"epoch": 0.7151248164464024, "step": 488, "batch_size": 64, "mean": 356.37518310546875, "std": 449.441650390625, "min": -518.4766845703125, "p10": -110.15949325561522, "median": 357.1648406982422, "p90": 1019.1304382324221, "max": 1701.868408203125, "pos_frac": 0.75, "sample": [-3.9266128540039062, 16.600162506103516, 1431.676025390625, 298.676025390625, 959.7473754882812, 249.32408142089844, 667.123291015625, -100.62916564941406, -130.62820434570312, 311.3323669433594, 418.75628662109375, 177.4365692138672, 594.152099609375, 1166.42919921875, 358.31121826171875, 819.2947387695312, -85.89620971679688, 69.73529815673828, -518.4766845703125, -55.4921875, 556.7947998046875, -114.2439193725586, 522.4039306640625, 367.0755310058594, 66.19847106933594, -268.560546875, 60.54592514038086, 657.5769653320312, 1701.868408203125, 234.20909118652344, 457.03277587890625, 616.5911254882812, -374.5455627441406, 1044.580322265625, 702.767578125, 12.511089324951172, 1168.6912841796875, 456.8663024902344, 356.0184631347656, 87.4302749633789, 583.6784057617188, 820.9638671875, 217.76214599609375, 414.69024658203125, 204.75331115722656, 410.8927307128906, 573.2548217773438, 1088.165283203125, -129.20175170898438, -64.1867446899414, -22.057912826538086, 446.6341552734375, 484.5736389160156, 23.248205184936523, 413.0977783203125, 593.0055541992188, -57.26325988769531, 449.4270324707031, -9.73746109008789, -74.55631256103516, -494.66973876953125, 541.998291015625, 259.3609619140625, 1178.8206787109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000488.npy"} +{"epoch": 0.7165932452276065, "step": 489, "batch_size": 64, "mean": 342.5459289550781, "std": 502.50189208984375, "min": -482.5206298828125, "p10": -155.48104171752925, "median": 190.9325714111328, "p90": 1092.5627075195314, "max": 1906.3333740234375, "pos_frac": 0.71875, "sample": [756.695068359375, 2.9370269775390625, 221.19786071777344, 82.76365661621094, 54.482261657714844, 147.59652709960938, 179.7167205810547, -169.05099487304688, 101.79139709472656, 126.165771484375, -235.67832946777344, 247.57150268554688, 564.756591796875, 826.1371459960938, 406.5967712402344, 124.49491882324219, -172.8668975830078, 207.875732421875, -123.81781768798828, 830.301513671875, -37.14433288574219, -112.37655639648438, 202.14842224121094, 249.95281982421875, -39.75082015991211, 149.03721618652344, 119.18498229980469, -41.759029388427734, 179.11610412597656, 538.209228515625, -92.3350830078125, 49.60432434082031, -10.160097122192383, -205.05157470703125, 1096.337646484375, 683.8850708007812, 57.9334831237793, 105.52006530761719, 333.1805419921875, -482.5206298828125, 652.8222045898438, -355.8901672363281, 1275.4697265625, -433.3541259765625, 1200.5908203125, 1083.7545166015625, 648.854248046875, 1906.3333740234375, 317.042724609375, 617.8529052734375, 1666.7764892578125, 622.8896484375, 537.19580078125, 1374.265869140625, -45.439292907714844, -78.54642486572266, 641.9573974609375, 506.6316223144531, 540.5135498046875, 394.8804931640625, -1.318817138671875, 1400.8240966796875, 581.6915283203125, -55.53619384765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000489.npy"} +{"epoch": 0.7180616740088106, "step": 490, "batch_size": 64, "mean": 373.4256896972656, "std": 644.287109375, "min": -1180.908935546875, "p10": -434.34369812011715, "median": 204.953369140625, "p90": 1231.4268310546877, "max": 1994.563720703125, "pos_frac": 0.796875, "sample": [74.37460327148438, -1057.9771728515625, 1182.133544921875, -175.14207458496094, 148.89666748046875, -24.691856384277344, 148.1727294921875, 899.7031860351562, 732.1903686523438, -571.6705932617188, 120.12786865234375, -60.263153076171875, 1246.521240234375, 95.79855346679688, 582.9515380859375, -458.4750671386719, -210.29714965820312, 1196.20654296875, 678.4187622070312, -643.7344970703125, 39.50016403198242, 143.21505737304688, -775.7786865234375, 102.49305725097656, -1180.908935546875, 1281.4124755859375, 138.65179443359375, 548.5217895507812, 482.4374084472656, 803.0699462890625, 337.39483642578125, 1994.563720703125, 158.42816162109375, 642.4176025390625, 86.87657928466797, -122.93897247314453, 1699.8477783203125, 792.5833129882812, 1585.00048828125, 86.93241882324219, 127.46600341796875, 226.42324829101562, 1269.4368896484375, 635.9185791015625, 141.66783142089844, 752.7505493164062, -378.03717041015625, 838.95458984375, 44.635009765625, 489.90411376953125, 377.964599609375, -516.003662109375, 1726.3824462890625, 21.049072265625, 1104.059814453125, 918.361572265625, 186.28314208984375, 150.07489013671875, 607.841796875, 862.07177734375, 58.31843948364258, 751.1953125, 531.9378051757812, 223.62359619140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000490.npy"} +{"epoch": 0.7195301027900147, "step": 491, "batch_size": 64, "mean": 253.20948791503906, "std": 481.7109375, "min": -792.7244873046875, "p10": -283.0273498535156, "median": 180.76647186279297, "p90": 864.332397460938, "max": 1569.5234375, "pos_frac": 0.71875, "sample": [314.95672607421875, -286.11639404296875, 744.0316772460938, 524.4696044921875, 1451.6749267578125, 186.7823028564453, 286.1106872558594, -114.1260986328125, 458.5574645996094, 83.50128173828125, 288.8348693847656, 91.15071105957031, 147.78903198242188, -492.2171630859375, 568.1517333984375, 113.38606262207031, 561.6845703125, 533.7354125976562, -83.20591735839844, 495.2585754394531, 174.75064086914062, -18.11989402770996, 921.956298828125, -495.2614440917969, 234.13807678222656, 240.3601531982422, 713.9410400390625, -274.5621337890625, 912.281494140625, 13.263116836547852, 1258.1839599609375, 362.5412292480469, -792.7244873046875, 146.502197265625, -210.57801818847656, 276.1216735839844, -10.229377746582031, 1090.4752197265625, 244.35145568847656, 390.36676025390625, 667.81689453125, -582.3617553710938, 1569.5234375, 42.036529541015625, -193.1624298095703, 174.42568969726562, -275.819580078125, 223.1887664794922, -55.48180389404297, 570.7061157226562, 78.91300964355469, -371.55084228515625, 666.0263061523438, -82.34327697753906, 24.0969181060791, 82.95262145996094, 349.4761962890625, 752.451171875, 117.69651794433594, -48.038482666015625, 1354.9771728515625, 146.41180419921875, 452.91162109375, -511.6139221191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000491.npy"} +{"epoch": 0.7209985315712188, "step": 492, "batch_size": 64, "mean": 255.55410766601562, "std": 566.0716552734375, "min": -943.8262329101562, "p10": -416.1080047607422, "median": 159.55291748046875, "p90": 1094.114978027344, "max": 1394.915283203125, "pos_frac": 0.734375, "sample": [148.875732421875, 313.35052490234375, 554.08251953125, 26.1079044342041, 241.6373291015625, 59.38871765136719, 15.972991943359375, 736.5542602539062, -549.3599853515625, -152.2644500732422, 156.35403442382812, 706.806884765625, 607.1490478515625, -229.0855712890625, -943.8262329101562, 1002.3298950195312, -81.3078384399414, 1238.2706298828125, -199.10617065429688, 1325.4482421875, -428.25689697265625, 1394.915283203125, 349.9432678222656, -340.4170227050781, 353.953857421875, 369.3818359375, 240.76348876953125, -546.3471069335938, 78.58781433105469, 325.3234558105469, 10.25469970703125, 32.56855010986328, 162.75180053710938, -934.52490234375, -130.55709838867188, 1236.1622314453125, 869.9781494140625, 282.1322021484375, -721.1201171875, 148.7752685546875, 1012.353515625, -721.1322021484375, 1216.7330322265625, 22.61626434326172, -148.67703247070312, 798.5437622070312, 214.90284729003906, 82.75534057617188, 1054.741943359375, 51.427669525146484, 496.5521545410156, -161.09605407714844, 241.40203857421875, -387.7605895996094, 1110.9891357421875, 903.9822998046875, 625.4983520507812, 16.413619995117188, 75.32908630371094, 583.128662109375, -268.5258483886719, 1372.6890869140625, 45.72038269042969, 385.2271728515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000492.npy"} +{"epoch": 0.7224669603524229, "step": 493, "batch_size": 64, "mean": 275.22186279296875, "std": 521.0993041992188, "min": -1843.9498291015625, "p10": -209.9158493041992, "median": 189.7177734375, "p90": 1068.7117919921875, "max": 1294.2686767578125, "pos_frac": 0.734375, "sample": [1072.45263671875, 107.20235443115234, 529.5760498046875, 226.32049560546875, -93.43276977539062, 1287.315185546875, 885.6181640625, -18.530426025390625, 125.51619720458984, -394.30108642578125, 441.96075439453125, 99.23030090332031, 1026.65625, 505.70501708984375, 383.85247802734375, 146.23959350585938, -267.7734069824219, 360.8025207519531, 146.54867553710938, 42.4156494140625, 157.51036071777344, 233.66732788085938, 486.77679443359375, 386.70794677734375, -154.61370849609375, 421.51824951171875, 647.8721923828125, -236.91094970703125, -186.93780517578125, -107.4268569946289, 60.57623291015625, -1843.9498291015625, 320.499755859375, 26.292007446289062, 710.32470703125, 27.759302139282227, 1284.2060546875, 1294.2686767578125, 122.12938690185547, -214.8412628173828, -53.796417236328125, 337.97100830078125, 1176.387451171875, 815.33984375, 325.5148620605469, 1059.983154296875, -461.9809875488281, 334.7149353027344, 1256.696533203125, 1096.109375, 95.42903900146484, 607.6162109375, -243.8456268310547, 206.39926147460938, 217.5909423828125, -198.4232177734375, -103.75171661376953, 223.8532257080078, 651.865478515625, 173.03628540039062, 145.75933837890625, 27.3712158203125, -82.82931518554688, -41.61454772949219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000493.npy"} +{"epoch": 0.723935389133627, "step": 494, "batch_size": 64, "mean": 435.43121337890625, "std": 482.89764404296875, "min": -612.517578125, "p10": -109.63961181640619, "median": 408.5062713623047, "p90": 1044.1489074707033, "max": 1791.2457275390625, "pos_frac": 0.828125, "sample": [-2.420145034790039, 344.6048583984375, 597.0040893554688, 395.25494384765625, 1791.2457275390625, 1302.641357421875, -612.517578125, 726.3850708007812, 682.5365600585938, 64.02740478515625, 471.650390625, -28.185993194580078, 825.0672607421875, 319.66033935546875, -291.3826904296875, 1134.071044921875, 709.7108154296875, -220.44830322265625, 457.7825622558594, 818.38623046875, 125.33466339111328, -414.0092468261719, 269.6446838378906, 962.4345092773438, 851.927490234375, 63.531837463378906, 784.3275146484375, 1105.5712890625, 234.27687072753906, 358.2021179199219, -493.7176513671875, 67.45957946777344, 310.6734313964844, -253.67596435546875, 1704.3533935546875, 411.5523986816406, 441.0997314453125, 43.084007263183594, 431.69427490234375, 1200.69189453125, 1001.5917358398438, 492.245849609375, -49.37593078613281, 14.824516296386719, 494.28680419921875, 640.3612060546875, 727.7086181640625, 902.0469970703125, 359.28564453125, 683.2594604492188, -135.46690368652344, 122.87037658691406, 532.03857421875, 155.58847045898438, 138.68930053710938, 496.3116455078125, 848.8765258789062, 677.9755249023438, 226.21197509765625, 405.46014404296875, 1062.3876953125, -17.944416046142578, 346.8535461425781, 51.97998046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000494.npy"} +{"epoch": 0.7254038179148311, "step": 495, "batch_size": 64, "mean": 386.12274169921875, "std": 575.3947143554688, "min": -989.9065551757812, "p10": -207.7904342651367, "median": 307.53712463378906, "p90": 1076.8425781250003, "max": 2372.215087890625, "pos_frac": 0.78125, "sample": [359.6099853515625, 59.73030090332031, -76.29901123046875, -83.266357421875, 164.5794677734375, 418.662109375, -526.5121459960938, 216.568603515625, 268.3909912109375, 30.97674560546875, 162.9198455810547, 718.4224853515625, 906.3782348632812, -550.9490966796875, 380.69500732421875, 427.3235778808594, -278.0968933105469, 824.0001831054688, 80.60101318359375, 682.118896484375, 158.5061492919922, -31.368560791015625, 268.6762390136719, 82.410400390625, 292.5986022949219, 986.529052734375, 945.802490234375, 706.6759643554688, 266.5223693847656, 570.9691162109375, 270.19525146484375, 328.52337646484375, 145.89596557617188, 448.36602783203125, 1174.548095703125, 1101.02978515625, 776.2803955078125, -214.81617736816406, 2372.215087890625, 1353.271484375, 399.11590576171875, -40.85844421386719, 135.67758178710938, 651.9705200195312, 367.18365478515625, -607.8460693359375, 18.459457397460938, 408.8873291015625, 56.98387908935547, -324.6047058105469, -191.39703369140625, -8.481292724609375, 372.72625732421875, 614.4326171875, 1396.203857421875, 647.4580078125, 290.3164978027344, 844.3603515625, -989.9065551757812, 1235.2698974609375, 1020.40576171875, 322.47564697265625, 1985.79248046875, -81.45406341552734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000495.npy"} +{"epoch": 0.7268722466960352, "step": 496, "batch_size": 64, "mean": 203.84725952148438, "std": 449.3397216796875, "min": -1006.80224609375, "p10": -377.0470611572265, "median": 157.57869720458984, "p90": 849.9583923339844, "max": 1185.262451171875, "pos_frac": 0.765625, "sample": [104.01425170898438, -476.571533203125, 938.9752197265625, 932.2733154296875, -709.8440551757812, 699.7429809570312, 148.11972045898438, 273.28497314453125, 853.6818237304688, 160.99624633789062, 1185.262451171875, 301.89453125, 428.6710510253906, 249.03912353515625, -262.8641357421875, -8.045589447021484, -215.9202880859375, 434.31256103515625, -761.9505004882812, 1114.949462890625, 805.603759765625, 320.3059387207031, 841.2703857421875, 442.18865966796875, 896.1508178710938, 242.5457763671875, -112.40281677246094, -22.533767700195312, 75.95231628417969, -412.3987121582031, 207.8211669921875, 208.06016540527344, 137.58334350585938, 438.6634216308594, 94.92028045654297, -103.31022644042969, 449.80859375, 128.31771850585938, -626.1231079101562, 261.16180419921875, -433.548095703125, -294.55987548828125, 697.2352905273438, 24.630470275878906, 154.16114807128906, 526.2577514648438, 120.32783508300781, 435.2594909667969, 571.9788818359375, 478.1153259277344, 90.75503540039062, -175.7627410888672, 412.37469482421875, 37.46788787841797, 166.27639770507812, -1006.80224609375, 57.29570388793945, 971.9671630859375, 31.269676208496094, 0.9439468383789062, 383.1067199707031, 33.80189895629883, 46.06853485107422, 53.99700927734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000496.npy"} +{"epoch": 0.7283406754772394, "step": 497, "batch_size": 64, "mean": 207.31849670410156, "std": 483.5421142578125, "min": -937.5123901367188, "p10": -335.73664855957026, "median": 177.72314453125, "p90": 912.5368469238285, "max": 1389.3687744140625, "pos_frac": 0.671875, "sample": [-360.7436828613281, 955.7180786132812, 651.4623413085938, 680.3873901367188, 10.170886993408203, -277.38690185546875, 489.178466796875, -186.85206604003906, -113.20576477050781, -243.9868927001953, 232.0671844482422, 1035.7484130859375, 1189.203857421875, 400.98223876953125, 47.347923278808594, 208.5892333984375, 395.1716003417969, 214.3456268310547, 696.7548828125, 456.1424255371094, -102.14024353027344, -57.82329559326172, 598.8746337890625, 965.675048828125, -38.71366882324219, 10.874876022338867, 820.760498046875, -122.24586486816406, 76.65025329589844, -669.4885864257812, 466.0606689453125, 89.11927795410156, 7.688434600830078, 259.004150390625, 58.56314468383789, 1312.0255126953125, -32.05035400390625, 1389.3687744140625, 43.18638610839844, 77.53799438476562, 165.68118286132812, 293.7213439941406, 338.37274169921875, 668.8145751953125, -937.5123901367188, 189.76510620117188, 593.3384399414062, -441.1308898925781, 330.7384948730469, 123.58282470703125, 350.1750793457031, 269.28619384765625, 951.8695678710938, -272.9680480957031, -112.27037811279297, 237.75128173828125, -156.74241638183594, -389.5157470703125, -97.61665344238281, -871.87353515625, 686.616943359375, -508.54339599609375, 339.6767883300781, -116.85581970214844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000497.npy"} +{"epoch": 0.7298091042584435, "step": 498, "batch_size": 64, "mean": 347.5252685546875, "std": 589.2314453125, "min": -1304.3675537109375, "p10": -288.0455276489257, "median": 372.3294372558594, "p90": 1039.7615112304688, "max": 1937.865234375, "pos_frac": 0.71875, "sample": [-812.986572265625, 208.77822875976562, 665.6172485351562, 213.30618286132812, -172.34097290039062, 692.6446533203125, 652.2651977539062, 1052.4422607421875, 405.3403625488281, 743.8577880859375, 511.5296630859375, 800.0056762695312, 268.9856872558594, 918.1334838867188, -1304.3675537109375, 685.6394653320312, 986.9108276367188, 1379.925048828125, -84.30083465576172, 566.0845336914062, 116.62194061279297, 678.81884765625, -126.67291259765625, 1010.173095703125, -233.11111450195312, 374.1026916503906, 393.44775390625, 65.70087432861328, -400.39874267578125, -31.94109344482422, 290.4936218261719, 370.5561828613281, -1066.1405029296875, 1368.5933837890625, 502.1346740722656, 58.2525634765625, -154.28582763671875, 462.2896728515625, 601.5906982421875, 485.7255859375, 1286.818603515625, -26.98442840576172, 48.24566650390625, 13.713085174560547, 555.6968383789062, 580.462890625, -378.2107849121094, -4.155517578125, 1343.822998046875, 940.6829223632812, 252.2407684326172, -310.425048828125, 331.8025817871094, -196.84735107421875, -235.82664489746094, 16.824737548828125, 481.34832763671875, 158.337646484375, 935.7325439453125, 617.478759765625, -339.4103088378906, 1242.3460693359375, 1937.865234375, -153.36474609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000498.npy"} +{"epoch": 0.7312775330396476, "step": 499, "batch_size": 64, "mean": 423.11358642578125, "std": 572.0934448242188, "min": -1022.9998168945312, "p10": -182.92580261230466, "median": 451.3472900390625, "p90": 1073.7696777343754, "max": 2244.95849609375, "pos_frac": 0.765625, "sample": [1105.730224609375, 95.56343078613281, 199.98355102539062, -1022.9998168945312, -77.89507293701172, 480.1156921386719, 559.9456787109375, 648.466552734375, -55.94495391845703, -91.91421508789062, 508.6311950683594, 529.4198608398438, 106.69152069091797, 999.195068359375, 555.5410766601562, 174.19064331054688, 720.471435546875, -658.2885131835938, 2244.95849609375, 288.315185546875, 223.30349731445312, 980.4031372070312, 697.144775390625, 27.813621520996094, -646.5299682617188, 423.2699279785156, 281.60260009765625, 986.096435546875, -244.9671173095703, 1313.3453369140625, 253.87789916992188, 15.67724609375, 556.2913208007812, -124.16251373291016, 295.8830871582031, -23.514150619506836, 583.6326293945312, 934.636474609375, 938.1625366210938, -231.4430389404297, 35.17366027832031, 665.3421020507812, 661.48828125, 605.6458129882812, 661.354248046875, 156.08612060546875, 703.0739135742188, 1721.34130859375, 1379.0164794921875, 108.63089752197266, 454.0311584472656, 770.5777587890625, 448.6634216308594, 821.9290771484375, -75.78280639648438, 813.987060546875, 1318.4217529296875, 347.7518615722656, -554.414794921875, -157.6339874267578, -193.76515197753906, 605.9395751953125, -17.72184181213379, 1249.433837890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000499.npy"} +{"epoch": 0.7327459618208517, "step": 500, "batch_size": 64, "mean": 352.2098388671875, "std": 575.6246337890625, "min": -1175.8612060546875, "p10": -440.3055755615233, "median": 285.41009521484375, "p90": 952.8133300781251, "max": 1780.4996337890625, "pos_frac": 0.796875, "sample": [64.88278198242188, -216.2469940185547, -673.5013427734375, 1780.4996337890625, -595.2086791992188, 545.371826171875, 967.0838012695312, 510.0058288574219, 841.5845336914062, 121.08458709716797, 811.2130126953125, 726.62451171875, 766.387939453125, -490.6368103027344, 5.534696578979492, 520.6529541015625, 37.39678955078125, 69.986572265625, 303.64129638671875, 1399.922607421875, 643.5704345703125, -62.72428894042969, 745.963134765625, 1412.294677734375, -10.091224670410156, 765.9459228515625, -490.49078369140625, 874.9064331054688, 919.5155639648438, -766.3357543945312, 857.6279296875, 816.27294921875, -82.64387512207031, 157.23143005371094, 52.93500518798828, 831.41552734375, -674.5201416015625, 128.83090209960938, 1128.509765625, -1175.8612060546875, -182.0343780517578, 50.2449836730957, 186.35595703125, 692.016357421875, 91.8411636352539, 21.6290225982666, 631.001220703125, 82.16268157958984, 128.294921875, 267.17889404296875, 1226.308837890625, 796.0176391601562, 833.4451904296875, 180.0465087890625, 609.5564575195312, 491.6852111816406, 708.1845703125, 306.4617919921875, 25.95210838317871, 690.3529052734375, 70.83454895019531, 1256.47705078125, -323.2067565917969, 131.9900665283203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000500.npy"} +{"epoch": 0.7342143906020558, "step": 501, "batch_size": 64, "mean": 373.15838623046875, "std": 592.7010498046875, "min": -931.5608520507812, "p10": -328.3506561279296, "median": 336.7568054199219, "p90": 1220.4635498046878, "max": 1953.343994140625, "pos_frac": 0.75, "sample": [-133.11355590820312, 384.7838134765625, 299.65570068359375, -413.6865539550781, -25.15158462524414, 1405.93212890625, 237.04466247558594, 943.142578125, 256.8671875, 102.66952514648438, 197.03536987304688, -179.57630920410156, 268.46453857421875, 125.40824890136719, 1953.343994140625, 744.547607421875, 1018.41796875, 407.114013671875, 447.0067443847656, 955.1304321289062, 1245.474365234375, -107.61015319824219, 295.96795654296875, 198.38323974609375, 893.1058349609375, -492.5455627441406, -642.4938354492188, 441.05072021484375, 1054.028564453125, 554.6538696289062, -825.6250610351562, 228.88250732421875, 547.8641967773438, 366.4879150390625, 421.088134765625, 1395.1737060546875, -257.71002197265625, 507.47235107421875, 359.0526428222656, 942.8218383789062, 1252.6531982421875, 12.677970886230469, -180.7509002685547, -355.8323059082031, 696.2996215820312, -18.50103759765625, 73.76878356933594, 533.79541015625, 68.59292602539062, -931.5608520507812, 314.4609680175781, 548.2860107421875, -264.226806640625, -100.57272338867188, 63.889530181884766, -481.983642578125, 1162.10498046875, 742.0913696289062, 1304.2255859375, 488.37841796875, 1813.69677734375, 556.760986328125, 449.197021484375, 14.125553131103516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000501.npy"} +{"epoch": 0.73568281938326, "step": 502, "batch_size": 64, "mean": 381.4633483886719, "std": 482.5098571777344, "min": -565.3206176757812, "p10": -77.87691879272457, "median": 312.5458221435547, "p90": 1015.9113464355471, "max": 2566.7119140625, "pos_frac": 0.859375, "sample": [613.032470703125, -327.13092041015625, 973.4894409179688, 393.9052429199219, 43.8115234375, 709.4496459960938, -141.0347900390625, 9.752685546875, 1302.7454833984375, 365.399658203125, 387.021728515625, 268.8642272949219, 299.7012939453125, 1074.155029296875, 945.6868286132812, 20.2947998046875, 37.58111572265625, 309.6842346191406, -565.3206176757812, 70.71112060546875, 692.9794311523438, 1034.0921630859375, 97.16936492919922, 424.3417053222656, 35.22148132324219, 199.41880798339844, 73.61921691894531, 339.0947570800781, 465.530517578125, 317.3236083984375, -238.76707458496094, 1111.02197265625, 20.75444793701172, 550.3917236328125, 441.6796875, 878.4682006835938, -93.82667541503906, 2566.7119140625, 14.171920776367188, 1.4876575469970703, 786.2529907226562, 36.50205993652344, 277.78857421875, 308.5794677734375, 399.1468505859375, 569.1595458984375, 1161.58740234375, 38.53296661376953, 645.677978515625, 451.23114013671875, 787.4542236328125, -3.0455474853515625, -40.66082000732422, 115.06951904296875, 1084.8887939453125, -133.53993225097656, 315.40740966796875, 83.93521118164062, 725.280517578125, 302.0609130859375, -132.71963500976562, 362.9072265625, 195.85511779785156, 353.6180419921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000502.npy"} +{"epoch": 0.737151248164464, "step": 503, "batch_size": 64, "mean": 343.6672668457031, "std": 466.2494201660156, "min": -353.9173583984375, "p10": -139.0587936401367, "median": 243.3192367553711, "p90": 830.9392395019531, "max": 2142.075439453125, "pos_frac": 0.75, "sample": [1.6187248229980469, -66.31269073486328, 1164.48291015625, 11.065963745117188, -151.40736389160156, -197.57196044921875, 455.18878173828125, 230.02406311035156, 85.93689727783203, -66.37801361083984, -353.9173583984375, 347.85101318359375, -182.81405639648438, 268.40313720703125, 996.9057006835938, 818.7616577148438, 337.6207275390625, 804.8275146484375, 73.79547119140625, 780.996337890625, -33.51490783691406, -42.47966384887695, 80.61625671386719, 33.50708770751953, 382.81060791015625, -129.73497009277344, -105.44097137451172, 530.8133544921875, 198.38601684570312, 134.13287353515625, 520.1304931640625, -63.4295654296875, 113.8502197265625, 679.8550415039062, 23.862091064453125, -236.61761474609375, 2142.075439453125, 1773.8031005859375, 548.1015625, 378.4029541015625, 105.24386596679688, 691.01708984375, 756.6033935546875, 26.293142318725586, -77.78179931640625, 658.743408203125, 150.33201599121094, 816.9479370117188, 242.00924682617188, 385.14093017578125, 174.34719848632812, 633.1572265625, 447.374267578125, 244.6292266845703, 1056.08642578125, 541.8155517578125, -143.05471801757812, 764.585693359375, -200.77706909179688, 836.158203125, 287.667236328125, 462.91241455078125, 857.2793579101562, -10.231781005859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000503.npy"} +{"epoch": 0.7386196769456681, "step": 504, "batch_size": 64, "mean": 414.3384094238281, "std": 563.3538208007812, "min": -676.8782958984375, "p10": -331.8172622680663, "median": 343.8470916748047, "p90": 1116.7745727539063, "max": 1789.207763671875, "pos_frac": 0.78125, "sample": [-15.073883056640625, 138.43646240234375, 897.8919067382812, 414.3507080078125, 133.6667022705078, 335.1185607910156, 657.9771118164062, -237.42303466796875, 1683.882080078125, 1365.2425537109375, 1789.207763671875, -594.3264770507812, 605.8585205078125, 751.095703125, -47.09532165527344, -94.00837707519531, 673.5189208984375, 211.28311157226562, -6.016731262207031, 52.175445556640625, 26.74590301513672, 1.1628036499023438, 352.57562255859375, -246.5740509033203, 239.1813507080078, 246.0415802001953, -523.6599731445312, 530.3082885742188, 181.88540649414062, 425.87506103515625, -402.6255187988281, 1095.28076171875, -368.3500671386719, 1347.58154296875, 812.72900390625, 670.2168579101562, 785.2550048828125, 579.2098999023438, 261.6435241699219, 325.9727478027344, 83.75640869140625, 1268.3458251953125, 874.2311401367188, 507.1394348144531, 918.6211547851562, -155.44845581054688, -542.1490478515625, 866.7579956054688, 53.46659851074219, 947.549072265625, 206.35946655273438, 717.6491088867188, 1125.9862060546875, 732.572021484375, 308.01409912109375, 285.17315673828125, -382.11212158203125, 991.7693481445312, 1677.90576171875, 362.4380798339844, -676.8782958984375, 561.72314453125, 722.4896240234375, 6.0806884765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000504.npy"} +{"epoch": 0.7400881057268722, "step": 505, "batch_size": 64, "mean": 219.666259765625, "std": 460.5285949707031, "min": -777.8680419921875, "p10": -284.54921722412104, "median": 166.59014129638672, "p90": 907.0903930664062, "max": 1370.2735595703125, "pos_frac": 0.6875, "sample": [14.621269226074219, 514.8759765625, -307.57763671875, 636.1805419921875, -48.64283752441406, 925.4527587890625, 56.30189514160156, -36.81492614746094, 46.893550872802734, 24.455215454101562, 777.5480346679688, 904.1351318359375, 1370.2735595703125, -16.1722412109375, 575.0384521484375, 472.73565673828125, -162.01438903808594, 391.45245361328125, 136.17535400390625, -230.8162384033203, 346.7716369628906, 579.7442626953125, 184.885498046875, 83.74298095703125, -539.0060424804688, -688.6915893554688, 196.04017639160156, 279.7239074707031, 1056.360107421875, 166.53936767578125, -86.14649963378906, 434.1910400390625, 400.3531494140625, -228.91000366210938, 238.21194458007812, 720.1148681640625, 51.901161193847656, 712.6011962890625, -101.8447494506836, 555.4584350585938, -732.9978637695312, -777.8680419921875, 162.9603729248047, 185.6011505126953, 14.4169921875, 57.80565643310547, 967.0282592773438, 479.4881896972656, -225.08033752441406, 963.313232421875, 166.6409149169922, -190.2027587890625, -359.87481689453125, 892.2620849609375, 195.2247772216797, -79.55601501464844, 908.55322265625, -489.1580810546875, -126.098876953125, 46.23158264160156, 908.35693359375, 228.160400390625, 493.5923156738281, -36.30088424682617], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000505.npy"} +{"epoch": 0.7415565345080763, "step": 506, "batch_size": 64, "mean": 368.9680480957031, "std": 519.1372680664062, "min": -799.6482543945312, "p10": -187.8771102905273, "median": 332.9208068847656, "p90": 1077.28134765625, "max": 2015.00537109375, "pos_frac": 0.75, "sample": [-443.43157958984375, 38.632843017578125, 738.8993530273438, -4.9699249267578125, 234.19747924804688, 325.958740234375, -201.07022094726562, 567.5352172851562, 822.2042846679688, 48.216365814208984, 519.2648315429688, 261.9244384765625, 289.06744384765625, 153.30751037597656, -733.001708984375, 1085.621337890625, 477.39349365234375, -2.7162628173828125, 339.88287353515625, 699.0321655273438, 499.3658447265625, 1370.5225830078125, 344.74468994140625, 113.34691619873047, 2015.00537109375, -154.41770935058594, -279.47564697265625, 604.2655029296875, 728.2905883789062, 261.5677490234375, 436.442626953125, 1230.2445068359375, 424.6042175292969, 104.351318359375, 586.5010986328125, -631.8640747070312, 351.61322021484375, 161.97509765625, 889.727783203125, 905.82421875, 463.40850830078125, 715.9257202148438, -157.0931854248047, 460.98870849609375, 96.14737701416016, -799.6482543945312, 267.6056213378906, 3.4696273803710938, 1110.3482666015625, 1031.3629150390625, 1095.4593505859375, -4.150108337402344, 324.7742919921875, 807.8313598632812, 732.0243530273438, 1074.840576171875, -38.83802795410156, 1078.327392578125, 191.25868225097656, -40.401222229003906, -323.82086181640625, -94.90713500976562, 529.8372802734375, -89.3821792602539], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000506.npy"} +{"epoch": 0.7430249632892805, "step": 507, "batch_size": 64, "mean": 315.6078186035156, "std": 588.3258056640625, "min": -945.0108642578125, "p10": -350.09112854003905, "median": 280.380126953125, "p90": 881.5725952148439, "max": 2295.4765625, "pos_frac": 0.734375, "sample": [6.855581283569336, 408.79022216796875, -34.27613830566406, -945.0108642578125, 1238.9844970703125, -111.560302734375, 62.45805358886719, 416.4222717285156, 35.772804260253906, 227.12579345703125, -54.31129455566406, 429.2716979980469, 52.58229064941406, 471.18963623046875, 649.120361328125, 758.6508178710938, -452.25128173828125, 298.435302734375, 185.4140167236328, 2073.233154296875, 473.7509765625, 639.3067016601562, 68.06256103515625, -196.14633178710938, 506.93231201171875, 321.1837158203125, 647.2958984375, 892.5921020507812, -387.892333984375, 412.1116943359375, 269.20159912109375, -62.759857177734375, -334.05609130859375, -102.43443298339844, 17.855504989624023, 574.185302734375, 620.2283935546875, 1239.670166015625, 647.7984008789062, 68.27410125732422, 283.73016357421875, 20.910751342773438, -42.6002197265625, 364.1888732910156, 2295.4765625, 855.8604125976562, 163.40777587890625, 342.97491455078125, -423.4491882324219, 715.8385620117188, 780.86083984375, -587.8284912109375, -34.66767883300781, 717.2364501953125, 1816.8685302734375, 640.3504028320312, 905.6522216796875, 438.28533935546875, 277.03009033203125, 4.302953720092773, 30.59545135498047, -195.41781616210938, -356.9632873535156, -845.7997436523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000507.npy"} +{"epoch": 0.7444933920704846, "step": 508, "batch_size": 64, "mean": 269.07879638671875, "std": 625.8920288085938, "min": -1553.1361083984375, "p10": -388.6345001220703, "median": 218.46559143066406, "p90": 974.528210449219, "max": 2132.926513671875, "pos_frac": 0.6875, "sample": [168.47314453125, 164.32708740234375, -663.5458374023438, 681.5845947265625, 537.9393310546875, -12.9498291015625, 128.94607543945312, 1950.65966796875, 395.8139343261719, 744.2454223632812, 456.5740661621094, 1001.5711669921875, 319.1713562011719, 2132.926513671875, 572.3746337890625, 320.75323486328125, 195.26565551757812, -710.38916015625, 1213.1243896484375, 911.427978515625, -524.0341796875, -200.95323181152344, 327.81988525390625, -1104.4716796875, 1025.3231201171875, -200.47225952148438, 125.78607177734375, -241.15505981445312, -110.28202819824219, 346.767578125, -75.57496643066406, 346.6922607421875, 4.7346343994140625, -30.14990997314453, 1752.688232421875, -1553.1361083984375, 115.32238006591797, 132.8158416748047, 187.29217529296875, 791.51416015625, 336.2900085449219, 106.00910186767578, 109.65338134765625, 462.28204345703125, 648.9807739257812, 241.66552734375, 530.21142578125, 557.855224609375, 758.3477172851562, 558.0366821289062, 622.1187133789062, -309.8487243652344, -283.6678466796875, 1085.11962890625, 579.0191650390625, -248.75439453125, -115.1904296875, -47.192138671875, -416.28582763671875, 676.8687133789062, -349.0843505859375, 384.63665771484375, 114.7363052368164, -405.5845642089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000508.npy"} +{"epoch": 0.7459618208516887, "step": 509, "batch_size": 64, "mean": 298.84124755859375, "std": 463.45233154296875, "min": -608.7313842773438, "p10": -303.9815216064453, "median": 244.83094024658203, "p90": 974.8138122558595, "max": 1632.935546875, "pos_frac": 0.78125, "sample": [-17.99555015563965, -74.83901977539062, -608.7313842773438, 981.2359619140625, -120.0833740234375, 528.0187377929688, 61.46332550048828, 88.45423126220703, 507.97845458984375, 320.7329406738281, 680.9926147460938, 133.61419677734375, 296.58526611328125, 102.29568481445312, 168.83949279785156, 253.6924285888672, 950.3187866210938, -476.09039306640625, -467.016357421875, 402.1681823730469, 1114.902587890625, 452.9515686035156, 148.6292266845703, 259.24920654296875, 394.579345703125, 722.8699340820312, 164.707275390625, 74.13216400146484, 332.8334045410156, 429.6949768066406, 646.7203979492188, -411.6483459472656, 133.82032775878906, -147.4463653564453, 372.1382751464844, 994.3099975585938, 711.5233764648438, 113.17292785644531, 217.33731079101562, -285.10546875, -2.78271484375, 505.04443359375, 332.61383056640625, 89.551513671875, 47.37205505371094, -580.055908203125, 1191.50390625, 1288.8541259765625, -120.55519104003906, 1632.935546875, -321.6329345703125, 182.58807373046875, -312.0712585449219, 48.164093017578125, 257.5480651855469, 365.08697509765625, 122.44896697998047, 219.67202758789062, 235.96945190429688, 503.88385009765625, 731.6160278320312, 1194.4075927734375, 402.841064453125, 959.8287963867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000509.npy"} +{"epoch": 0.7474302496328928, "step": 510, "batch_size": 64, "mean": 386.30255126953125, "std": 428.61993408203125, "min": -518.7579345703125, "p10": -163.43018188476555, "median": 380.16473388671875, "p90": 880.5156494140626, "max": 1440.7720947265625, "pos_frac": 0.78125, "sample": [252.22921752929688, 891.9432983398438, 1246.6268310546875, 435.53900146484375, -191.43209838867188, 811.4847412109375, 759.5302734375, 359.69781494140625, 33.32633972167969, 691.9409790039062, 407.5993957519531, 537.9701538085938, -299.2051086425781, 509.395263671875, 1116.7001953125, 838.7178344726562, 686.03515625, -518.7579345703125, -64.53227996826172, 853.8511352539062, 291.9169006347656, 358.7679443359375, 418.03924560546875, 210.96585083007812, 264.1983947753906, 665.0823974609375, 664.8672485351562, 893.8244018554688, 1051.6395263671875, -283.9373779296875, 818.4248046875, 1440.7720947265625, -98.09237670898438, 109.08721160888672, 740.0092163085938, 49.397743225097656, 281.4331970214844, 1153.8076171875, -300.395263671875, 298.70831298828125, 242.53321838378906, 83.62198638916016, 662.3623046875, 237.9298553466797, 555.2415161132812, -48.811073303222656, 461.9690856933594, 426.608642578125, 64.62089538574219, 400.63165283203125, 758.9124755859375, -85.6004867553711, 95.6243896484375, 769.9480590820312, -82.19099426269531, 533.276611328125, -325.9411926269531, 827.7694091796875, -45.54906463623047, -218.171875, -12.879079818725586, 694.4056396484375, 285.3278503417969, 54.545223236083984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000510.npy"} +{"epoch": 0.748898678414097, "step": 511, "batch_size": 64, "mean": 310.4611511230469, "std": 569.7615356445312, "min": -1479.0628662109375, "p10": -161.33842773437493, "median": 217.49087524414062, "p90": 931.8967468261719, "max": 1756.6630859375, "pos_frac": 0.765625, "sample": [-29.79546356201172, 1252.52880859375, 170.36734008789062, 149.8632354736328, 427.68487548828125, 135.45018005371094, 931.3986206054688, 1388.54736328125, 507.56597900390625, 145.65077209472656, 98.34619903564453, 739.9437255859375, 689.7570190429688, 632.261962890625, 83.72035217285156, 1237.231201171875, -1479.0628662109375, 396.28350830078125, -431.5257568359375, 676.1590576171875, 1404.7977294921875, 805.5966186523438, 80.97364807128906, 482.810302734375, 246.22735595703125, 50.44818878173828, -728.3885498046875, 406.55987548828125, -85.85562133789062, 589.6729736328125, 489.91192626953125, 2.1049346923828125, -20.819833755493164, 265.60308837890625, 293.1084899902344, 117.59722137451172, 932.1102294921875, -193.68820190429688, -4.53155517578125, 211.9671630859375, 846.1094970703125, 215.97280883789062, 42.24800491333008, -347.3258361816406, -56.219810485839844, 722.307373046875, 780.7615966796875, 90.57975769042969, -8.462377548217773, 52.76429748535156, -194.33285522460938, 454.6695861816406, 543.21142578125, -1.817117691040039, 219.00894165039062, 434.93206787109375, 222.44320678710938, -11.567802429199219, 1756.6630859375, 93.02668762207031, 448.8449401855469, -1322.427490234375, 137.01541137695312, 1682.525634765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000511.npy"} +{"epoch": 0.750367107195301, "step": 512, "batch_size": 64, "mean": 417.19671630859375, "std": 434.2760925292969, "min": -428.68487548828125, "p10": -91.3902809143066, "median": 372.56101989746094, "p90": 1065.6143310546877, "max": 1751.6025390625, "pos_frac": 0.828125, "sample": [1101.438720703125, 919.274169921875, 726.2068481445312, -268.3819580078125, -110.09019470214844, 18.929290771484375, 1751.6025390625, 273.5244140625, 65.26382446289062, -47.75714874267578, 157.4780731201172, 667.1636962890625, 3.84136962890625, 428.59283447265625, -299.8970031738281, -17.99252700805664, 551.9671630859375, 521.8958740234375, 359.62921142578125, -14.046808242797852, -191.17733764648438, 713.9440307617188, 118.7373046875, 553.4229125976562, 1195.4576416015625, 733.3294067382812, 255.73045349121094, 465.94293212890625, 15.692375183105469, 713.2972412109375, 325.6043701171875, 342.7166748046875, 491.76678466796875, 542.7932739257812, 236.01014709472656, 190.5549774169922, 1105.49658203125, 385.4928283691406, -158.68634033203125, 876.30517578125, 113.37442016601562, 1143.415283203125, 420.3273010253906, 1088.68408203125, 35.43785095214844, -428.68487548828125, 358.86279296875, 35.56938552856445, 250.29148864746094, 391.19818115234375, 1118.9813232421875, -11.096214294433594, 318.3825378417969, 709.535400390625, 309.9640808105469, 499.3794250488281, 745.2223510742188, 176.52984619140625, 431.692138671875, 775.7238159179688, -195.7685089111328, 1011.784912109375, 863.9414672851562, 836.767333984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000512.npy"} +{"epoch": 0.7518355359765051, "step": 513, "batch_size": 64, "mean": 297.572509765625, "std": 473.8290100097656, "min": -663.374755859375, "p10": -207.22743682861326, "median": 249.83712768554688, "p90": 1042.0314819335938, "max": 1438.91845703125, "pos_frac": 0.71875, "sample": [685.76513671875, 347.9437255859375, 326.92498779296875, 125.73249053955078, 304.7579650878906, 281.8180236816406, 291.62371826171875, 194.66697692871094, -179.38597106933594, 1005.4346923828125, 589.8258666992188, -166.8865966796875, 653.7241821289062, 65.62059783935547, 62.957786560058594, 763.0537719726562, 32.554779052734375, -23.3345947265625, 208.1288604736328, 281.84893798828125, 899.7205810546875, 1127.6591796875, 1276.0843505859375, 211.494384765625, 600.581787109375, 2.8472023010253906, 459.107177734375, 226.31671142578125, 332.5108642578125, -195.78387451171875, 94.74591064453125, 839.0888671875, 551.5897216796875, 423.51629638671875, 313.03369140625, -53.4364013671875, -134.2344207763672, -612.3705444335938, 273.3575439453125, 1438.91845703125, -40.07099914550781, 138.3392333984375, 67.038330078125, -663.374755859375, 617.4157104492188, -22.21380615234375, -397.9588623046875, -56.62737274169922, 1066.749755859375, 834.8319091796875, -539.8618774414062, -1.9435539245605469, 1057.7158203125, 1167.5294189453125, 323.43359375, 173.49014282226562, -364.43170166015625, 640.421875, -108.64237213134766, -212.13182067871094, -317.71746826171875, 1223.96826171875, 185.6748046875, 345.482666015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000513.npy"} +{"epoch": 0.7533039647577092, "step": 514, "batch_size": 64, "mean": 295.4927673339844, "std": 490.9858093261719, "min": -797.73486328125, "p10": -184.60428161621093, "median": 199.8984375, "p90": 913.1573242187501, "max": 1847.56689453125, "pos_frac": 0.703125, "sample": [-424.07354736328125, -145.47537231445312, 116.85090637207031, -71.56608581542969, 1080.596923828125, 223.81265258789062, 625.4901123046875, 590.5390014648438, 1847.56689453125, 917.1830444335938, 147.60142517089844, 624.6970825195312, -124.48218536376953, 33.80743408203125, 319.90936279296875, -108.64097595214844, -261.4247741699219, 188.39993286132812, 1167.7342529296875, 1125.773193359375, -149.39395141601562, -80.99250030517578, 339.5162048339844, -101.6094970703125, 34.709205627441406, 298.7516784667969, 284.62164306640625, 119.81625366210938, 454.29998779296875, -110.09009552001953, 892.0993041992188, 543.9781494140625, -193.867431640625, -42.93804931640625, 145.47177124023438, 283.60064697265625, 1149.616455078125, -162.99026489257812, 34.54845428466797, -244.92803955078125, 345.406982421875, -421.953125, -87.56647491455078, 462.47235107421875, 246.72396850585938, 105.82444763183594, -797.73486328125, -131.9344024658203, -262.47552490234375, 891.2322998046875, 211.39694213867188, 472.98236083984375, 436.73553466796875, 714.3936767578125, 903.7639770507812, 147.5899658203125, 304.70599365234375, 670.7545166015625, 721.3763427734375, 89.53153991699219, 746.2730102539062, 41.1331672668457, 138.41616821289062, 1593.96923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000514.npy"} +{"epoch": 0.7547723935389133, "step": 515, "batch_size": 64, "mean": 225.076171875, "std": 442.7667541503906, "min": -1599.1011962890625, "p10": -245.14678649902342, "median": 225.03799438476562, "p90": 734.3702087402347, "max": 1325.9781494140625, "pos_frac": 0.75, "sample": [-109.33567810058594, -120.82600402832031, 481.5758972167969, 892.9921875, 576.4348754882812, -1599.1011962890625, 484.8462219238281, 429.52728271484375, 152.16790771484375, -571.3133544921875, -265.6177978515625, -11.902603149414062, 8.701904296875, 216.2021484375, 277.87542724609375, 297.6318664550781, 170.1896209716797, 536.2523193359375, -352.29791259765625, 138.59127807617188, 343.3828430175781, -106.23948669433594, 159.8214569091797, 329.45220947265625, 237.33518981933594, 73.58378601074219, 607.8402099609375, -255.821533203125, 506.51116943359375, 38.40348815917969, 772.6082153320312, 29.90433692932129, -220.23904418945312, 645.148193359375, 233.87384033203125, 375.45733642578125, 590.8617553710938, 13.636711120605469, 1199.5853271484375, -183.41049194335938, 162.2756805419922, 988.2586059570312, -367.1263427734375, -18.985984802246094, -396.2530212402344, 238.27877807617188, 108.84925079345703, 809.3547973632812, -11.362865447998047, 95.474853515625, 101.4157943725586, 1053.276611328125, 194.10049438476562, 154.9871826171875, 337.0713195800781, 599.342529296875, 501.4598083496094, -105.35995483398438, 336.8797607421875, 383.5682373046875, 317.1559143066406, 234.10427856445312, 1325.9781494140625, 337.8408203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000515.npy"} +{"epoch": 0.7562408223201175, "step": 516, "batch_size": 64, "mean": 350.2784423828125, "std": 423.8194274902344, "min": -575.0857543945312, "p10": -163.24182586669917, "median": 350.2778625488281, "p90": 908.2671203613285, "max": 1241.86328125, "pos_frac": 0.796875, "sample": [183.30526733398438, 217.25399780273438, 768.3536376953125, -125.96692657470703, 576.3359985351562, 777.8609619140625, 470.2342224121094, -128.68702697753906, 248.3542022705078, 486.33599853515625, 1241.86328125, 461.55584716796875, 515.4243774414062, 512.3876953125, 180.4442596435547, -123.48403930664062, 589.3065795898438, 7.8334808349609375, 126.61846923828125, 1211.37060546875, -575.0857543945312, 103.6463623046875, -178.051025390625, -451.9931640625, 940.789306640625, 489.69329833984375, -510.3858337402344, 832.3820190429688, 1197.0367431640625, 188.5713653564453, 320.2629089355469, 808.9959106445312, 992.2962036132812, 259.573486328125, 570.5291137695312, 669.5684814453125, 277.03167724609375, -14.038028717041016, 478.8871154785156, 366.66650390625, 160.9579315185547, -44.654815673828125, 965.1503295898438, 249.13662719726562, 74.33291625976562, 1043.893310546875, 206.53981018066406, 201.79559326171875, 20.688316345214844, 483.0647277832031, 422.69146728515625, 498.872314453125, 86.79151916503906, 581.0501708984375, 800.9819946289062, -218.72918701171875, -343.34820556640625, -462.0380859375, 516.7219848632812, 616.8665771484375, 532.555419921875, 778.3236083984375, 333.88922119140625, -50.79335403442383], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000516.npy"} +{"epoch": 0.7577092511013216, "step": 517, "batch_size": 64, "mean": 408.0665283203125, "std": 538.670166015625, "min": -655.7013549804688, "p10": -191.9057479858398, "median": 367.2696228027344, "p90": 925.2973266601563, "max": 2370.55078125, "pos_frac": 0.765625, "sample": [540.2921142578125, 699.818115234375, 98.049072265625, 344.94818115234375, 841.05419921875, 253.37338256835938, -392.62384033203125, 843.8239135742188, 513.1073608398438, -203.3366241455078, 559.9391479492188, 1898.7725830078125, 1353.463134765625, 369.58233642578125, 508.4737548828125, -126.21541595458984, 267.49676513671875, 564.5325927734375, 125.03936004638672, 783.9463500976562, 1475.582763671875, 669.0352783203125, -6.980672836303711, 274.5314636230469, -655.7013549804688, -260.32501220703125, 123.06896209716797, 400.5074157714844, -375.6038513183594, 2370.55078125, 391.92071533203125, -10.062973022460938, -137.29005432128906, 929.7713623046875, 134.96441650390625, 620.638671875, -462.6051025390625, 458.7773742675781, -51.1998291015625, 409.412109375, 805.1146240234375, -104.2844467163086, 364.9569091796875, 303.1481628417969, 629.4696044921875, 547.6724853515625, 717.5935668945312, 390.75823974609375, 34.92945098876953, 1321.0584716796875, 129.77130126953125, 1121.808349609375, 356.328125, 914.85791015625, 180.505615234375, 751.2454833984375, -272.40301513671875, 211.17611694335938, 273.7471618652344, 414.0709228515625, -165.23370361328125, 855.2294311523438, 255.03372192382812, -62.824501037597656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000517.npy"} +{"epoch": 0.7591776798825257, "step": 518, "batch_size": 64, "mean": 336.003662109375, "std": 432.7972717285156, "min": -842.8505859375, "p10": -239.32052612304685, "median": 276.92066955566406, "p90": 925.6082275390626, "max": 1189.91455078125, "pos_frac": 0.796875, "sample": [128.94223022460938, 1140.7247314453125, 515.60595703125, 349.97589111328125, 246.7366180419922, 260.8826904296875, 498.6963806152344, 673.3876953125, 83.78779602050781, 232.19061279296875, 933.2696533203125, 662.6155395507812, 104.5147705078125, -286.75885009765625, 550.64501953125, 243.35707092285156, 400.895751953125, 172.52740478515625, 59.2762451171875, 492.52105712890625, 996.9091796875, -43.306739807128906, -223.50686645507812, 134.04766845703125, -68.36627197265625, 316.3471984863281, 783.6973266601562, 136.19482421875, 769.8211669921875, 885.5684204101562, 907.7315673828125, 66.09385681152344, 1183.255615234375, 1189.91455078125, 892.8496704101562, 696.580322265625, 118.99419403076172, -110.88031005859375, 499.3396301269531, 604.6056518554688, 293.93695068359375, -101.8785400390625, 477.6662292480469, -246.09780883789062, 466.8794860839844, -376.31915283203125, -348.801025390625, -842.8505859375, 354.6785583496094, 490.2149963378906, 33.40946960449219, 212.18115234375, -264.5906982421875, 189.79248046875, -395.20465087890625, 1020.63720703125, 1013.90380859375, 253.72140502929688, -32.24985122680664, 902.5640258789062, 292.9586486816406, 135.93714904785156, 562.8551025390625, 211.2041778564453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000518.npy"} +{"epoch": 0.7606461086637298, "step": 519, "batch_size": 64, "mean": 231.53121948242188, "std": 444.94757080078125, "min": -949.0491943359375, "p10": -280.0728302001953, "median": 249.53829193115234, "p90": 861.2126464843753, "max": 1260.77490234375, "pos_frac": 0.65625, "sample": [-82.81826782226562, 535.5498657226562, 337.8779602050781, -434.5287780761719, 1005.5730590820312, -261.21875, 76.91807556152344, 498.97344970703125, 985.9931030273438, 612.9601440429688, 111.63700866699219, 593.1902465820312, -233.75540161132812, 895.0741577148438, 320.0937194824219, -853.0052490234375, 1187.117919921875, 574.39794921875, 79.99181365966797, -75.90003967285156, 295.1595458984375, -32.97663879394531, 50.45935821533203, 320.33209228515625, 517.8058471679688, 134.61465454101562, 288.6554870605469, 447.00238037109375, 493.89556884765625, -18.43760108947754, -33.18255615234375, 671.8211669921875, -144.41983032226562, -93.591796875, 333.38604736328125, 42.244850158691406, 554.05908203125, -40.600257873535156, -280.8697509765625, -77.92817687988281, 339.462158203125, -278.2133483886719, 980.1351928710938, 449.929931640625, -949.0491943359375, 577.3357543945312, 1260.77490234375, 39.842811584472656, -49.22564697265625, -3.0577545166015625, 152.85964965820312, 895.0060424804688, -382.3453674316406, 636.2236328125, -328.5924072265625, 249.8466796875, 782.3613891601562, 18.670772552490234, 353.7666931152344, 513.3770141601562, 249.2299041748047, -43.67594528198242, 364.8865661621094, -313.10235595703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000519.npy"} +{"epoch": 0.762114537444934, "step": 520, "batch_size": 64, "mean": 316.0799560546875, "std": 492.4458923339844, "min": -1066.5230712890625, "p10": -177.17986755371092, "median": 241.601318359375, "p90": 966.5399353027345, "max": 2128.75244140625, "pos_frac": 0.75, "sample": [-156.70608520507812, -290.8548583984375, 995.80029296875, 114.69401550292969, 462.9303283691406, 151.187744140625, 421.4401550292969, -59.70008850097656, -185.954345703125, 172.1512451171875, 11.380630493164062, -55.222137451171875, 633.9439697265625, -47.37922668457031, -37.356483459472656, 553.2345581054688, 1298.0233154296875, 2128.75244140625, -192.00094604492188, 338.5931396484375, 1100.457763671875, 1380.3359375, -321.5484924316406, 192.47012329101562, -36.820465087890625, 746.9788818359375, 124.41254425048828, 571.746826171875, 474.8458251953125, 321.9464111328125, 250.0484619140625, 251.0968780517578, 1251.434814453125, 156.09779357910156, 75.28872680664062, 471.3455810546875, 535.3140869140625, 447.4075622558594, 764.3948364257812, -1066.5230712890625, 529.1773071289062, 137.94944763183594, 193.99513244628906, 317.97637939453125, 41.79425048828125, 981.8162231445312, 40.64727020263672, 893.4216918945312, 466.56549072265625, 233.1541748046875, 181.3281707763672, -289.5069580078125, -231.86825561523438, 329.73834228515625, 322.4642028808594, 569.9920654296875, -151.5869140625, 930.895263671875, -154.22630310058594, 571.1174926757812, 373.52960205078125, -131.53111267089844, 1.230804443359375, 123.35488891601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000520.npy"} +{"epoch": 0.7635829662261381, "step": 521, "batch_size": 64, "mean": 340.51385498046875, "std": 454.4649658203125, "min": -766.3245849609375, "p10": -105.91214599609374, "median": 226.33580017089844, "p90": 966.1593505859375, "max": 1674.8177490234375, "pos_frac": 0.796875, "sample": [446.3504638671875, 753.2576904296875, 189.47906494140625, 27.033035278320312, -82.18399810791016, -766.3245849609375, -198.749267578125, -19.613845825195312, -99.920166015625, 586.44140625, 42.6514892578125, -239.367431640625, 11.877361297607422, 638.0852661132812, 475.8005065917969, 1102.9920654296875, 76.98443603515625, 8.62442398071289, -235.3376007080078, 947.515869140625, 213.51231384277344, 4.7895050048828125, -102.73186492919922, 239.15928649902344, 539.156982421875, 678.0612182617188, 605.129150390625, 151.20986938476562, 95.1670913696289, 31.39263916015625, 406.2271728515625, 593.2195434570312, -25.026954650878906, 1054.3201904296875, 166.57275390625, 599.4481201171875, 524.0728759765625, 195.46249389648438, 573.733642578125, -107.2751235961914, 242.70230102539062, 401.6090087890625, 349.7220458984375, 86.37528991699219, 194.8955078125, 1072.7440185546875, -118.48616027832031, 1130.959716796875, -6.379180908203125, -513.14453125, 570.1239013671875, 415.061767578125, 703.1187744140625, 6.5393524169921875, 711.4385986328125, 974.1494140625, 105.27433776855469, 1464.775146484375, 33.860313415527344, 1674.8177490234375, 481.06097412109375, 99.60893249511719, 846.2070922851562, 764.6553955078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000521.npy"} +{"epoch": 0.7650513950073421, "step": 522, "batch_size": 64, "mean": 304.40625, "std": 487.6728820800781, "min": -795.6722412109375, "p10": -160.09324493408204, "median": 244.43445587158203, "p90": 1037.7237976074218, "max": 1326.188232421875, "pos_frac": 0.796875, "sample": [235.29713439941406, 699.5376586914062, 898.3917846679688, -28.69506072998047, 1043.992431640625, 117.7158203125, 1326.188232421875, 190.12478637695312, 55.38431167602539, 13.649883270263672, -152.87570190429688, 423.8899841308594, 763.9095458984375, 41.51158142089844, 505.3223876953125, -77.02487182617188, 302.8480529785156, -95.14280700683594, 448.8817138671875, 257.2490234375, 649.2283935546875, 627.5333862304688, 99.37616729736328, 315.1240234375, -602.192626953125, 1171.273681640625, 300.27520751953125, 149.8270263671875, 493.11712646484375, 557.0027465820312, 302.22015380859375, 118.11363220214844, 110.86360168457031, 44.22821044921875, 290.81585693359375, -795.6722412109375, 534.774658203125, 130.65011596679688, 179.1114501953125, -363.681640625, -163.1864776611328, 444.64483642578125, 974.391357421875, 88.62772369384766, -459.027587890625, 539.580078125, 920.0725708007812, 305.3541259765625, -769.3681030273438, 1179.284912109375, 1141.6781005859375, -39.038665771484375, 1266.1221923828125, 50.42371368408203, 126.09561157226562, -602.2442626953125, 27.018836975097656, -90.1856918334961, 606.6293334960938, 253.57177734375, 1023.0969848632812, 84.07431030273438, 1204.6842041015625, 87.55592346191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000522.npy"} +{"epoch": 0.7665198237885462, "step": 523, "batch_size": 64, "mean": 327.42437744140625, "std": 560.4797973632812, "min": -1002.2962036132812, "p10": -282.9564392089843, "median": 283.86358642578125, "p90": 1098.0642089843752, "max": 1750.0721435546875, "pos_frac": 0.6875, "sample": [-143.76365661621094, 1177.545166015625, 128.9683380126953, 264.25042724609375, 384.66131591796875, -300.07000732421875, 422.459228515625, 472.5729675292969, 955.322509765625, 430.07977294921875, -853.2615356445312, 726.4091186523438, -1.1029205322265625, -115.87405395507812, -396.5157165527344, 23.814619064331055, 254.7423095703125, 751.7984008789062, -489.7036437988281, 209.19094848632812, 609.09033203125, 1027.939453125, -119.0821533203125, 10.89659309387207, 492.774169921875, 115.27619934082031, 303.47674560546875, 124.25137329101562, -94.30785369873047, 1515.3194580078125, 565.9114379882812, 129.05506896972656, -55.8990478515625, -1002.2962036132812, 1128.11767578125, 873.03271484375, -455.2230529785156, -35.28425598144531, 529.6984252929688, -165.55445861816406, 701.4249267578125, -99.20220947265625, 1750.0721435546875, 350.2860412597656, 779.3244018554688, -29.079925537109375, 328.7110290527344, 133.94711303710938, 1627.5777587890625, 487.0421447753906, 582.3929443359375, 152.08865356445312, 572.0960083007812, 168.3157958984375, 752.8136596679688, 1201.1544189453125, 819.816162109375, -83.54756164550781, 613.4334716796875, 1172.716796875, 590.440673828125, -714.755859375, -243.0247802734375, -57.601070404052734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000523.npy"} +{"epoch": 0.7679882525697503, "step": 524, "batch_size": 64, "mean": 341.17156982421875, "std": 521.1823120117188, "min": -1562.6910400390625, "p10": -87.22211303710937, "median": 407.4591979980469, "p90": 918.4738830566407, "max": 1695.056884765625, "pos_frac": 0.75, "sample": [19.135543823242188, 868.8897705078125, 435.08355712890625, 537.5540161132812, 434.6937561035156, 257.761474609375, 368.51300048828125, 29.46778106689453, 577.0908813476562, 324.12841796875, 437.43560791015625, 97.13357543945312, 414.4714660644531, 562.8721923828125, 691.9537353515625, 408.9719543457031, -12.7484130859375, 908.6312866210938, 277.0653076171875, 615.5741577148438, -1562.6910400390625, 448.0322265625, 589.7952270507812, 1695.056884765625, -53.43559265136719, -22.65679931640625, 922.692138671875, 391.9123840332031, 532.2096557617188, -17.630435943603516, 7.776088714599609, 368.807373046875, 506.67071533203125, -211.6251678466797, 710.5401611328125, 282.4762878417969, 1206.930908203125, 1491.3671875, 724.1771850585938, 492.57513427734375, -35.24816131591797, -882.352783203125, 12.07684326171875, -59.01409149169922, 660.4572143554688, -88.84475708007812, 368.008544921875, 7.640830993652344, 461.19952392578125, 483.9961242675781, 1101.855224609375, 981.1891479492188, 615.5652465820312, -1009.2618408203125, -74.83052825927734, -103.28953552246094, 990.770751953125, 642.4471435546875, -59.683433532714844, 405.9464416503906, 587.6219482421875, 393.72216796875, -238.2152099609375, -83.43594360351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000524.npy"} +{"epoch": 0.7694566813509545, "step": 525, "batch_size": 64, "mean": 438.11553955078125, "std": 538.010986328125, "min": -790.8737182617188, "p10": -93.9471954345703, "median": 342.2528839111328, "p90": 1266.124340820313, "max": 1625.926513671875, "pos_frac": 0.796875, "sample": [380.95635986328125, 1108.949462890625, 436.7550048828125, 142.05496215820312, 36.12987518310547, 408.1159362792969, 1625.926513671875, 832.0304565429688, 385.3682556152344, -270.2761535644531, 1305.5780029296875, -454.04931640625, 856.4324340820312, 194.8290557861328, 241.51980590820312, -146.1193084716797, 154.58787536621094, 285.3971252441406, 992.866455078125, 656.15234375, 531.783935546875, 63.582157135009766, -94.664794921875, 150.52322387695312, -182.32501220703125, 347.919189453125, 511.92047119140625, 556.0537109375, 336.5865783691406, 1174.0657958984375, 1082.8282470703125, 53.668968200683594, 1576.9781494140625, -8.549903869628906, 1613.1685791015625, 350.70977783203125, 169.42779541015625, 251.82260131835938, 405.49957275390625, -92.27279663085938, 1603.796630859375, -790.8737182617188, -36.12762451171875, -8.734365463256836, 319.6644592285156, 308.4302978515625, 500.1363220214844, 127.94747924804688, 597.6290893554688, 184.23931884765625, 734.634521484375, 1458.4755859375, -64.87905883789062, -0.93743896484375, 692.8292846679688, 370.64630126953125, 317.7685241699219, 108.74034881591797, -384.397216796875, 993.6121826171875, 209.23178100585938, 1583.298095703125, 382.3489685058594, 859.983154296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000525.npy"} +{"epoch": 0.7709251101321586, "step": 526, "batch_size": 64, "mean": 420.246337890625, "std": 556.9052124023438, "min": -1063.6470947265625, "p10": -202.66725463867186, "median": 421.4828796386719, "p90": 1177.990075683594, "max": 1472.4678955078125, "pos_frac": 0.734375, "sample": [1127.797119140625, 812.3160400390625, 32.92433547973633, 264.4771728515625, 360.896240234375, 277.7560119628906, -208.3822784423828, -0.18695640563964844, 547.1725463867188, -701.4600830078125, 607.1007690429688, 805.6802978515625, 791.0569458007812, 248.36831665039062, 1241.539306640625, 1017.9654541015625, -122.67239379882812, 749.5148315429688, 294.7823791503906, 611.499755859375, 397.7764892578125, 534.10791015625, 229.71217346191406, 1074.31201171875, 532.9118041992188, 590.3087158203125, 1472.4678955078125, 0.1345367431640625, 374.49298095703125, -189.3321990966797, -222.88719177246094, 657.4629516601562, -499.0543518066406, 1389.98193359375, -177.10546875, 313.1722717285156, 998.2860107421875, 720.13330078125, 206.38333129882812, -1063.6470947265625, 836.5667114257812, 669.1077880859375, 438.13372802734375, 1277.0897216796875, -531.6384887695312, 741.1485595703125, 723.394287109375, 1113.0699462890625, -57.616355895996094, 502.30023193359375, -11.483306884765625, 240.5102081298828, 302.9755554199219, 1214.57421875, 1026.39501953125, -181.24557495117188, 1258.369140625, 605.0421752929688, 404.83203125, -55.18748474121094, -147.04840087890625, -16.466115951538086, -754.32275390625, 1199.5013427734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000526.npy"} +{"epoch": 0.7723935389133627, "step": 527, "batch_size": 64, "mean": 293.5634765625, "std": 432.2890319824219, "min": -708.191650390625, "p10": -235.4161666870117, "median": 234.38238525390625, "p90": 803.1895385742188, "max": 1217.2864990234375, "pos_frac": 0.765625, "sample": [-565.3636474609375, 492.0705871582031, 331.8761291503906, 390.3924255371094, 685.2420043945312, 158.30654907226562, 1217.2864990234375, -103.00415802001953, -239.77151489257812, 771.05322265625, -156.0542449951172, -197.08676147460938, 669.9265747070312, 250.1126251220703, -708.191650390625, -593.569091796875, 496.91558837890625, -71.40637969970703, 676.2018432617188, 460.54888916015625, -345.55328369140625, 1072.5435791015625, 112.7313232421875, 60.32285690307617, 140.5143280029297, 809.874755859375, -22.530197143554688, 67.21027374267578, 925.7503662109375, 703.0816650390625, 544.1182861328125, 1061.3233642578125, 435.9293212890625, 665.7728881835938, 1159.6739501953125, 1037.7950439453125, 787.5906982421875, -27.201208114624023, 214.6849822998047, 237.25350952148438, 641.4829711914062, 165.46145629882812, 330.77935791015625, 221.45802307128906, -130.514404296875, 63.642791748046875, 325.5828857421875, 341.0450744628906, 160.9522705078125, -433.69036865234375, 277.0982666015625, 184.58551025390625, 706.8345947265625, 218.18994140625, 115.78677368164062, 83.41178894042969, 689.805908203125, 664.315673828125, 530.2672119140625, 218.3604278564453, 231.51126098632812, -226.91934204101562, 41.301307678222656, -239.0576629638672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000527.npy"} +{"epoch": 0.7738619676945668, "step": 528, "batch_size": 64, "mean": 356.39044189453125, "std": 578.6063842773438, "min": -753.078125, "p10": -395.55372924804686, "median": 284.60243225097656, "p90": 1176.3712158203125, "max": 1946.773193359375, "pos_frac": 0.734375, "sample": [7.749229431152344, 1218.000244140625, 306.3672790527344, 291.8287658691406, 137.81866455078125, -396.86236572265625, -112.84253692626953, 705.1298828125, 578.3967895507812, 1225.626220703125, 411.85308837890625, -99.55625915527344, 412.5897216796875, -425.20721435546875, 116.7765884399414, 670.9050903320312, 358.189208984375, -126.44766235351562, 1165.6968994140625, 1946.773193359375, -17.666847229003906, 1027.9224853515625, 494.99969482421875, 722.5581665039062, 575.0034790039062, 916.2998657226562, -542.105712890625, 1317.4140625, -236.253173828125, 1096.738037109375, -551.2777099609375, 277.3760986328125, 385.8652648925781, 213.09860229492188, 318.452880859375, 180.26695251464844, 547.02001953125, 146.86915588378906, 505.9900817871094, 18.355445861816406, -225.82041931152344, 854.7549438476562, -157.15866088867188, -753.078125, 1481.74365234375, 214.69284057617188, 477.1646728515625, 1638.267578125, 161.6179962158203, 40.01068115234375, -577.6348266601562, -552.5634765625, 674.78173828125, 1180.9459228515625, 145.91940307617188, 832.5149536132812, 616.12109375, 208.19747924804688, -392.500244140625, -15.477432250976562, 116.13400268554688, -119.14387512207031, 1067.882080078125, 101.90474700927734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000528.npy"} +{"epoch": 0.775330396475771, "step": 529, "batch_size": 64, "mean": 237.14752197265625, "std": 585.0148315429688, "min": -1584.3756103515625, "p10": -478.7118286132812, "median": 238.74124145507812, "p90": 901.0221923828128, "max": 1566.9703369140625, "pos_frac": 0.671875, "sample": [97.94862365722656, -221.6194305419922, 1566.9703369140625, -24.69134521484375, 173.63299560546875, 589.9404296875, 457.7382507324219, 767.6803588867188, 553.5167236328125, 1447.119873046875, -280.5191955566406, 509.19580078125, 120.69905853271484, -481.97088623046875, 585.8867797851562, -159.8647003173828, 140.64434814453125, 75.6709213256836, 802.8151245117188, -69.9792251586914, -371.6921081542969, 287.85418701171875, 1166.89501953125, 761.139892578125, 992.3920288085938, -721.9035034179688, -37.38910675048828, 262.6841735839844, -593.304443359375, 664.3567504882812, 274.3262634277344, 542.7189331054688, -136.98178100585938, 355.17156982421875, -52.144256591796875, 36.75126647949219, 747.940673828125, 74.50486755371094, 610.40478515625, 760.0241088867188, -1584.3756103515625, 203.83892822265625, 1012.1732177734375, -540.0037231445312, 616.1257934570312, -1037.0994873046875, 111.77181243896484, 453.41925048828125, 932.8942260742188, 37.131507873535156, -786.9905395507812, 426.061767578125, -57.138832092285156, 680.014404296875, -471.10736083984375, 487.4315490722656, 214.79830932617188, 826.6541137695312, 604.968017578125, 1268.65185546875, -335.0563049316406, -197.9375457763672, -313.4888610839844, 350.1405334472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000529.npy"} +{"epoch": 0.7767988252569751, "step": 530, "batch_size": 64, "mean": 394.1950988769531, "std": 707.1098022460938, "min": -2146.034423828125, "p10": -285.1765563964844, "median": 265.4164276123047, "p90": 1461.4590698242193, "max": 2280.613037109375, "pos_frac": 0.734375, "sample": [323.7450256347656, -380.6220397949219, -12.858955383300781, 176.07318115234375, 319.1336975097656, -270.2833557128906, 1750.9473876953125, 570.1223754882812, 353.9961242675781, -855.5955810546875, 119.82019805908203, -80.07390594482422, -16.36492919921875, -140.51205444335938, 738.6797485351562, 777.2002563476562, -51.65861511230469, 4.23516845703125, -405.06195068359375, 1310.887451171875, 1663.729248046875, 602.7403564453125, 965.6138305664062, 758.7120971679688, -111.78276062011719, 831.1864013671875, -110.343017578125, 177.35610961914062, -304.6800537109375, 605.9511108398438, -5.167112350463867, 1639.7376708984375, 1170.5137939453125, 523.7686157226562, 338.9269714355469, 47.51325607299805, 851.8043212890625, 69.29190826416016, -291.5593566894531, -426.9673156738281, 155.59861755371094, 1846.8668212890625, 710.91455078125, 2280.613037109375, 77.36376953125, 329.4670104980469, 402.3329772949219, 256.6404113769531, 1518.44775390625, 212.49325561523438, 369.3437194824219, -0.6743106842041016, 860.7401733398438, -2146.034423828125, 1328.4854736328125, 151.60523986816406, 11.592193603515625, 1702.39599609375, 549.8233032226562, 274.19244384765625, 81.7646484375, 166.95257568359375, 637.2872924804688, 222.11862182617188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000530.npy"} +{"epoch": 0.7782672540381792, "step": 531, "batch_size": 64, "mean": 348.384521484375, "std": 561.2249755859375, "min": -1051.8900146484375, "p10": -240.5874786376953, "median": 238.29672241210938, "p90": 1072.9100524902349, "max": 1888.339111328125, "pos_frac": 0.78125, "sample": [597.6168212890625, 104.7226791381836, 595.6563720703125, 1114.1201171875, 929.9428100585938, -539.2804565429688, 224.60104370117188, 226.65676879882812, 1747.614501953125, -76.35317993164062, -266.4173583984375, 469.2530212402344, 10.360456466674805, 419.0141906738281, 283.6479187011719, 195.97364807128906, 440.3729553222656, 227.1607208251953, -1051.8900146484375, 371.31036376953125, 976.7532348632812, 742.0460815429688, 377.77508544921875, 81.30351257324219, 790.7698364257812, 192.041259765625, 473.1450500488281, 229.0251007080078, 491.2762756347656, -55.636680603027344, 1232.474365234375, -134.66888427734375, 667.338623046875, 691.507568359375, 105.88853454589844, 239.10556030273438, -29.27544403076172, -619.6021728515625, -16.328716278076172, 172.1898193359375, 1888.339111328125, 364.4688720703125, 129.35458374023438, 27.857620239257812, 1789.8836669921875, -492.2596130371094, 569.8753662109375, 1449.7823486328125, 267.5521240234375, 936.3564453125, 1181.778564453125, -0.07846832275390625, 601.4099731445312, -241.73861694335938, 237.48788452148438, 130.6779327392578, 503.75048828125, 141.9752960205078, 408.8085021972656, 32.00996398925781, 192.04403686523438, 525.2164306640625, -741.2544555664062, -237.9014892578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000531.npy"} +{"epoch": 0.7797356828193832, "step": 532, "batch_size": 64, "mean": 353.33392333984375, "std": 659.3086547851562, "min": -1337.625, "p10": -447.53729248046875, "median": 294.3863067626953, "p90": 1231.153649902344, "max": 1927.8829345703125, "pos_frac": 0.734375, "sample": [1175.70556640625, 478.93511962890625, 1581.2039794921875, -260.1360778808594, 1277.7781982421875, 231.95297241210938, 817.4222412109375, 817.1458740234375, -1337.625, 285.23809814453125, -358.2615966796875, 1475.2607421875, 491.5278015136719, 765.7198486328125, -449.0484619140625, 755.606201171875, 570.091064453125, -1010.229736328125, 71.12669372558594, 332.0802917480469, 421.8810119628906, 1599.849609375, 1927.8829345703125, 850.7451782226562, 612.656005859375, 604.4139404296875, -125.21424865722656, 1084.010498046875, 1254.9171142578125, 188.4952392578125, -122.34406280517578, 313.2911682128906, -157.72579956054688, 953.99072265625, 890.3470458984375, 126.20046997070312, 201.85324096679688, 1060.3463134765625, -74.36197662353516, 292.1464538574219, 55.92009735107422, 836.08935546875, 198.3609619140625, 585.0777587890625, 154.31959533691406, -88.87528228759766, 29.90363311767578, 746.1126098632812, 188.68081665039062, 11.477714538574219, -528.0750732421875, -732.0263061523438, 269.75390625, 1627.185791015625, 296.62615966796875, -101.8619384765625, 359.3641662597656, -933.00390625, -19.36920166015625, 215.62857055664062, -444.01123046875, 616.870361328125, -679.0523071289062, 333.3998718261719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000532.npy"} +{"epoch": 0.7812041116005873, "step": 533, "batch_size": 64, "mean": 262.97039794921875, "std": 572.2893676757812, "min": -1517.08251953125, "p10": -325.80812377929686, "median": 221.8073272705078, "p90": 933.852752685547, "max": 2138.424072265625, "pos_frac": 0.65625, "sample": [613.7100830078125, -384.10711669921875, 785.398681640625, -525.4921264648438, 447.44525146484375, 203.61984252929688, 1182.235595703125, 590.59375, 256.7889404296875, 411.04534912109375, 775.058837890625, 638.0210571289062, 620.946533203125, 660.0101928710938, -165.18173217773438, 423.0999450683594, 945.3965454101562, 172.14920043945312, 344.7267150878906, -843.6170654296875, 1279.1087646484375, 262.7763671875, -181.72222900390625, 555.899169921875, 42.01747512817383, 619.310791015625, -51.309913635253906, 145.70176696777344, 226.21881103515625, 2138.424072265625, 445.30780029296875, 396.9035949707031, -89.3179931640625, -37.882652282714844, -22.13656997680664, -1517.08251953125, -347.4946594238281, 417.9190979003906, -255.22677612304688, 440.8414611816406, 157.5937042236328, 147.70469665527344, 1293.67919921875, 1130.739990234375, -193.11593627929688, -8.068252563476562, -305.17901611328125, -4.7342529296875, 326.6734924316406, 217.39584350585938, 334.8729553222656, -151.00390625, 906.917236328125, 132.771240234375, -334.649169921875, -291.38336181640625, -835.4617919921875, 199.49267578125, 207.16683959960938, 1189.421875, -226.15997314453125, -38.789337158203125, 808.5484008789062, 545.5675659179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000533.npy"} +{"epoch": 0.7826725403817915, "step": 534, "batch_size": 64, "mean": 365.05377197265625, "std": 560.8082275390625, "min": -896.5723266601562, "p10": -248.14581604003905, "median": 257.7046127319336, "p90": 1023.8048217773438, "max": 1986.0703125, "pos_frac": 0.78125, "sample": [459.1802978515625, 1018.93310546875, -40.167205810546875, 1986.0703125, 42.357364654541016, 895.0958862304688, -493.40045166015625, 243.02316284179688, 282.487060546875, -524.9461059570312, 63.153053283691406, 1227.774658203125, -246.76895141601562, 178.17727661132812, 1261.51611328125, 883.1704711914062, 915.5156860351562, -350.3030700683594, 396.6980895996094, 833.769775390625, 319.04156494140625, -237.91867065429688, 637.97607421875, 454.75897216796875, 244.82273864746094, 389.2220458984375, 1025.8927001953125, -66.64608764648438, 456.01129150390625, 175.30050659179688, 162.885498046875, 882.5755615234375, 270.58648681640625, 24.546958923339844, 700.4519653320312, -527.08203125, -139.00167846679688, 1758.4130859375, 821.9480590820312, 1037.66552734375, 157.560546875, 188.81748962402344, -59.751708984375, 151.04159545898438, 781.0403442382812, 200.83409118652344, 113.37855529785156, 851.2890625, 1501.58740234375, 934.2128295898438, 709.0797729492188, 374.9386291503906, 643.26416015625, 44.68986892700195, 392.7078552246094, 179.3316650390625, 312.8151550292969, 228.66268920898438, 239.34945678710938, 19.337343215942383, -199.58377075195312, -896.5723266601562, -248.73590087890625, -678.6397094726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000534.npy"} +{"epoch": 0.7841409691629956, "step": 535, "batch_size": 64, "mean": 419.6590576171875, "std": 565.7296752929688, "min": -845.811279296875, "p10": -203.88623352050777, "median": 409.71868896484375, "p90": 1062.5039306640626, "max": 2399.271728515625, "pos_frac": 0.796875, "sample": [-343.8468017578125, 2399.271728515625, -329.53515625, 542.627685546875, -173.94422912597656, 236.12835693359375, 114.53466796875, 984.311279296875, 133.39251708984375, 627.3319091796875, 246.67100524902344, -71.34749603271484, 260.3093566894531, 912.465087890625, 465.3136901855469, 403.83526611328125, 128.51669311523438, 1308.688720703125, 291.4200134277344, 435.05548095703125, 1022.28955078125, 551.8264770507812, 436.8790283203125, 487.1650695800781, 19.704605102539062, 1555.235595703125, 1176.1851806640625, 459.98223876953125, -57.570281982421875, 325.04742431640625, 1492.1575927734375, 770.0802001953125, 1016.9637451171875, -355.1266174316406, -329.0292663574219, -845.811279296875, 485.3384094238281, 812.9666137695312, 77.95953369140625, 415.60211181640625, 366.46478271484375, 1079.1400146484375, 852.536376953125, 211.11590576171875, 571.9827270507812, -104.90495300292969, 50.43657684326172, 747.226318359375, -216.71852111816406, -559.7291259765625, 607.9962158203125, 64.7069320678711, 531.1304931640625, 63.263877868652344, -137.94065856933594, -96.7967529296875, 1023.6864013671875, 185.89840698242188, 1705.3486328125, 269.23779296875, 29.732023239135742, 513.3240966796875, 432.36749267578125, 579.629638671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000535.npy"} +{"epoch": 0.7856093979441997, "step": 536, "batch_size": 64, "mean": 498.6971435546875, "std": 707.3242797851562, "min": -1274.9525146484375, "p10": -109.56207046508787, "median": 295.2587585449219, "p90": 1205.6970947265627, "max": 3922.043212890625, "pos_frac": 0.828125, "sample": [201.2278594970703, 784.9376831054688, 312.1182861328125, 1018.712890625, 1299.338134765625, 48.14805603027344, 182.14605712890625, 165.02349853515625, 61.86391830444336, -29.99114990234375, 953.18896484375, 328.5274963378906, 1218.65869140625, 874.6253662109375, -272.8618469238281, 527.6279907226562, 1270.453125, 100.86666870117188, 887.6314697265625, 855.4147338867188, 2220.034423828125, 598.0836181640625, -115.5710678100586, 835.331298828125, -154.06185913085938, 1276.60546875, 28.37279510498047, 608.3909912109375, 747.1201171875, -95.54107666015625, 3922.043212890625, 803.6033935546875, 302.42901611328125, 181.86178588867188, 505.63214111328125, 803.3383178710938, 126.47282409667969, 1148.9749755859375, -422.2985534667969, 63.34817123413086, 138.79212951660156, -1274.9525146484375, 1635.86376953125, 993.4776611328125, 786.3125, -224.38816833496094, 124.6798095703125, 258.379150390625, 288.0885009765625, 123.419189453125, 218.17010498046875, -67.28412628173828, 281.6321105957031, 230.05975341796875, 248.68515014648438, 757.4921264648438, 150.23231506347656, -66.02030944824219, -318.333251953125, 8.999053955078125, 1094.8800048828125, 791.6634521484375, 389.489990234375, 1175.453369140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000536.npy"} +{"epoch": 0.7870778267254038, "step": 537, "batch_size": 64, "mean": 343.6505126953125, "std": 581.0789184570312, "min": -1239.528076171875, "p10": -300.3589782714844, "median": 276.2516174316406, "p90": 1123.7184692382814, "max": 1372.5775146484375, "pos_frac": 0.796875, "sample": [-1239.528076171875, -786.736083984375, 16.260162353515625, 216.30352783203125, -543.3525390625, 288.763916015625, 27.32612419128418, 963.4622192382812, 12.97906494140625, -42.892459869384766, 484.8427429199219, -9.311721801757812, 520.004150390625, 447.86846923828125, -300.6064453125, 914.0291748046875, 1194.1917724609375, 39.313201904296875, 125.03894805908203, -14.003837585449219, 36.46197509765625, 972.7952270507812, 56.99091339111328, -787.650390625, 1092.6234130859375, -299.78155517578125, 1372.5775146484375, 1211.418212890625, 368.496826171875, 569.1663818359375, 1071.4530029296875, 404.90411376953125, 1025.978271484375, 307.08013916015625, 827.723876953125, 17.706947326660156, 108.02201843261719, 582.6873168945312, 263.73931884765625, -510.111572265625, 251.76547241210938, 491.8216857910156, -88.08818054199219, -899.907470703125, 27.705638885498047, 1137.044921875, -53.70651626586914, 192.14759826660156, 328.7046203613281, 824.0411987304688, 170.28759765625, 1369.56201171875, 117.19298553466797, 1356.8101806640625, 560.0243530273438, 54.801292419433594, 1270.6387939453125, 105.60324096679688, 1019.8931884765625, 392.2275390625, 539.5441284179688, 705.4296264648438, 36.97991943359375, 1076.8740234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000537.npy"} +{"epoch": 0.788546255506608, "step": 538, "batch_size": 64, "mean": 376.39959716796875, "std": 511.9606628417969, "min": -552.5037231445312, "p10": -215.6068115234375, "median": 355.06573486328125, "p90": 1008.263348388672, "max": 1978.1910400390625, "pos_frac": 0.75, "sample": [160.3732452392578, 504.5670166015625, 742.86669921875, 1129.33203125, 519.4118041992188, -219.21160888671875, 914.7882080078125, 597.4862060546875, 542.2337036132812, 467.1681823730469, -107.39781951904297, 1680.858154296875, 575.2884521484375, 378.4991455078125, 222.3273162841797, 979.0225830078125, -492.54150390625, 3.6018600463867188, 331.63232421875, 1105.1634521484375, 255.3307647705078, 496.8849182128906, 118.88198852539062, 660.1725463867188, 605.2984619140625, 79.94087219238281, -120.1480712890625, 1428.219970703125, -26.44891357421875, -384.9170227050781, -552.5037231445312, 801.7095336914062, 176.17323303222656, -105.13041687011719, 1978.1910400390625, 691.998779296875, 841.4491577148438, 584.1045532226562, 315.8437805175781, 661.828857421875, 176.8896484375, -145.51910400390625, 11.205402374267578, 619.1254272460938, 216.52609252929688, 668.4100341796875, -474.0527038574219, 513.016357421875, -314.2940368652344, 107.39154052734375, 609.9156494140625, 466.08880615234375, 323.23486328125, 697.7890014648438, -202.0356903076172, -52.174461364746094, 227.369140625, 58.37604904174805, 1092.6748046875, 1020.7951049804688, -316.66766357421875, -207.19561767578125, 558.9285888671875, -108.57471466064453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000538.npy"} +{"epoch": 0.7900146842878121, "step": 539, "batch_size": 64, "mean": 360.6948547363281, "std": 478.0931701660156, "min": -748.0493774414062, "p10": -146.54980773925777, "median": 277.2389831542969, "p90": 932.4374389648442, "max": 1704.988037109375, "pos_frac": 0.765625, "sample": [267.63653564453125, 286.8414306640625, 92.72823333740234, -13.927858352661133, 395.9190673828125, 415.8023681640625, -212.4055938720703, 57.24170684814453, -28.017852783203125, 1445.6083984375, -248.92193603515625, -25.797067642211914, 1704.988037109375, 152.46180725097656, 984.4384765625, 637.3468017578125, 732.0162963867188, 721.9188842773438, 80.412841796875, 777.13525390625, 1561.462158203125, 103.931640625, 196.2885284423828, 732.3870849609375, 183.10238647460938, 198.16177368164062, -15.280288696289062, 459.122802734375, -116.99842834472656, 715.5145263671875, -399.1960144042969, 679.6966552734375, -748.0493774414062, 515.109130859375, 639.1776123046875, 101.1256103515625, 63.615943908691406, 547.9242553710938, 219.3844451904297, -117.46347045898438, -68.15614318847656, 1399.8416748046875, -208.73226928710938, 672.668701171875, -41.81145095825195, -159.015380859375, 999.4547729492188, -254.52183532714844, 60.322166442871094, 13.324867248535156, 608.8201904296875, 248.81008911132812, 775.8297119140625, 346.4149475097656, 519.8871459960938, 301.03631591796875, 811.1016845703125, 551.6574096679688, 424.50689697265625, 407.147216796875, 1.2948436737060547, 1234.3780517578125, 546.5258178710938, 151.2425994873047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000539.npy"} +{"epoch": 0.7914831130690162, "step": 540, "batch_size": 64, "mean": 470.0218505859375, "std": 662.1915893554688, "min": -859.353515625, "p10": -357.0869323730467, "median": 335.1390075683594, "p90": 1401.9480346679688, "max": 2555.187744140625, "pos_frac": 0.765625, "sample": [517.7066650390625, 586.4639282226562, 770.8077392578125, -448.96160888671875, 1971.2681884765625, -190.52764892578125, 335.5509338378906, 151.50921630859375, -859.353515625, -506.2259521484375, -72.96976470947266, 546.265869140625, -159.20924377441406, 264.23748779296875, 2555.187744140625, -61.25220489501953, 255.1068115234375, 100.39114379882812, 595.7591552734375, 686.015380859375, -88.49502563476562, 135.82281494140625, 419.1116027832031, 824.5960693359375, 798.32666015625, 1370.4002685546875, 631.1587524414062, 1415.468505859375, 1562.44287109375, 334.7270812988281, 562.1978149414062, 435.303466796875, 713.6873779296875, 66.8439712524414, 302.21087646484375, 484.4067687988281, -66.82962036132812, -16.6636962890625, 1601.3443603515625, 271.6595764160156, 1103.8536376953125, 1835.956787109375, 1586.2120361328125, 1310.4708251953125, -520.2887573242188, 689.1151123046875, 227.82510375976562, 264.22137451171875, 755.2992553710938, 315.34442138671875, -428.469482421875, 306.4263000488281, 704.2918701171875, 860.40185546875, 190.16864013671875, 300.9526672363281, 770.1724853515625, 181.97415161132812, 1348.41748046875, -37.373573303222656, 8.089942932128906, 659.7147216796875, -696.7598266601562, -450.1092224121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000540.npy"} +{"epoch": 0.7929515418502202, "step": 541, "batch_size": 64, "mean": 243.10484313964844, "std": 569.4824829101562, "min": -1172.068603515625, "p10": -474.44540405273426, "median": 207.50424194335938, "p90": 891.2317443847658, "max": 1939.8819580078125, "pos_frac": 0.75, "sample": [-831.691162109375, 190.78756713867188, 641.302978515625, 112.75155639648438, 161.8907470703125, 95.32662963867188, 291.2289733886719, 78.49775695800781, 237.62451171875, -53.4432373046875, -220.36642456054688, 1322.46728515625, -777.7163696289062, 34.56190490722656, 935.3826904296875, 76.87271881103516, 223.412353515625, 1207.531982421875, 111.2328109741211, 786.205322265625, 1522.1676025390625, 850.227783203125, 13.882476806640625, -620.66748046875, 183.24493408203125, -251.4791259765625, 406.79412841796875, 311.4871826171875, -515.334228515625, 684.668212890625, 646.5108642578125, 62.49219512939453, 751.5848388671875, 180.3641815185547, 213.63821411132812, 158.7178955078125, 192.687744140625, 214.90882873535156, 654.4566650390625, 459.87860107421875, -128.84800720214844, 171.30335998535156, 787.238037109375, 908.8048706054688, 651.1026611328125, 277.7889404296875, -1172.068603515625, -555.8770751953125, 451.5494079589844, 594.7042236328125, -333.9478454589844, 380.3176574707031, 201.37026977539062, 1939.8819580078125, 1169.124267578125, -379.03814697265625, -894.1602172851562, 483.5469055175781, -332.8901062011719, -215.41006469726562, -206.55572509765625, 219.07769775390625, 544.9915161132812, 252.61183166503906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000541.npy"} +{"epoch": 0.7944199706314243, "step": 542, "batch_size": 64, "mean": 463.46759033203125, "std": 635.0188598632812, "min": -1116.8154296875, "p10": -269.2774948120117, "median": 417.2616882324219, "p90": 1201.4050659179688, "max": 2205.59130859375, "pos_frac": 0.796875, "sample": [982.21484375, 363.800537109375, 229.41278076171875, 292.3572692871094, 198.02972412109375, 988.2560424804688, -364.14459228515625, 2205.59130859375, 1210.9788818359375, 474.92962646484375, 14.499618530273438, 539.19873046875, 863.9873657226562, -16.877655029296875, -87.7949447631836, 463.2091064453125, 1944.5213623046875, 319.67486572265625, -143.28314208984375, 1667.851318359375, 331.3481750488281, 129.13636779785156, 441.50665283203125, 483.29620361328125, -381.5693664550781, 208.0402069091797, -89.0460205078125, -411.049072265625, -241.14540100097656, 991.1400146484375, -281.3341064453125, 853.1807861328125, 864.9834594726562, 177.29049682617188, 1708.2392578125, -98.57461547851562, 685.43310546875, 519.7274169921875, -577.386962890625, 687.0142822265625, 349.44354248046875, 486.28411865234375, 675.775634765625, 123.5707015991211, 1699.424560546875, 175.79428100585938, 278.6153869628906, -1116.8154296875, 740.8146362304688, 175.72885131835938, 745.5585327148438, 333.21295166015625, 106.16755676269531, 441.5783996582031, 1634.6883544921875, 1031.515869140625, 393.0167236328125, -934.7445678710938, 1179.066162109375, 327.3723449707031, 467.7210998535156, 660.568359375, 678.39013671875, 862.5338134765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000542.npy"} +{"epoch": 0.7958883994126285, "step": 543, "batch_size": 64, "mean": 326.91180419921875, "std": 754.9144897460938, "min": -987.5075073242188, "p10": -375.7522064208984, "median": 147.3399887084961, "p90": 1320.7195434570317, "max": 3345.689453125, "pos_frac": 0.65625, "sample": [21.29511260986328, -6.2955322265625, 805.4842529296875, -25.35723876953125, 7.807638168334961, 2277.112548828125, 4.307374954223633, -43.18782043457031, -987.5075073242188, 712.7413330078125, -70.77677154541016, -803.2081298828125, 74.25897216796875, 194.3218994140625, 2243.584228515625, 57.04164123535156, 38.89508056640625, 3345.689453125, -590.438232421875, 24.94062042236328, 503.3035888671875, -400.33111572265625, -311.19586181640625, 198.10948181152344, -442.8782043457031, -91.5119400024414, -106.360595703125, 561.95849609375, -441.79754638671875, 219.7646026611328, 589.5338134765625, 221.49562072753906, -315.89984130859375, 1361.462890625, -148.90914916992188, -174.2670440673828, 1938.4560546875, 616.1412353515625, 1225.6517333984375, 90.16613006591797, 1568.388427734375, 134.8728790283203, -228.90122985839844, 327.08123779296875, 816.9239501953125, -32.29810333251953, 375.66363525390625, 1782.5184326171875, -383.90716552734375, 624.1568603515625, 227.53005981445312, -199.40350341796875, 336.58026123046875, -356.7239685058594, 490.72479248046875, 603.579345703125, 301.8289489746094, -123.22501373291016, 40.32548522949219, 402.1224060058594, 159.80709838867188, 540.8240966796875, 629.0311889648438, 511.2532958984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000543.npy"} +{"epoch": 0.7973568281938326, "step": 544, "batch_size": 64, "mean": 387.87322998046875, "std": 581.82373046875, "min": -982.05126953125, "p10": -181.7893585205078, "median": 369.4586639404297, "p90": 1155.7386718750001, "max": 1894.2705078125, "pos_frac": 0.765625, "sample": [1171.8990478515625, 824.0594482421875, 356.71728515625, 490.4412536621094, 1623.3502197265625, -982.05126953125, 382.2000427246094, -98.45992279052734, 777.7197265625, -47.148040771484375, 1286.902099609375, -100.56867980957031, 751.9364013671875, 239.07093811035156, -968.8248291015625, 356.3995666503906, 682.7686157226562, 227.877685546875, 56.09661865234375, 396.5888977050781, 414.46380615234375, 389.03717041015625, -240.85812377929688, 277.72528076171875, 71.82852172851562, 155.1586456298828, 776.5267333984375, 76.12567138671875, -316.192626953125, -196.97219848632812, 1634.509765625, 198.87689208984375, -81.85973358154297, 638.1204833984375, -973.201171875, 1118.0311279296875, 477.074951171875, 383.2337646484375, 637.8739013671875, -146.36273193359375, 253.2340087890625, 13.540279388427734, 274.8922424316406, 1609.4326171875, 690.6228637695312, -73.26506042480469, 159.2645721435547, 1894.2705078125, 922.616455078125, 576.2346801757812, 5.493648529052734, 160.26968383789062, 766.1618041992188, 1452.5972900390625, 633.8407592773438, 428.3006896972656, 485.8376770019531, -128.44334411621094, 526.9863891601562, 129.26170349121094, -25.918075561523438, 780.2572021484375, -207.16378784179688, 775.4453735351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000544.npy"} +{"epoch": 0.7988252569750367, "step": 545, "batch_size": 64, "mean": 454.80206298828125, "std": 572.7142333984375, "min": -675.2298583984375, "p10": -79.63759384155271, "median": 303.20289611816406, "p90": 1387.784069824219, "max": 1834.6236572265625, "pos_frac": 0.859375, "sample": [-113.52804565429688, 32.063079833984375, -675.2298583984375, 80.09652709960938, 267.7671203613281, 846.8375244140625, 181.2099609375, 104.89640808105469, 544.8445434570312, -351.5038757324219, 498.5699157714844, 711.41650390625, 4.100212097167969, 644.1480102539062, 1246.6837158203125, 146.78594970703125, 1611.574462890625, 1336.9635009765625, 1396.499267578125, 372.7393493652344, 778.976806640625, 71.95205688476562, 262.64599609375, 210.3141632080078, 98.6971664428711, 321.6940002441406, 86.06787109375, 474.34405517578125, 884.4534912109375, -54.403526306152344, 1477.77099609375, 425.92822265625, 107.96212005615234, -90.45219421386719, 1834.6236572265625, 1030.4071044921875, 55.99861145019531, 1415.669189453125, -634.105712890625, 451.8621826171875, -40.013755798339844, 519.8223266601562, 108.97604370117188, 650.62255859375, 373.49462890625, 673.955322265625, 911.521484375, 1367.4486083984375, 284.7117919921875, 196.36151123046875, 154.55165100097656, 81.88386535644531, 284.4233093261719, 405.1966857910156, 146.28457641601562, 1592.3358154296875, 1323.696044921875, -233.21609497070312, 406.4678039550781, 209.0445556640625, 652.3948974609375, -668.762939453125, 147.41195678710938, 1461.378662109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000545.npy"} +{"epoch": 0.8002936857562408, "step": 546, "batch_size": 64, "mean": 296.3592529296875, "std": 490.6263732910156, "min": -1588.97607421875, "p10": -232.52848358154296, "median": 275.9789581298828, "p90": 881.8617187500001, "max": 1259.843994140625, "pos_frac": 0.734375, "sample": [-176.75595092773438, 109.42194366455078, 410.6649169921875, -11.926788330078125, 178.59381103515625, 492.69952392578125, 271.46588134765625, 578.0330810546875, 183.4984588623047, 221.21142578125, -202.31399536132812, -215.40594482421875, -156.64122009277344, -129.64373779296875, 763.1103515625, 110.45414733886719, 10.356361389160156, 272.1781921386719, 918.58447265625, 279.77972412109375, -102.00255584716797, 862.31298828125, -277.7548522949219, 1223.4613037109375, 664.2929077148438, 850.275634765625, 805.9254760742188, 461.88519287109375, 890.23974609375, 309.8215026855469, 1259.843994140625, -239.86671447753906, -4.480583190917969, 446.3971862792969, 470.2625732421875, -782.6096801757812, 711.0505981445312, 527.110107421875, 765.5849609375, 912.4254760742188, -1588.97607421875, 753.432373046875, 220.19384765625, 280.5586242675781, 479.3079528808594, 776.2587280273438, 53.721923828125, 787.4147338867188, -60.346656799316406, 156.65484619140625, 60.77191925048828, 115.80828094482422, 720.3748779296875, 285.4281921386719, -277.0827941894531, 559.5274047851562, 122.67684936523438, 1078.0635986328125, -289.4101867675781, 120.6552963256836, -56.50489807128906, 390.6654052734375, -353.34613037109375, 969.6035766601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000546.npy"} +{"epoch": 0.801762114537445, "step": 547, "batch_size": 64, "mean": 391.1941223144531, "std": 549.2232055664062, "min": -1126.350830078125, "p10": -135.22178726196287, "median": 389.0903625488281, "p90": 1002.0400756835937, "max": 2130.188720703125, "pos_frac": 0.765625, "sample": [542.8841552734375, 61.00525665283203, -1126.350830078125, 1004.90966796875, -43.75886154174805, 22.733673095703125, 326.3190002441406, 692.802978515625, 180.770751953125, 423.7643737792969, 155.60089111328125, 1689.767578125, 38.845035552978516, 1769.9881591796875, -50.88517761230469, -126.55823516845703, 532.93896484375, 710.0376586914062, -165.16734313964844, 995.3443603515625, -54.75603485107422, 77.58856964111328, 460.11163330078125, 260.33953857421875, 611.92822265625, -214.0562744140625, 578.0662231445312, 240.0568084716797, 216.91188049316406, 377.5715026855469, 945.9178466796875, -138.9347381591797, 448.5862731933594, 669.5656127929688, 597.1087036132812, 450.54095458984375, -122.42576599121094, 866.6150512695312, 400.6092224121094, 39.43914794921875, 693.8662719726562, 2130.188720703125, 662.9603881835938, -573.7457275390625, -256.7665710449219, 22.441612243652344, -358.093994140625, 1146.4019775390625, 95.45109558105469, 562.1854248046875, 1512.497802734375, 235.6078338623047, 534.7398681640625, 430.11370849609375, 213.44288635253906, 317.16558837890625, 775.7453002929688, -62.8289794921875, 400.76922607421875, -73.44943237304688, 726.451171875, 1214.918701171875, 420.9427490234375, -80.35883331298828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000547.npy"} +{"epoch": 0.8032305433186491, "step": 548, "batch_size": 64, "mean": 356.83355712890625, "std": 596.674560546875, "min": -966.30224609375, "p10": -230.15341491699218, "median": 298.69114685058594, "p90": 872.4000305175782, "max": 2886.806884765625, "pos_frac": 0.75, "sample": [-15.185836791992188, 876.478759765625, 309.90374755859375, 335.5822448730469, -109.27625274658203, 358.4631652832031, -966.30224609375, 1326.7025146484375, 426.88775634765625, 396.9737854003906, 421.2369384765625, 544.6804809570312, 1102.2501220703125, 233.05145263671875, 730.9867553710938, -234.22607421875, -130.29603576660156, 356.5706787109375, 897.7694091796875, 204.38577270507812, 862.8829956054688, 286.8012390136719, 204.2794189453125, -395.83502197265625, 485.3228454589844, 65.94405364990234, 173.94337463378906, 333.41143798828125, 287.4785461425781, 167.7042999267578, -335.8299255371094, 2886.806884765625, 792.196044921875, 229.46484375, 121.22412109375, -478.05963134765625, -72.34742736816406, 684.8429565429688, -220.65054321289062, 2219.790771484375, 41.104793548583984, -61.22793197631836, -283.62628173828125, 473.576416015625, -117.31069946289062, 69.94326782226562, 474.58575439453125, 141.4427490234375, 387.8700256347656, -241.24972534179688, 480.48126220703125, 396.9397277832031, -22.326372146606445, 1935.27197265625, 416.66168212890625, 474.165283203125, 107.52540588378906, -15.696075439453125, 311.70208740234375, 267.8664855957031, 55.717185974121094, 561.47607421875, 819.5535278320312, 796.89111328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000548.npy"} +{"epoch": 0.8046989720998532, "step": 549, "batch_size": 64, "mean": 274.5236511230469, "std": 545.8773193359375, "min": -1029.5499267578125, "p10": -273.37737121582023, "median": 166.56822204589844, "p90": 982.442205810547, "max": 1722.9912109375, "pos_frac": 0.671875, "sample": [288.0493469238281, 850.3916625976562, 88.43263244628906, -47.085548400878906, 117.13615417480469, 67.29150390625, -153.6485137939453, 507.50799560546875, 713.4424438476562, 988.0467529296875, -192.4073028564453, 1319.751708984375, 17.125526428222656, 164.91299438476562, -94.22787475585938, -328.2698974609375, 969.3649291992188, -339.45294189453125, -174.79595947265625, -788.941162109375, 624.6365966796875, 1079.8182373046875, -311.88311767578125, 316.27618408203125, 348.7323913574219, 286.0274353027344, 439.27825927734375, -48.69951629638672, 763.3671875, 943.51025390625, 31.82318115234375, 233.41392517089844, 277.8026123046875, -213.10745239257812, 398.1512145996094, -1029.5499267578125, 123.1797866821289, 599.1412353515625, 74.06331634521484, 14.407978057861328, -41.064239501953125, -17.729576110839844, 338.828857421875, 729.0468139648438, 1281.3966064453125, -106.50617980957031, -172.8763885498047, 330.54302978515625, -299.20733642578125, 1662.1107177734375, 1416.4364013671875, -772.2379760742188, -71.51747131347656, 683.61767578125, 432.12554931640625, -129.57168579101562, 153.92822265625, 168.22344970703125, 609.9144287109375, 1722.9912109375, 411.1927490234375, 301.93511962890625, 142.2155303955078, -127.29679107666016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000549.npy"} +{"epoch": 0.8061674008810573, "step": 550, "batch_size": 64, "mean": 334.7535400390625, "std": 521.6289672851562, "min": -1098.598876953125, "p10": -275.55433349609365, "median": 326.0226593017578, "p90": 927.3974731445314, "max": 1976.849609375, "pos_frac": 0.8125, "sample": [392.3042907714844, 327.9276428222656, 1111.9212646484375, 385.7474060058594, 301.2383117675781, 798.7841186523438, 1040.18359375, 313.81817626953125, -148.82786560058594, 208.59605407714844, 581.6221313476562, 390.11865234375, 548.0469970703125, 149.49539184570312, 324.11767578125, 1816.4410400390625, 66.15764617919922, -315.5877990722656, 81.33815002441406, 32.915714263916016, -1098.598876953125, -6.4909210205078125, 483.691162109375, 875.9325561523438, -11.451370239257812, 944.5225219726562, 887.4390258789062, 112.95368194580078, 247.08331298828125, 102.06370544433594, 38.80682373046875, -646.4445190429688, 482.75537109375, -127.53438568115234, 387.2864685058594, 803.3765258789062, 68.02980041503906, 520.967041015625, 1976.849609375, 450.9201965332031, 253.0420379638672, 621.9276123046875, 139.24440002441406, 46.175994873046875, 1370.9317626953125, 395.7398681640625, 1066.9095458984375, 140.50794982910156, 196.81967163085938, 618.3230590820312, 436.4845275878906, 638.5109252929688, -644.363525390625, 540.2120971679688, -182.14291381835938, 375.1786193847656, 274.80120849609375, 512.1046142578125, 382.0002746582031, -392.60540771484375, 485.9368896484375, -360.33099365234375, -482.1448974609375, 92.44633483886719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000550.npy"} +{"epoch": 0.8076358296622613, "step": 551, "batch_size": 64, "mean": 542.4798583984375, "std": 538.1138916015625, "min": -470.8398742675781, "p10": -60.906350326538075, "median": 482.7066192626953, "p90": 1391.8273681640626, "max": 1931.3369140625, "pos_frac": 0.796875, "sample": [-5.835536956787109, -32.33473205566406, 426.9822692871094, 805.6148071289062, 681.63330078125, 620.0479125976562, -470.8398742675781, -8.444469451904297, 1394.7528076171875, -283.9488525390625, -22.46161651611328, 397.13739013671875, -64.91175842285156, 577.5811157226562, 584.0706176757812, 1612.8284912109375, 479.5343933105469, 342.946533203125, 581.6904907226562, 1781.1085205078125, 967.30126953125, 349.3470458984375, 515.884765625, 627.8111572265625, 311.20147705078125, 298.6707763671875, 983.3573608398438, 392.25299072265625, 657.7570190429688, 346.4442138671875, 369.1396789550781, -51.56039810180664, 1538.12744140625, 166.29034423828125, 481.89263916015625, 645.9073486328125, 773.9271850585938, 687.4267578125, 549.113037109375, 89.68016052246094, -178.08114624023438, 1057.6641845703125, 1385.0013427734375, 483.5205993652344, -33.29680252075195, 1567.206298828125, -295.1681213378906, -299.1285095214844, 1456.3692626953125, 854.3090209960938, -281.9283752441406, 152.45858764648438, 1382.500244140625, 876.17724609375, 477.241455078125, 581.7392578125, 744.5543212890625, 345.16278076171875, 445.79498291015625, 906.7120971679688, 162.14979553222656, 1931.3369140625, 520.44970703125, 378.84051513671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000551.npy"} +{"epoch": 0.8091042584434655, "step": 552, "batch_size": 64, "mean": 605.8823852539062, "std": 716.0411376953125, "min": -757.0923461914062, "p10": -129.67884063720697, "median": 531.0127563476562, "p90": 1423.697314453125, "max": 2670.122314453125, "pos_frac": 0.859375, "sample": [881.2468872070312, 77.87262725830078, 1016.5790405273438, 2626.0712890625, 764.180908203125, 1244.829833984375, 598.3406982421875, 1232.681884765625, 33.76068115234375, 559.301025390625, 672.8270263671875, 554.4649658203125, 168.1190948486328, 514.629150390625, 189.68174743652344, 693.84375, 267.4154968261719, 664.8392333984375, 1349.1031494140625, -409.4828796386719, 44.867156982421875, 704.269287109375, 535.4219970703125, 348.3677062988281, -757.0923461914062, 4.452507019042969, 37.11674499511719, 1561.4583740234375, 251.33970642089844, -293.8394775390625, 1428.8624267578125, -640.3737182617188, 845.485107421875, -56.59803009033203, 511.5560302734375, 147.1293182373047, 495.9451904296875, 2457.918212890625, 1309.0943603515625, 1932.508544921875, 586.5800170898438, -181.46090698242188, 568.7803344726562, 939.4346923828125, 683.107177734375, 2670.122314453125, 251.87371826171875, 349.58892822265625, 1411.6453857421875, 2255.468017578125, 526.603515625, 151.76971435546875, 453.186279296875, -332.6996765136719, -68.22467041015625, 439.0570373535156, -156.01634216308594, 135.7578887939453, 434.91357421875, 565.9283447265625, 641.5528564453125, 1058.111572265625, 772.544189453125, 50.65471649169922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000552.npy"} +{"epoch": 0.8105726872246696, "step": 553, "batch_size": 64, "mean": 303.1957702636719, "std": 568.080322265625, "min": -833.753662109375, "p10": -208.92323913574216, "median": 208.37925720214844, "p90": 988.7486877441406, "max": 2567.246826171875, "pos_frac": 0.765625, "sample": [2567.246826171875, 917.865966796875, -200.01235961914062, 494.6927795410156, 486.00506591796875, -125.12308502197266, 54.633697509765625, 9.662773132324219, 146.0420379638672, 262.5179443359375, 127.04949951171875, 251.29307556152344, 107.41413116455078, 101.48251342773438, -60.43018341064453, 262.115966796875, -546.1642456054688, 14.204864501953125, 354.24700927734375, -92.28292846679688, 249.30520629882812, 158.29151916503906, 345.8850402832031, -537.8062133789062, 555.9884643554688, -89.64503479003906, 1221.8543701171875, 81.57489013671875, 7.596015930175781, 281.5733642578125, -149.25904846191406, 454.6160583496094, -227.41522216796875, -149.89955139160156, 563.6068115234375, 321.13043212890625, 369.2000732421875, 160.389892578125, 989.93310546875, -79.2022933959961, 402.7666320800781, -833.753662109375, 569.103759765625, 509.8177185058594, 183.57943725585938, 168.802734375, -761.32861328125, 52.30560302734375, 512.0404052734375, 22.21813201904297, 1190.185546875, 12.416526794433594, 1276.4005126953125, 581.9348754882812, 903.6761474609375, 1135.335693359375, 233.1790771484375, 985.9850463867188, -576.0919799804688, 1784.781494140625, -212.7421875, 863.8297119140625, 90.62274932861328, 649.2848510742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000553.npy"} +{"epoch": 0.8120411160058737, "step": 554, "batch_size": 64, "mean": 426.5704345703125, "std": 703.3275756835938, "min": -1008.0779418945312, "p10": -473.4370300292968, "median": 389.1060791015625, "p90": 1416.9748535156255, "max": 2249.103759765625, "pos_frac": 0.765625, "sample": [187.1820526123047, -676.9273681640625, 815.5572509765625, 678.5550537109375, 232.51712036132812, 944.30615234375, 539.994873046875, 422.6651306152344, -404.5207824707031, 417.697265625, 2249.103759765625, 427.7083740234375, 368.36859130859375, 306.7127990722656, -222.6220245361328, 498.8492431640625, 582.6455078125, 541.9190673828125, 561.062744140625, -28.890281677246094, -703.8508911132812, -695.3194580078125, -502.9725646972656, 7.242332458496094, 2108.15380859375, -212.27490234375, 246.09097290039062, 410.9778747558594, 379.53326416015625, 918.8564453125, 575.6553955078125, 1457.38623046875, 1646.337890625, 180.35601806640625, 911.0230712890625, 377.1062927246094, 860.2811279296875, 13.55117416381836, 1599.0040283203125, -150.70372009277344, -62.266510009765625, 18.23540496826172, 1194.013427734375, 608.87744140625, 1026.5382080078125, 86.46940612792969, 313.694580078125, 1.719146728515625, 889.8173217773438, 792.45458984375, -1008.0779418945312, 684.64111328125, 254.90689086914062, -633.3596801757812, 1162.143798828125, -740.1514892578125, -386.3141174316406, 36.48737335205078, 398.67889404296875, 1322.681640625, 1763.3609619140625, 1826.0538330078125, 89.6520004272461, -208.0677490234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000554.npy"} +{"epoch": 0.8135095447870778, "step": 555, "batch_size": 64, "mean": 491.5250244140625, "std": 749.5938110351562, "min": -1247.889404296875, "p10": -207.0268829345703, "median": 458.08164978027344, "p90": 1569.1054443359376, "max": 2411.475830078125, "pos_frac": 0.796875, "sample": [587.963134765625, 440.31048583984375, 1453.67724609375, 299.6122131347656, 638.6954956054688, 1252.7481689453125, 488.55523681640625, 259.6259460449219, -167.19049072265625, 180.47787475585938, -162.55125427246094, 853.6185302734375, 785.9228515625, 488.2962646484375, 17.34112548828125, 67.57267761230469, 186.16238403320312, -219.59201049804688, 2411.475830078125, 1166.7889404296875, 165.0869140625, 2167.31640625, 49.00696563720703, 539.5457763671875, 538.1559448242188, 121.70846557617188, 843.1124267578125, 35.50422668457031, 494.9866027832031, 78.43223571777344, 691.1738891601562, 1002.980224609375, 598.0340576171875, 481.7334289550781, 19.863037109375, 170.5267791748047, 743.9686279296875, -579.636474609375, 129.06167602539062, -177.708251953125, -987.6138916015625, 1158.6375732421875, 1781.6968994140625, 2352.298583984375, -536.9508056640625, 476.7707824707031, 2004.0323486328125, 123.34678649902344, 475.8528137207031, 70.97821044921875, 1564.2139892578125, -536.7680053710938, 1571.2017822265625, -1247.889404296875, -117.65045928955078, -147.04922485351562, 1782.907470703125, 553.4312744140625, -151.22970581054688, 171.9832763671875, 1178.1494140625, 330.1108703613281, -225.5068817138672, 670.28369140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000555.npy"} +{"epoch": 0.8149779735682819, "step": 556, "batch_size": 64, "mean": 300.2801208496094, "std": 445.6376647949219, "min": -444.3118896484375, "p10": -230.37686462402343, "median": 254.6309814453125, "p90": 956.1118713378908, "max": 1415.3575439453125, "pos_frac": 0.71875, "sample": [1261.0096435546875, 310.663818359375, 116.89901733398438, -237.551513671875, 292.9202575683594, 142.28338623046875, 614.5283813476562, 411.5885925292969, -200.9176483154297, 328.0389709472656, 102.6393051147461, 737.6917724609375, 283.59844970703125, -261.2091979980469, 249.5394287109375, 507.97149658203125, 1148.375244140625, -31.9801025390625, 698.2818603515625, -128.3638916015625, -444.3118896484375, -90.68010711669922, -387.6736145019531, 148.75367736816406, -415.3643798828125, -170.29266357421875, 438.6305847167969, 3.4730682373046875, 896.7881469726562, -328.8447265625, 371.42376708984375, 373.77435302734375, 312.4430236816406, 110.17017364501953, 851.7890014648438, -68.1422348022461, 421.21734619140625, 198.5607147216797, 234.99234008789062, -32.713157653808594, -89.966064453125, 830.842529296875, 183.62745666503906, 123.94593048095703, -213.63601684570312, 501.29388427734375, 1056.001953125, 479.64019775390625, 984.3713989257812, 66.1999282836914, 238.37237548828125, -147.3919677734375, 917.1633911132812, 306.66650390625, 1365.5950927734375, -14.689384460449219, 972.8040771484375, 1415.3575439453125, 488.1686096191406, 436.87469482421875, 385.4909362792969, -330.9871826171875, 259.7225341796875, 232.45965576171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000556.npy"} +{"epoch": 0.8164464023494861, "step": 557, "batch_size": 64, "mean": 433.4267883300781, "std": 699.504150390625, "min": -1455.416748046875, "p10": -315.915869140625, "median": 321.45404052734375, "p90": 1316.5631835937502, "max": 2581.6357421875, "pos_frac": 0.765625, "sample": [-1455.416748046875, 78.06394958496094, -1026.26953125, 897.1573486328125, 342.06719970703125, 270.6610107421875, 115.94187927246094, 532.221923828125, 678.7201538085938, 96.17868041992188, 944.5020141601562, 1341.0029296875, 999.0634155273438, 55.4454345703125, 111.77191162109375, 300.84088134765625, 471.05780029296875, 849.3474731445312, -85.72247314453125, 100.79285430908203, 80.30702209472656, 1259.537109375, 1659.31591796875, 650.9685668945312, -18.55157470703125, 550.9232177734375, 1.6832275390625, -336.31390380859375, -222.1846923828125, 753.9906005859375, 150.34564208984375, 687.7049560546875, 1118.3509521484375, 470.3543701171875, 259.2738037109375, 223.7104034423828, 432.0481872558594, -564.4736938476562, 885.6127319335938, 569.6510620117188, 741.5371704101562, -319.23004150390625, 477.9671630859375, 135.4947967529297, 146.8380889892578, -488.19873046875, 744.236083984375, 1630.84814453125, -346.3308410644531, 2581.6357421875, -72.96220397949219, 2265.751708984375, 1596.11669921875, 155.14279174804688, 1068.204833984375, -19.260696411132812, -251.2509765625, 1441.9097900390625, -308.18280029296875, 1166.9512939453125, -66.19173431396484, 496.8206481933594, 447.586669921875, 284.1986083984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000557.npy"} +{"epoch": 0.8179148311306902, "step": 558, "batch_size": 64, "mean": 589.447265625, "std": 618.3509521484375, "min": -1575.6715087890625, "p10": -93.48960571289062, "median": 495.21099853515625, "p90": 1367.0212280273438, "max": 1856.903564453125, "pos_frac": 0.828125, "sample": [405.07208251953125, 1108.025146484375, 236.13243103027344, 1328.80517578125, 178.36306762695312, 339.9713439941406, 539.5537109375, 17.394790649414062, 905.7674560546875, -327.4811706542969, -146.09371948242188, 1196.115478515625, 1291.2784423828125, 1157.1751708984375, 471.42535400390625, 1149.3033447265625, 667.0209350585938, 320.9509582519531, 1727.9578857421875, 1601.885986328125, -239.24029541015625, 385.4955139160156, 347.583984375, 1272.570556640625, -30.69916534423828, 698.3583984375, 1383.3995361328125, 466.32879638671875, 1407.193359375, 525.078125, 385.4077453613281, 600.1517333984375, 950.7102661132812, 1243.5985107421875, 1387.7701416015625, 1015.781005859375, -1575.6715087890625, -95.66134643554688, 383.4581604003906, 142.85353088378906, 372.8977966308594, 5.134920120239258, 135.57786560058594, 404.33013916015625, 495.08746337890625, -66.68916320800781, -120.57368469238281, 1856.903564453125, 1140.9449462890625, -0.2213726043701172, 783.1199951171875, 1094.1383056640625, 626.0047607421875, 478.6346740722656, 1031.9716796875, -632.0043334960938, 771.008544921875, 330.8305969238281, 495.33453369140625, -88.42221069335938, 219.75238037109375, 1503.16162109375, 1005.058349609375, 1059.552734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000558.npy"} +{"epoch": 0.8193832599118943, "step": 559, "batch_size": 64, "mean": 497.17767333984375, "std": 637.0014038085938, "min": -827.7745361328125, "p10": -110.45960922241211, "median": 337.32310485839844, "p90": 1261.9763916015627, "max": 2372.28759765625, "pos_frac": 0.8125, "sample": [983.3463745117188, 1349.4493408203125, 662.4190673828125, 675.33056640625, 124.23108673095703, 50.322906494140625, 147.16725158691406, 171.8334503173828, 1290.920166015625, 2372.28759765625, 1178.38330078125, 404.189697265625, 1109.72509765625, 52.041107177734375, 114.87332153320312, 776.459716796875, 1019.5462646484375, 1383.83935546875, -804.301025390625, -401.11749267578125, 1194.44091796875, 327.2183532714844, 218.45736694335938, 440.3839111328125, 94.31318664550781, 1191.51220703125, -141.69711303710938, -56.23785400390625, 1148.2791748046875, 244.37689208984375, -19.99614715576172, 319.3388671875, 114.9411392211914, 26.70893096923828, 180.29185485839844, -110.80839538574219, -29.61568832397461, -33.41997528076172, 238.46568298339844, 171.54800415039062, 1927.126953125, 559.6919555664062, 1019.81494140625, 107.99942779541016, -827.7745361328125, 757.6871948242188, 738.795654296875, 595.9775390625, 1125.798095703125, -109.6457748413086, 526.3001098632812, 270.28472900390625, 347.4278564453125, 1105.043701171875, 1505.9007568359375, 1820.27783203125, -423.37335205078125, 780.5863647460938, 1038.1446533203125, 466.21893310546875, 38.84485626220703, 73.25572204589844, -543.4700317382812, 739.0069580078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000559.npy"} +{"epoch": 0.8208516886930984, "step": 560, "batch_size": 64, "mean": 357.69287109375, "std": 565.0054931640625, "min": -996.400634765625, "p10": -245.65267333984374, "median": 303.09552001953125, "p90": 1133.5769165039064, "max": 2097.720947265625, "pos_frac": 0.734375, "sample": [2097.720947265625, -325.7869873046875, -95.31339263916016, 1066.47509765625, -394.8238830566406, 651.9838256835938, 46.444725036621094, -141.86663818359375, 78.88043212890625, 374.4868469238281, 240.1785888671875, 166.73007202148438, 235.3343505859375, 533.409423828125, 1427.1424560546875, -244.77630615234375, 754.884765625, 1275.8941650390625, 508.66534423828125, 1018.2033081054688, 764.4158935546875, -79.945068359375, 190.16567993164062, 1298.2156982421875, 890.7634887695312, -610.0862426757812, 9.581363677978516, 1058.928955078125, 461.539794921875, -167.87799072265625, 802.56689453125, -203.1394805908203, 25.875200271606445, 554.131103515625, 523.3094482421875, -474.00537109375, 108.58338165283203, 1343.7366943359375, 259.5227966308594, 519.6653442382812, 517.1514892578125, -148.9063720703125, 454.9129333496094, 212.02902221679688, 590.9077758789062, 1270.9249267578125, -121.27659606933594, -29.97148895263672, -105.65669250488281, 1162.3348388671875, 597.5421752929688, 215.13980102539062, 592.54345703125, 231.05320739746094, 364.58709716796875, 346.6682434082031, -246.02825927734375, 499.47454833984375, 776.7636108398438, 630.0510864257812, -668.5167236328125, 102.12609100341797, -996.400634765625, 95.07427978515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000560.npy"} +{"epoch": 0.8223201174743024, "step": 561, "batch_size": 64, "mean": 290.80755615234375, "std": 626.5330200195312, "min": -2158.138427734375, "p10": -301.5412231445312, "median": 241.5041961669922, "p90": 1074.555163574219, "max": 1797.16650390625, "pos_frac": 0.734375, "sample": [615.8914184570312, 433.43890380859375, 164.72933959960938, 26.20345115661621, -965.4077758789062, 1351.2918701171875, 755.2005004882812, 61.89152526855469, 604.3289184570312, 1099.5650634765625, 150.4874267578125, 499.8882751464844, 1797.16650390625, -192.80177307128906, 208.43368530273438, 181.66714477539062, 139.87884521484375, -192.43435668945312, 332.8135986328125, 428.0658264160156, 1250.7113037109375, 194.05337524414062, 536.4509887695312, 385.2052917480469, -273.8639221191406, 150.75962829589844, 807.1964111328125, 348.3386535644531, -55.89008331298828, 1612.5487060546875, 765.86181640625, 1228.4613037109375, 0.7635574340820312, 256.0184020996094, 906.917236328125, -564.5177612304688, -313.4029235839844, 485.80975341796875, 20.54621124267578, 619.972900390625, 156.4613800048828, -395.6934509277344, -181.5498046875, 738.0253295898438, 477.9676208496094, -152.6214599609375, 234.28814697265625, 783.8654174804688, 1016.19873046875, 248.72024536132812, -2158.138427734375, 1536.8876953125, 93.73872375488281, -142.76373291015625, 432.389404296875, -110.00994873046875, 433.54498291015625, -41.59745788574219, 637.7008056640625, 329.4111328125, -776.0850830078125, 149.53848266601562, -391.5860900878906, -169.24929809570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000561.npy"} +{"epoch": 0.8237885462555066, "step": 562, "batch_size": 64, "mean": 415.5235290527344, "std": 612.984619140625, "min": -1267.4881591796875, "p10": -214.06683807373045, "median": 381.84637451171875, "p90": 1015.738592529297, "max": 2623.953857421875, "pos_frac": 0.78125, "sample": [613.6571655273438, 2623.953857421875, 1377.3408203125, 515.568359375, 132.01229858398438, 396.4083557128906, 766.8854370117188, 278.8541259765625, 928.9521484375, 731.796142578125, 762.6229248046875, 909.1941528320312, 228.53414916992188, 498.9238586425781, 1074.3541259765625, -129.22727966308594, 249.65289306640625, 901.14453125, 205.92665100097656, 417.2326354980469, 182.1884002685547, 547.1585083007812, 612.2214965820312, -284.4851379394531, 28.61266326904297, 838.145263671875, 1054.0753173828125, -53.32965087890625, 1020.980224609375, 242.7193603515625, -225.5654296875, -199.01806640625, -92.73805236816406, 736.126708984375, -148.4315185546875, 2329.312255859375, -205.40411376953125, 458.006591796875, 630.9888916015625, 986.3609008789062, 703.9898071289062, 218.09104919433594, 647.3856201171875, 259.4293518066406, -1267.4881591796875, 367.2843933105469, -217.77943420410156, 863.4542236328125, 428.6602783203125, 1003.5081176757812, 14.892276763916016, -67.01170349121094, 116.8498306274414, 74.80516052246094, 583.6929931640625, 343.60955810546875, -597.1651000976562, 28.26569175720215, 145.9051971435547, -565.93603515625, 100.52760314941406, -457.06201171875, 1101.2603759765625, 822.623291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000562.npy"} +{"epoch": 0.8252569750367107, "step": 563, "batch_size": 64, "mean": 322.9136657714844, "std": 727.1387939453125, "min": -2543.427978515625, "p10": -507.6020172119141, "median": 361.9321594238281, "p90": 1049.5878112792973, "max": 1800.484619140625, "pos_frac": 0.78125, "sample": [195.35293579101562, -2543.427978515625, -658.5095825195312, 428.5937805175781, 744.9075317382812, 90.1689453125, 1367.1435546875, 39.749420166015625, 311.71234130859375, 622.2017822265625, 908.3131103515625, 185.2857666015625, -523.1466674804688, 874.764892578125, 810.2653198242188, 690.27490234375, 108.17829895019531, 1800.484619140625, 760.2151489257812, -179.48081970214844, 649.120849609375, 142.7103271484375, -198.00332641601562, 325.611328125, 952.0043334960938, 4.299396514892578, 721.4122314453125, 325.92755126953125, 1218.3294677734375, 945.7196044921875, 127.53182983398438, 681.7351684570312, -1707.1668701171875, 562.5423583984375, -114.81983947753906, 137.2037353515625, 43.38467788696289, 699.8319702148438, 551.8800048828125, 358.964111328125, 925.1865234375, 580.4298706054688, 1286.978759765625, 292.09588623046875, 1091.4093017578125, -214.047607421875, -1291.4541015625, 1786.34228515625, -497.39752197265625, 1423.6688232421875, -23.12728500366211, 722.1444702148438, 80.96558380126953, -651.5816650390625, 764.866943359375, -511.9753723144531, 467.97747802734375, -276.892822265625, 91.08616638183594, 565.6778564453125, 364.90020751953125, 399.0166320800781, 268.71929931640625, 560.21923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000563.npy"} +{"epoch": 0.8267254038179148, "step": 564, "batch_size": 64, "mean": 394.5144348144531, "std": 520.6907958984375, "min": -521.251220703125, "p10": -221.63592987060545, "median": 312.4113311767578, "p90": 1096.330364990235, "max": 1982.6839599609375, "pos_frac": 0.796875, "sample": [358.36639404296875, 505.9044494628906, 768.9666748046875, 162.2593231201172, 636.3931884765625, 577.0772705078125, 674.14111328125, -299.3273010253906, 267.1108093261719, -521.251220703125, 518.1236572265625, 738.4266967773438, -297.51678466796875, 339.2051086425781, 946.8407592773438, 33.424072265625, -172.43238830566406, 1299.3521728515625, 1160.3973388671875, 472.2520751953125, 314.1167297363281, 267.352783203125, 189.8993377685547, -203.91526794433594, -442.12835693359375, 354.7521057128906, -229.23049926757812, 180.36270141601562, 23.019378662109375, -397.58966064453125, 336.94635009765625, 1792.7388916015625, 707.8428344726562, -103.20543670654297, 40.677040100097656, 359.42401123046875, 151.5451202392578, 129.77682495117188, 375.947021484375, -37.11881637573242, 605.29296875, 1982.6839599609375, -56.919677734375, 641.2166748046875, 775.7149047851562, 762.8629150390625, 1352.9329833984375, 777.1563720703125, 1359.8720703125, 310.7059326171875, 12.285881042480469, 657.2208862304688, 44.661659240722656, 85.62626647949219, 125.35047912597656, 296.95599365234375, 233.19911193847656, 870.415283203125, 309.9841003417969, -78.07313537597656, 1373.392578125, -272.2886962890625, 879.606689453125, 220.1390380859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000564.npy"} +{"epoch": 0.8281938325991189, "step": 565, "batch_size": 64, "mean": 425.871826171875, "std": 733.3487548828125, "min": -1156.9530029296875, "p10": -442.4836364746093, "median": 270.57435607910156, "p90": 1231.387341308594, "max": 2558.281005859375, "pos_frac": 0.703125, "sample": [75.28916931152344, 1181.79833984375, 1308.11572265625, -1156.9530029296875, 240.15008544921875, 864.7788696289062, 2077.96240234375, -37.83174133300781, -164.12818908691406, -336.58868408203125, 697.368408203125, 1068.8970947265625, -274.7757568359375, -931.8466796875, 223.24322509765625, -487.8671875, -492.0749206542969, -65.56683349609375, -98.73779296875, 467.7093200683594, 1121.5511474609375, 152.12289428710938, 214.13308715820312, 1503.8272705078125, -561.5771484375, 9.830940246582031, 189.18701171875, 283.8932189941406, -606.6202392578125, 999.4319458007812, 633.6184692382812, 258.8524169921875, 603.7007446289062, 825.87548828125, 760.7320556640625, -2.3686065673828125, 883.15380859375, 597.4627075195312, -273.6283264160156, -108.80590057373047, -73.337646484375, 971.1555786132812, 734.2279663085938, 1811.6168212890625, 888.0651245117188, 2558.281005859375, 533.6365966796875, 2299.6005859375, 1179.17138671875, -513.7391357421875, -183.4615478515625, 12.933822631835938, 911.5269775390625, 493.7231140136719, 857.148193359375, 282.2962951660156, 1085.064208984375, 305.3200988769531, 102.1541748046875, 31.994163513183594, -297.19512939453125, 250.62673950195312, 119.03251647949219, 1252.6397705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000565.npy"} +{"epoch": 0.8296622613803231, "step": 566, "batch_size": 64, "mean": 476.88720703125, "std": 758.7432250976562, "min": -1359.807373046875, "p10": -376.5286499023438, "median": 543.9501037597656, "p90": 1471.5723022460943, "max": 2248.64990234375, "pos_frac": 0.703125, "sample": [-217.802734375, -790.6260986328125, 634.6210327148438, 2049.458251953125, 499.8834533691406, -116.77511596679688, 763.507080078125, 225.90145874023438, -1013.7245483398438, 540.7199096679688, 606.8499755859375, -243.5937957763672, -24.527374267578125, 678.3674926757812, 929.6915893554688, 424.60833740234375, 713.0186767578125, 81.45323181152344, 11.807575225830078, -1359.807373046875, 1003.4382934570312, 659.2338256835938, 547.1802978515625, 41.41656494140625, 1309.3543701171875, -561.819580078125, 499.44146728515625, 1072.969970703125, -91.51036071777344, 1135.3765869140625, 832.5238647460938, 1046.8336181640625, 697.649658203125, 101.98365783691406, 1050.00390625, -302.6352844238281, -110.51602172851562, 1949.1251220703125, 1003.1403198242188, -189.8208770751953, -371.37811279296875, 812.3526611328125, 599.7073364257812, 1518.453857421875, 1362.1820068359375, 1789.284912109375, 2248.64990234375, 628.2542724609375, -920.0093383789062, 408.8258056640625, -636.0216064453125, 1660.858154296875, 805.7726440429688, 1539.776123046875, -63.213279724121094, -378.73602294921875, 1103.0872802734375, 386.82452392578125, 1151.8720703125, -371.17547607421875, -1.3901290893554688, 27.5699462890625, 653.9216918945312, 478.9113464355469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000566.npy"} +{"epoch": 0.8311306901615272, "step": 567, "batch_size": 64, "mean": 364.84234619140625, "std": 479.86114501953125, "min": -1061.8560791015625, "p10": -82.09771308898922, "median": 295.373046875, "p90": 977.8046325683595, "max": 1804.6904296875, "pos_frac": 0.875, "sample": [144.08807373046875, 45.420799255371094, 1283.1480712890625, 602.94921875, 386.96295166015625, 600.962158203125, 137.29998779296875, -99.67569732666016, 1040.1136474609375, 175.13967895507812, 985.4866943359375, 89.10858917236328, 1082.17138671875, 280.8533935546875, 118.07839965820312, 280.71478271484375, 1247.504638671875, 588.5369262695312, 604.0741577148438, 45.2228889465332, 603.8561401367188, 191.95223999023438, -317.6310729980469, 617.4744262695312, 307.94146728515625, 658.6990356445312, 730.9236450195312, 1480.10888671875, 12.26411247253418, 663.7965087890625, 39.65129852294922, 300.27386474609375, 397.36407470703125, -1061.8560791015625, 272.365966796875, -41.08241653442383, 39.75784683227539, 753.9652099609375, 1804.6904296875, 796.4879150390625, 176.88400268554688, -836.6469116210938, 261.08782958984375, 121.1862564086914, 237.82801818847656, 959.8798217773438, 290.47222900390625, 303.4580078125, 422.90130615234375, 382.796630859375, 141.90159606933594, 866.526123046875, 22.52819061279297, 459.7042236328125, -158.08949279785156, 284.4071044921875, 111.5892105102539, 318.0274658203125, -115.7198257446289, 250.38507080078125, 415.81207275390625, 537.8176879882812, -472.74871826171875, 478.75634765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000567.npy"} +{"epoch": 0.8325991189427313, "step": 568, "batch_size": 64, "mean": 481.43841552734375, "std": 534.9915771484375, "min": -841.20751953125, "p10": -83.31899223327635, "median": 384.1775360107422, "p90": 1135.2046264648438, "max": 1940.77099609375, "pos_frac": 0.875, "sample": [695.5301513671875, -757.6051025390625, 1264.94873046875, 924.10205078125, 494.59136962890625, 355.8132629394531, 610.6548461914062, 732.345703125, 386.5599060058594, -841.20751953125, -267.3951416015625, -125.4667739868164, 582.6368408203125, 1834.2989501953125, 211.94210815429688, 510.8014831542969, 230.69808959960938, 171.48638916015625, 213.03660583496094, -93.14503479003906, 556.40576171875, 374.3148498535156, 973.9144287109375, 988.6926879882812, 566.7821044921875, 312.89263916015625, 125.92330169677734, 1940.77099609375, 273.8937072753906, 359.4218444824219, 867.3018798828125, 653.2177734375, 1140.0499267578125, 660.8331909179688, 1513.4569091796875, 409.9891052246094, 205.69456481933594, 620.5978393554688, 300.0257873535156, 861.8280029296875, 1022.9963989257812, 61.87314224243164, 756.4208984375, 893.9119873046875, 80.22441101074219, 352.66485595703125, 142.56637573242188, -60.39155960083008, 348.48431396484375, 1123.89892578125, 74.0719223022461, 1258.244140625, 381.795166015625, -633.645263671875, 234.2515869140625, -345.905517578125, 1212.24560546875, 1017.4331665039062, 214.04208374023438, 622.9640502929688, 172.7412109375, 756.938232421875, 141.07086181640625, 142.5283660888672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000568.npy"} +{"epoch": 0.8340675477239354, "step": 569, "batch_size": 64, "mean": 409.6417236328125, "std": 701.2659301757812, "min": -1879.828857421875, "p10": -260.9833984375, "median": 337.5604705810547, "p90": 1336.0948730468751, "max": 2278.959716796875, "pos_frac": 0.78125, "sample": [-578.4137573242188, 54.675567626953125, 1883.68994140625, 187.74777221679688, 496.430419921875, -227.63475036621094, 715.1581420898438, 132.44656372070312, -513.5473022460938, 250.1419219970703, 805.8899536132812, 1355.64697265625, 503.05126953125, 551.7141723632812, 375.56298828125, -627.390869140625, 247.0337677001953, 933.3366088867188, -258.3299865722656, -28.069726943969727, -1299.63232421875, 1687.2889404296875, 127.34591674804688, -229.16680908203125, 56.6011962890625, 678.4440307617188, 1071.5565185546875, 69.94920349121094, 42.892120361328125, 99.157958984375, 1355.3057861328125, 899.385498046875, 631.053466796875, -31.934173583984375, 1342.795654296875, 403.82135009765625, 84.75689697265625, 366.2422790527344, 776.3237915039062, -1879.828857421875, 570.6248779296875, 166.63258361816406, -2.849367141723633, 1125.398681640625, 903.0336303710938, 140.81649780273438, 883.4136352539062, 152.6683349609375, -262.1205749511719, 748.294677734375, -341.0252685546875, 1810.72998046875, 68.09087371826172, 486.5356140136719, 1004.17041015625, -29.56775665283203, 137.07269287109375, 677.570556640625, 308.878662109375, 168.83282470703125, 676.3486328125, 712.6041259765625, 1320.459716796875, 2278.959716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000569.npy"} +{"epoch": 0.8355359765051396, "step": 570, "batch_size": 64, "mean": 374.2774658203125, "std": 598.9781494140625, "min": -901.3948974609375, "p10": -237.0114028930664, "median": 291.8476867675781, "p90": 1206.1352050781256, "max": 2597.140380859375, "pos_frac": 0.75, "sample": [315.7825927734375, 1070.4232177734375, 409.52978515625, 1534.0987548828125, 2597.140380859375, 119.66828918457031, 758.6392211914062, 1392.405029296875, 1500.0836181640625, 297.18267822265625, 354.30413818359375, 985.4174194335938, 251.56161499023438, 489.50390625, 26.35011863708496, 445.7933349609375, 861.9533081054688, 431.77862548828125, 768.7467041015625, -205.66416931152344, 772.7699584960938, -246.90554809570312, -565.1295166015625, -737.1978759765625, 142.52801513671875, 909.3728637695312, -532.8028564453125, -221.03518676757812, 298.455078125, 448.89727783203125, 518.013916015625, 296.4080810546875, -182.9464111328125, -452.33587646484375, 229.93914794921875, 261.95172119140625, 235.97703552246094, 458.1348876953125, 79.83940887451172, 1264.2974853515625, 514.72216796875, -196.27545166015625, 225.71676635742188, 191.12982177734375, 1276.310791015625, 287.28729248046875, 229.69947814941406, 714.0625, 1050.472412109375, 760.6368408203125, -80.09114074707031, 210.15097045898438, 557.272705078125, 403.22021484375, -66.19849395751953, -901.3948974609375, 273.6090087890625, -25.963180541992188, 160.4578399658203, -147.58294677734375, -174.14974975585938, 1369.2020263671875, 182.39242553710938, -243.8583526611328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000570.npy"} +{"epoch": 0.8370044052863436, "step": 571, "batch_size": 64, "mean": 441.9400939941406, "std": 676.9304809570312, "min": -1198.64501953125, "p10": -393.48218383789055, "median": 426.2853546142578, "p90": 1314.4791259765625, "max": 2164.349365234375, "pos_frac": 0.78125, "sample": [560.3220825195312, 756.334228515625, 125.10853576660156, 878.177001953125, -221.01943969726562, 546.2217407226562, 70.96754455566406, 963.042724609375, 407.89617919921875, 1118.7274169921875, 1998.022216796875, -418.4427185058594, -255.92282104492188, 1627.4737548828125, 198.0320281982422, 540.9692993164062, 814.8438720703125, 767.069091796875, 444.6745300292969, 778.9146118164062, 2164.349365234375, 1805.2451171875, 725.2761840820312, 1141.830078125, -335.2409362792969, 36.7720947265625, 772.2911987304688, 370.8477783203125, 1451.2557373046875, 1170.7362060546875, 253.68235778808594, -103.18426513671875, 385.9552307128906, 1654.54541015625, 898.470703125, -96.69358825683594, -558.354248046875, 151.25466918945312, 1290.3157958984375, 532.29931640625, 850.789794921875, -452.4691467285156, 1324.8348388671875, 548.42138671875, 453.92138671875, 95.98603057861328, 579.8519897460938, 192.3850860595703, 675.3731079101562, -33.67079162597656, -554.9570922851562, -1198.64501953125, 309.052001953125, 70.72232055664062, -713.4501342773438, 312.97857666015625, 213.52537536621094, 497.9919128417969, -916.7510986328125, 62.05192184448242, -160.63143920898438, 612.4307861328125, 23.352081298828125, 78.00732421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000571.npy"} +{"epoch": 0.8384728340675477, "step": 572, "batch_size": 64, "mean": 461.7014465332031, "std": 763.6243896484375, "min": -1400.8536376953125, "p10": -313.31953125, "median": 371.12451171875, "p90": 1419.4121582031255, "max": 3114.997314453125, "pos_frac": 0.71875, "sample": [-288.4643859863281, -172.5117950439453, -325.90411376953125, 527.278076171875, 1879.8599853515625, 26.68596649169922, 74.90310668945312, -2.7791366577148438, -24.76003646850586, 205.47232055664062, 11.954366683959961, 1297.25634765625, 877.7000732421875, 333.78271484375, 593.1782836914062, 1159.8497314453125, 966.074462890625, 1229.1873779296875, 1471.7646484375, 649.677978515625, 262.94256591796875, 743.022705078125, -67.67192077636719, 1078.5723876953125, -945.9639282226562, 106.76856994628906, 485.35504150390625, -314.899658203125, -259.4750671386719, 927.9746704101562, -499.3311767578125, 529.977783203125, 467.63702392578125, 36.869049072265625, 254.51571655273438, 3114.997314453125, 93.88127899169922, 384.1043395996094, -301.79461669921875, -616.8578491210938, 1537.0238037109375, 131.5619659423828, -309.632568359375, 576.319580078125, -218.76516723632812, -410.3190612792969, 1172.1829833984375, 803.3600463867188, 750.2965087890625, -13.108833312988281, 883.8756103515625, 318.96588134765625, 392.9934997558594, 1988.080810546875, 197.90866088867188, 358.1446838378906, 633.6065673828125, 2108.855224609375, -16.3323974609375, -1400.8536376953125, 626.2842407226562, 1130.481201171875, 1671.9586181640625, 665.1743774414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000572.npy"} +{"epoch": 0.8399412628487518, "step": 573, "batch_size": 64, "mean": 397.1726379394531, "std": 571.3529052734375, "min": -683.7924194335938, "p10": -162.93608703613282, "median": 307.73211669921875, "p90": 970.231756591797, "max": 2552.51513671875, "pos_frac": 0.78125, "sample": [662.5989379882812, -101.63604736328125, 9.878927230834961, -3.90899658203125, -21.443313598632812, 2552.51513671875, 915.3222045898438, -85.11620330810547, 164.07861328125, 201.5098114013672, 431.7277526855469, 251.15065002441406, -683.7924194335938, 2102.4599609375, 385.6981201171875, 509.7904052734375, 1771.2314453125, -406.69256591796875, 36.60053253173828, -161.75836181640625, -514.3322143554688, 1100.812255859375, 796.1416015625, 541.1318359375, 66.79999542236328, 172.05316162109375, 908.5421752929688, 999.0784301757812, 141.688232421875, 1168.0059814453125, -288.774169921875, 905.3262329101562, 347.9628601074219, 546.2111206054688, 616.08251953125, -326.0085754394531, 347.48046875, 84.62469482421875, 784.6591796875, 1.5769119262695312, 935.586669921875, 149.11105346679688, 291.53240966796875, -97.2298812866211, 374.9677429199219, 82.74308776855469, 323.93182373046875, -29.970775604248047, 904.9322509765625, -231.95855712890625, 526.148193359375, 144.92941284179688, 135.90203857421875, 750.5255126953125, 382.179443359375, 65.0398178100586, -163.44082641601562, 823.420166015625, 586.8304443359375, 544.9083251953125, 148.5570068359375, 731.7709350585938, 985.0796508789062, 124.27763366699219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000573.npy"} +{"epoch": 0.8414096916299559, "step": 574, "batch_size": 64, "mean": 435.4447021484375, "std": 592.907958984375, "min": -964.9228515625, "p10": -275.53297119140626, "median": 371.8797302246094, "p90": 1277.0200805664065, "max": 2011.835205078125, "pos_frac": 0.765625, "sample": [236.34158325195312, 770.7737426757812, -97.36581420898438, -157.781982421875, 1220.536865234375, 63.42481231689453, 112.4822998046875, 1342.5140380859375, 1051.1962890625, 752.2637939453125, 6.582328796386719, 1573.4635009765625, 1098.2789306640625, 844.1633911132812, 160.15792846679688, 673.7964477539062, 96.11516571044922, 433.34674072265625, -664.013427734375, 2011.835205078125, 553.274658203125, 640.0907592773438, 475.29913330078125, 654.3084716796875, 186.48236083984375, 749.4594116210938, 387.7563781738281, -390.9698486328125, 97.97563171386719, -262.0009765625, 166.64797973632812, -449.3101501464844, 239.108642578125, 1301.2271728515625, -79.96332550048828, 758.4920654296875, -502.0036315917969, 843.802490234375, 235.06394958496094, 544.8193359375, 944.9201049804688, -281.3323974609375, 746.9328002929688, 881.42431640625, 638.92138671875, 356.0030822753906, 459.7406921386719, -31.045791625976562, -964.9228515625, -313.2892761230469, 338.0817565917969, 113.84273529052734, -45.3514404296875, 1466.861328125, -6.810821533203125, 804.9599609375, 342.7672424316406, 1457.8109130859375, 168.4349822998047, 614.7279663085938, 90.02974700927734, 1755.359375, 658.5471801757812, -5.824575424194336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000574.npy"} +{"epoch": 0.8428781204111601, "step": 575, "batch_size": 64, "mean": 408.8327941894531, "std": 692.0982055664062, "min": -1272.97509765625, "p10": -320.3971405029297, "median": 415.3527374267578, "p90": 1332.0375366210942, "max": 2188.193603515625, "pos_frac": 0.765625, "sample": [1465.424560546875, 228.1717071533203, 814.9505004882812, 420.80548095703125, 551.3151245117188, -888.767822265625, 591.6483154296875, 499.8295593261719, 221.30471801757812, 157.1644744873047, 41.98218536376953, -41.273963928222656, 409.8999938964844, 1070.9566650390625, 586.661865234375, 321.6205749511719, -132.82443237304688, 796.3422241210938, 233.2353057861328, 314.25885009765625, 365.1969909667969, 80.4813232421875, 1372.2579345703125, 643.0272827148438, 611.4755859375, 376.2348327636719, -1247.865478515625, 290.3222351074219, -740.42041015625, 567.1141357421875, -927.0393676757812, 1238.18994140625, 879.6681518554688, 1385.8369140625, 260.66162109375, -145.3790740966797, -332.25958251953125, -66.8990707397461, 1396.57275390625, 1031.083984375, -221.46014404296875, 246.032470703125, -292.7181091308594, 1030.09912109375, 1121.748291015625, -825.1250610351562, 520.4028930664062, 548.7919921875, 833.9404907226562, 2188.193603515625, 533.103271484375, 940.91162109375, 436.1822814941406, -28.29052734375, 1541.2655029296875, 137.87576293945312, 576.900390625, -1272.97509765625, 2074.338134765625, 76.48139953613281, -173.65338134765625, 682.8416748046875, 139.5484161376953, 649.8966064453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000575.npy"} +{"epoch": 0.8443465491923642, "step": 576, "batch_size": 64, "mean": 310.19610595703125, "std": 582.2504272460938, "min": -868.3506469726562, "p10": -375.238232421875, "median": 207.0791244506836, "p90": 1057.5937866210938, "max": 1702.5860595703125, "pos_frac": 0.734375, "sample": [209.99623107910156, 1070.5836181640625, -105.47615051269531, -6.936397552490234, -381.5882568359375, -198.58961486816406, -173.29588317871094, 1662.43798828125, 895.5691528320312, 161.654052734375, 256.9646301269531, 1597.053955078125, 113.656005859375, 106.28175354003906, 779.0325317382812, 467.09991455078125, 418.6155700683594, -868.3506469726562, 288.5549011230469, 712.655029296875, 537.4078979492188, 1098.317626953125, 620.5294189453125, 829.294189453125, 16.948223114013672, 1425.828369140625, -99.71403503417969, -505.26727294921875, -189.78848266601562, 710.7003173828125, -252.6876678466797, 221.0395050048828, 49.127655029296875, 37.75945281982422, 880.8800048828125, 515.5045776367188, 1702.5860595703125, 283.92327880859375, 250.63153076171875, 721.8125, 62.59375762939453, 962.08642578125, 183.60623168945312, 339.9759521484375, 79.57881927490234, -821.0774536132812, -59.22923278808594, 689.94091796875, -622.3385009765625, 664.3286743164062, 416.3743896484375, 1141.4913330078125, 1027.2841796875, 927.1409301757812, -853.0106811523438, -393.6453552246094, 143.50177001953125, 192.08389282226562, 204.16201782226562, 129.7833251953125, -360.4215087890625, 147.24679565429688, -329.06439208984375, 119.40848541259766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000576.npy"} +{"epoch": 0.8458149779735683, "step": 577, "batch_size": 64, "mean": 401.912109375, "std": 631.7845458984375, "min": -1452.9244384765625, "p10": -97.35993881225583, "median": 366.6878204345703, "p90": 1332.658288574219, "max": 2417.468505859375, "pos_frac": 0.84375, "sample": [-1452.9244384765625, 2417.468505859375, 1280.563232421875, 1603.3162841796875, 167.70355224609375, 30.611068725585938, 293.5157775878906, 167.2516326904297, 132.27825927734375, -60.777496337890625, 720.837158203125, 308.7969055175781, 563.520751953125, 437.71392822265625, 435.56707763671875, 728.2371215820312, 1006.522705078125, 1391.872314453125, -542.3311157226562, 652.0997314453125, 166.34364318847656, 42.701698303222656, -883.5936279296875, 190.3641357421875, 180.4694366455078, 693.6920166015625, 157.22628784179688, 273.78363037109375, 444.65509033203125, 405.77264404296875, 145.46994018554688, 468.100830078125, 281.4839172363281, -66.41008758544922, 1695.4322509765625, 52.893218994140625, 451.740234375, -584.7122192382812, 231.29786682128906, -369.4532470703125, 414.4211730957031, 189.31265258789062, 373.2763671875, 453.73150634765625, -37.45246124267578, 501.1947021484375, 67.82737731933594, 639.050048828125, 794.651611328125, 971.431884765625, 77.46156311035156, 360.0992736816406, 1354.9847412109375, -110.62416076660156, 1745.396484375, 437.8055419921875, 483.0414123535156, -654.9757080078125, 68.59803771972656, 1525.2301025390625, 19.826950073242188, 964.4496459960938, 436.1697692871094, 388.36669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000577.npy"} +{"epoch": 0.8472834067547724, "step": 578, "batch_size": 64, "mean": 445.94696044921875, "std": 558.50341796875, "min": -841.18408203125, "p10": -159.86340026855467, "median": 380.1394500732422, "p90": 1199.9249755859382, "max": 1979.003662109375, "pos_frac": 0.8125, "sample": [1979.003662109375, 196.05105590820312, 790.473876953125, 108.0347900390625, 565.68994140625, 319.49176025390625, 1355.3612060546875, 489.0502014160156, 173.10775756835938, -175.7266082763672, 94.56291961669922, 907.5895385742188, 428.50909423828125, 253.466064453125, 169.84732055664062, -516.6857299804688, 131.19053649902344, 21.537376403808594, 1825.1153564453125, 870.5615234375, 720.1295776367188, 423.28948974609375, 1049.0308837890625, -95.38566589355469, 7.081901550292969, 843.8867797851562, 791.9429931640625, -105.58014678955078, -335.6471252441406, 136.49588012695312, -27.696701049804688, -426.81103515625, 360.0050048828125, 400.2738952636719, 161.3767852783203, 555.5060424804688, -225.795166015625, 265.83154296875, -841.18408203125, -131.27557373046875, 533.907470703125, 729.3858642578125, 473.73919677734375, 453.1185607910156, 1264.5938720703125, 941.7138671875, 298.4980163574219, 104.05093383789062, 839.4359130859375, 169.5694122314453, 809.067138671875, 842.8860473632812, 75.14305114746094, 1320.4613037109375, 845.3843994140625, 1671.4742431640625, -168.24301147460938, 301.1112060546875, 1477.373046875, 999.746826171875, 547.090087890625, -140.31097412109375, 95.40986633300781, 544.2922973632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000578.npy"} +{"epoch": 0.8487518355359766, "step": 579, "batch_size": 64, "mean": 268.92694091796875, "std": 599.6517333984375, "min": -1646.5748291015625, "p10": -428.4040008544921, "median": 297.35520935058594, "p90": 1009.3469482421879, "max": 2155.751953125, "pos_frac": 0.703125, "sample": [5.945762634277344, -157.0176239013672, -1646.5748291015625, 655.760009765625, -67.83059692382812, 132.76034545898438, 849.9083862304688, 326.7603454589844, 681.1786499023438, 400.8255615234375, -12.812446594238281, -140.3833465576172, -749.4246215820312, 446.69012451171875, 281.6227111816406, 639.7239990234375, 280.84698486328125, 499.50030517578125, 1099.477783203125, 265.67333984375, 1042.0567626953125, 94.62385559082031, 372.4610900878906, 507.9697265625, 2.866567611694336, 499.0860595703125, 362.16192626953125, 116.41373443603516, 565.935791015625, 260.55401611328125, 525.0176391601562, -473.97418212890625, 648.3856811523438, 678.7607421875, 2155.751953125, 1425.111083984375, 393.11572265625, -762.7191772460938, -56.95705032348633, 496.6056823730469, 1164.0098876953125, -322.0735778808594, 729.6248168945312, -290.76922607421875, 1274.8951416015625, -82.02229309082031, -28.899799346923828, -219.28968811035156, -631.258056640625, 304.27423095703125, 933.0240478515625, 492.3488464355469, -82.20674133300781, 562.8195190429688, 359.9783630371094, -831.4539794921875, 1086.4129638671875, 62.1593017578125, 263.41534423828125, -848.9281616210938, 198.19163513183594, -212.9134521484375, 290.4361877441406, 393.69061279296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000579.npy"} +{"epoch": 0.8502202643171806, "step": 580, "batch_size": 64, "mean": 503.0469055175781, "std": 693.0075073242188, "min": -1155.6800537109375, "p10": -94.24470062255858, "median": 340.78623962402344, "p90": 1265.5536376953125, "max": 2954.265380859375, "pos_frac": 0.828125, "sample": [-244.4249725341797, 1020.7371826171875, 302.07659912109375, 575.589111328125, -133.55279541015625, 194.16705322265625, 192.75401306152344, 2044.216064453125, 444.5812072753906, 268.38800048828125, 447.25164794921875, -79.240234375, 2954.265380859375, 193.54263305664062, 63.198978424072266, 690.9871215820312, 372.97406005859375, 96.48698425292969, 747.2269897460938, 554.5125732421875, 267.5425720214844, 1163.3719482421875, 1067.6842041015625, 633.605712890625, 544.9891357421875, 12.085456848144531, 1103.8895263671875, 1265.519287109375, 588.1012573242188, 146.64871215820312, 57.609031677246094, 158.35081481933594, 55.152305603027344, 97.81288146972656, 800.2908935546875, 1265.568359375, 48.76470947265625, -111.04325103759766, 934.2096557617188, 776.7854614257812, 1915.7325439453125, 10.298322677612305, -1155.6800537109375, 986.738525390625, -56.67975616455078, 772.6483764648438, -364.6560974121094, -100.67518615722656, 622.9638671875, 2734.79541015625, 0.7783985137939453, -53.173431396484375, 1430.1357421875, 105.17129516601562, 356.58001708984375, 412.2837829589844, -5.217079162597656, 337.0676574707031, 33.57793426513672, -359.5983581542969, 344.50482177734375, 959.8612670898438, 1354.742919921875, 330.122802734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000580.npy"} +{"epoch": 0.8516886930983847, "step": 581, "batch_size": 64, "mean": 353.0070495605469, "std": 632.11962890625, "min": -901.1304321289062, "p10": -459.7761596679687, "median": 336.74456787109375, "p90": 976.6237304687502, "max": 2950.917724609375, "pos_frac": 0.734375, "sample": [184.85494995117188, 104.01484680175781, 110.71713256835938, -98.58909606933594, 403.94970703125, 453.4881896972656, -441.6654052734375, 157.42962646484375, 925.58251953125, -590.0732421875, 776.5267944335938, 756.234130859375, 505.70245361328125, 372.41778564453125, 661.8291015625, -523.139892578125, 1403.912353515625, 879.4139404296875, 522.2703857421875, 1351.0777587890625, -183.97813415527344, 646.0755615234375, 122.54617309570312, 545.068115234375, 591.5284423828125, 579.5150146484375, 297.55078125, 204.17767333984375, 55.853515625, 998.49853515625, -314.379638671875, 2950.917724609375, -31.99148941040039, 1570.51611328125, 265.36328125, 451.98687744140625, 892.239501953125, -183.7192840576172, 73.1631851196289, 472.27972412109375, 334.18621826171875, 122.66891479492188, -70.0772705078125, -305.1396484375, 1179.305908203125, 242.4610137939453, 644.6657104492188, 897.2222900390625, 1344.1505126953125, 590.088134765625, -616.7476806640625, -901.1304321289062, -12.27316665649414, -559.3189697265625, 728.538330078125, 339.30291748046875, -462.830322265625, -637.029052734375, 112.00970458984375, -452.6497802734375, 380.8357849121094, 866.017822265625, 330.93505859375, 578.09375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000581.npy"} +{"epoch": 0.8531571218795888, "step": 582, "batch_size": 64, "mean": 392.15032958984375, "std": 563.015869140625, "min": -1041.5322265625, "p10": -204.11291198730467, "median": 312.8669738769531, "p90": 1104.6315673828126, "max": 1737.7562255859375, "pos_frac": 0.71875, "sample": [-1041.5322265625, 238.01596069335938, 1282.6036376953125, 281.6400146484375, 594.06591796875, -175.46047973632812, 292.6583251953125, 469.47308349609375, 452.4276123046875, -36.41733169555664, 458.8416748046875, 114.81914520263672, -38.30348205566406, 796.0736083984375, 820.768798828125, 1462.4500732421875, -58.04328918457031, -109.03941345214844, -292.55096435546875, 1152.51904296875, 425.61614990234375, 61.49214172363281, 573.9824829101562, -618.3802490234375, -272.369384765625, 909.24365234375, 205.56005859375, 1050.0196533203125, 1496.543701171875, 1737.7562255859375, 908.5615844726562, 333.8552551269531, 1055.2310791015625, -82.29337310791016, -312.6998291015625, 189.39938354492188, -16.787498474121094, 167.86354064941406, 620.68505859375, 629.7544555664062, 333.07562255859375, -193.80235290527344, 940.1326293945312, 1092.0257568359375, 0.2979888916015625, 1071.9140625, -0.5343399047851562, 772.3154907226562, -208.53172302246094, 256.38214111328125, -5.398509979248047, 779.794189453125, 1110.0340576171875, 382.16705322265625, -41.27268600463867, -961.2208862304688, 225.75338745117188, 263.5610656738281, 722.312744140625, 656.1356201171875, 1219.14208984375, 220.24085998535156, 679.6287231445312, 55.423194885253906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000582.npy"} +{"epoch": 0.8546255506607929, "step": 583, "batch_size": 64, "mean": 238.0078582763672, "std": 661.7612915039062, "min": -1336.7723388671875, "p10": -606.3111999511718, "median": 233.1663360595703, "p90": 941.5999694824219, "max": 2076.943603515625, "pos_frac": 0.71875, "sample": [109.26333618164062, 276.58074951171875, 336.3476867675781, -604.2377319335938, 922.77880859375, 47.81910705566406, 937.4224853515625, 288.44268798828125, 2076.943603515625, 165.08624267578125, -196.64947509765625, 57.72090148925781, -962.0894775390625, 367.91680908203125, 181.0342559814453, -242.08871459960938, 856.7924194335938, 933.7696533203125, 324.7459411621094, 1067.4537353515625, 399.197021484375, 66.0381851196289, -573.7160034179688, -398.6871337890625, -105.18354034423828, -916.23193359375, 321.335205078125, 232.0499267578125, -607.1998291015625, -742.7129516601562, -157.01316833496094, -212.69363403320312, 526.8339233398438, 572.63671875, 378.8276062011719, 2001.1141357421875, 429.0114440917969, 12.69476318359375, 943.3903198242188, 226.34097290039062, 1.94256591796875, 357.3329772949219, 354.2511291503906, -102.53091430664062, -1064.560546875, -1336.7723388671875, 1645.338623046875, 440.564453125, -376.1862487792969, 708.3173828125, 933.6756591796875, 1224.9659423828125, 129.826171875, 542.4600830078125, 741.5184326171875, -753.0209350585938, 106.70289611816406, 492.9165954589844, 76.8070068359375, 234.28274536132812, 1194.641845703125, 76.88726806640625, -17.89299774169922, 279.9491271972656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000583.npy"} +{"epoch": 0.856093979441997, "step": 584, "batch_size": 64, "mean": 487.2535705566406, "std": 585.6002197265625, "min": -647.8678588867188, "p10": -133.03034057617185, "median": 343.5148468017578, "p90": 1290.496142578125, "max": 2182.425048828125, "pos_frac": 0.796875, "sample": [602.0383911132812, 243.0557403564453, -325.84735107421875, 302.39031982421875, -107.25146484375, -407.3179931640625, 622.2150268554688, 469.64263916015625, 1135.406494140625, 59.99616622924805, -4.300537109375, -144.07843017578125, 116.279541015625, 217.48724365234375, 353.16217041015625, 1118.543212890625, -9.135562896728516, 1256.3846435546875, 924.188232421875, -450.2072448730469, 289.96270751953125, 569.3671875, -5.5470733642578125, 272.8885498046875, 363.086669921875, 172.05606079101562, 1320.22802734375, 183.75674438476562, 210.52542114257812, 228.27731323242188, 1109.98876953125, 570.8839111328125, 301.9228820800781, -647.8678588867188, 730.32177734375, 328.258056640625, 567.893798828125, 553.2552490234375, 337.4483337402344, -6.944061279296875, 481.86505126953125, -319.43011474609375, 349.58135986328125, 1643.2650146484375, 866.4600830078125, 295.64251708984375, 1072.35302734375, 1235.6195068359375, -244.47247314453125, 2182.425048828125, 1879.1805419921875, 29.758224487304688, 1594.329345703125, 1305.1153564453125, 398.57220458984375, 1610.4364013671875, 187.68519592285156, 681.2632446289062, -47.164756774902344, 287.3508605957031, 581.3255004882812, 421.22857666015625, 1110.549560546875, 158.87452697753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000584.npy"} +{"epoch": 0.8575624082232012, "step": 585, "batch_size": 64, "mean": 452.44818115234375, "std": 746.1478881835938, "min": -1875.4425048828125, "p10": -348.4046447753906, "median": 441.4910583496094, "p90": 1375.2043090820318, "max": 2707.573486328125, "pos_frac": 0.734375, "sample": [185.75738525390625, -236.3157196044922, 666.908203125, 301.33056640625, 506.808837890625, 2707.573486328125, 996.4623413085938, -604.22265625, 441.1911315917969, 272.681396484375, 1034.9932861328125, -21.926132202148438, 665.0318603515625, 605.245361328125, -11.313697814941406, 1619.36572265625, 1427.9552001953125, 672.8873291015625, 1529.40673828125, 492.78240966796875, 443.0336608886719, 1252.118896484375, 266.15899658203125, 441.7909851074219, 1089.684326171875, -40.64460754394531, 823.0875854492188, 1100.358642578125, 675.8514404296875, 161.0645751953125, 98.25746154785156, 753.9592895507812, 1457.2451171875, 96.05126953125, 1059.5433349609375, 262.7998046875, 1056.767822265625, -297.2117614746094, -52.09294128417969, 910.8988647460938, 351.8919677734375, -1875.4425048828125, 887.6228637695312, -805.9228515625, 1072.360107421875, 461.80218505859375, 515.9515991210938, -451.31719970703125, 854.9090576171875, -327.6022033691406, -82.29803466796875, -1195.63623046875, -62.01051712036133, -357.3199768066406, 178.19850158691406, 1577.3817138671875, 395.2718505859375, -561.59033203125, 917.3430786132812, -34.2105712890625, 258.681884765625, 132.7008514404297, 168.50782775878906, 2126.085693359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000585.npy"} +{"epoch": 0.8590308370044053, "step": 586, "batch_size": 64, "mean": 298.210693359375, "std": 600.3308715820312, "min": -1280.1248779296875, "p10": -377.0206115722656, "median": 319.5325469970703, "p90": 1079.7588745117193, "max": 1570.41455078125, "pos_frac": 0.6875, "sample": [1196.69921875, -260.19573974609375, -262.3681640625, 551.408447265625, 649.0457763671875, 312.60711669921875, -293.5166320800781, 359.156494140625, 381.0548400878906, -51.435035705566406, -763.5840454101562, 326.4579772949219, 765.7117309570312, 1464.5875244140625, -41.708885192871094, -164.8194122314453, 613.271484375, -756.2418212890625, 198.10781860351562, 190.92564392089844, -197.89710998535156, 88.6294174194336, 1177.708251953125, 728.040283203125, 880.3999633789062, -49.33799743652344, 738.6968383789062, -241.6947021484375, 92.71794891357422, 523.772705078125, 756.1524047851562, -181.63021850585938, 212.3394012451172, -1280.1248779296875, 736.8369140625, 724.497314453125, 1476.142333984375, -397.16680908203125, 528.509033203125, -148.28500366210938, 918.5537109375, -489.44964599609375, 472.69586181640625, 446.5489501953125, -1041.1170654296875, -669.0013427734375, 379.4925842285156, 435.11285400390625, 217.43455505371094, 533.1828002929688, -330.0128173828125, 302.76544189453125, 1148.8468017578125, 1534.8275146484375, 899.1328125, 243.43211364746094, 354.681884765625, 1570.41455078125, 120.17445373535156, -78.58252716064453, 188.35894775390625, 781.3316040039062, 426.68450927734375, 136.507080078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000586.npy"} +{"epoch": 0.8604992657856094, "step": 587, "batch_size": 64, "mean": 420.68768310546875, "std": 573.68359375, "min": -1273.1795654296875, "p10": -154.7366714477539, "median": 384.8859100341797, "p90": 1244.9475708007815, "max": 2067.340087890625, "pos_frac": 0.765625, "sample": [587.356201171875, 940.435302734375, 405.90106201171875, 130.48681640625, 358.0956115722656, 507.1255798339844, 606.4796142578125, -74.57472229003906, -0.2239227294921875, 1479.4449462890625, -479.0216064453125, -127.68295288085938, 2067.340087890625, 363.2500915527344, 216.6103515625, 514.0966796875, -46.9764404296875, 535.3993530273438, 416.8178405761719, 382.64691162109375, -216.3422393798828, 639.21484375, 1411.256591796875, 19.23479652404785, 403.635986328125, 1197.1280517578125, -255.31434631347656, 996.7789306640625, 778.5142822265625, -6.3985748291015625, 126.43407440185547, 34.92876434326172, -456.91217041015625, 57.698455810546875, 717.9330444335938, 102.03242492675781, 216.00588989257812, -1273.1795654296875, 1003.9844360351562, 129.996337890625, -340.5869140625, 807.6329345703125, 215.35533142089844, 728.1591796875, -110.23789978027344, 1265.441650390625, 716.3204956054688, -145.96226501464844, 199.60696411132812, 370.1244201660156, 505.773681640625, 1647.13525390625, 392.2542724609375, 660.7649536132812, 539.32275390625, 598.0399169921875, 1035.01806640625, -158.49713134765625, 315.24407958984375, 54.217159271240234, 387.1249084472656, -21.85009765625, 1472.5467529296875, 1381.4259033203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000587.npy"} +{"epoch": 0.8619676945668135, "step": 588, "batch_size": 64, "mean": 353.66156005859375, "std": 525.8469848632812, "min": -974.0598754882812, "p10": -261.6598449707031, "median": 319.6382141113281, "p90": 1057.4864074707034, "max": 1416.2308349609375, "pos_frac": 0.734375, "sample": [192.3203887939453, 592.7849731445312, -217.84725952148438, 142.71893310546875, -14.372940063476562, 654.9677734375, 667.8260498046875, 10.659452438354492, 831.8129272460938, 755.2213134765625, 130.45306396484375, -465.2449951171875, -75.5028076171875, 592.4539184570312, -280.3705139160156, -471.9985656738281, 911.0780029296875, 502.2361755371094, 181.11175537109375, 279.635986328125, 651.895751953125, -139.41604614257812, -113.86161804199219, 1205.0941162109375, 173.43385314941406, 500.403076171875, 1376.9005126953125, -102.915283203125, -447.2550048828125, 120.95430755615234, 330.5023498535156, -50.13246154785156, 295.25286865234375, 1075.4364013671875, 967.8363647460938, 519.5198974609375, 180.11927795410156, -218.00161743164062, 1321.5595703125, 1015.6030883789062, 1249.9793701171875, 780.2433471679688, 90.73709106445312, -597.2705688476562, 660.1234741210938, 555.3759765625, 154.3287811279297, 932.027099609375, -974.0598754882812, 356.34747314453125, -12.135204315185547, 1416.2308349609375, 1187.025390625, 404.47021484375, 491.21124267578125, 628.8068237304688, 643.1632690429688, 308.7740783691406, -155.46934509277344, 174.61956787109375, 59.828880310058594, 340.6006164550781, 913.55615234375, -557.0473022460938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000588.npy"} +{"epoch": 0.8634361233480177, "step": 589, "batch_size": 64, "mean": 468.15966796875, "std": 589.9996337890625, "min": -880.173583984375, "p10": -215.08692932128903, "median": 435.29966735839844, "p90": 1320.538940429688, "max": 2122.156982421875, "pos_frac": 0.828125, "sample": [1658.906494140625, 272.1529541015625, 348.525390625, 40.16535568237305, 778.445556640625, 167.0176544189453, -138.63758850097656, 599.890625, 381.80914306640625, -233.17510986328125, -16.258773803710938, 462.7256774902344, 61.841636657714844, 274.39404296875, 652.51513671875, 1359.568603515625, 2122.156982421875, 178.93914794921875, 551.1355590820312, 295.2319030761719, 486.1594543457031, 171.1689453125, 159.9732208251953, 1544.8935546875, -29.09368896484375, 484.25048828125, 338.9229431152344, 677.5535278320312, 1684.8232421875, -179.778564453125, 637.3652954101562, 668.5306396484375, 925.3743286132812, 483.4635009765625, 1161.1751708984375, -230.21908569335938, -255.05633544921875, 48.751991271972656, 770.6853637695312, 158.0442352294922, 499.7440490722656, -653.5261840820312, 79.28189086914062, 778.6848754882812, 625.6103515625, 483.8693542480469, 1094.949462890625, 247.4552459716797, 597.412841796875, 1452.9044189453125, 551.4810791015625, 308.9523620605469, 407.8736572265625, -880.173583984375, 248.80406188964844, -723.3213500976562, 802.0732421875, 1561.910888671875, 32.94316101074219, 306.5724182128906, 1229.4697265625, 715.557373046875, -460.8287658691406, 1130.1796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000589.npy"} +{"epoch": 0.8649045521292217, "step": 590, "batch_size": 64, "mean": 457.78570556640625, "std": 582.0707397460938, "min": -454.372314453125, "p10": -238.45634460449216, "median": 326.36663818359375, "p90": 1287.8748657226565, "max": 2155.22509765625, "pos_frac": 0.734375, "sample": [593.4669189453125, 1578.2584228515625, 878.4427490234375, 309.9553527832031, 1082.9130859375, -365.83062744140625, 94.15106201171875, 2155.22509765625, 847.8182373046875, -184.43881225585938, 590.5, -294.38824462890625, 360.927001953125, 1436.0543212890625, -125.78399658203125, -76.58747863769531, -217.28085327148438, 128.86898803710938, 932.3525390625, 224.07562255859375, 131.41162109375, -325.6878662109375, -83.10067749023438, 296.57666015625, -153.77996826171875, 178.96484375, 732.4431762695312, -368.42608642578125, 1367.046142578125, -102.11214447021484, 982.7103271484375, 497.72283935546875, -12.289833068847656, 1414.781005859375, 257.5098571777344, 414.9232482910156, 330.28619384765625, 633.52978515625, 1217.8597412109375, 580.1300659179688, 479.7681884765625, -247.53155517578125, -94.43527221679688, -454.372314453125, 322.44708251953125, 66.92303466796875, 255.25453186035156, 931.500732421875, 261.28125, -347.4739990234375, 432.94317626953125, 1317.88134765625, 297.56610107421875, -44.73668670654297, 638.0040283203125, 640.0391235351562, 920.0006713867188, 1810.8388671875, 197.29898071289062, 1079.03955078125, 1057.853271484375, 903.0587158203125, 185.2517852783203, 750.686279296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000590.npy"} +{"epoch": 0.8663729809104258, "step": 591, "batch_size": 64, "mean": 417.65093994140625, "std": 582.2152709960938, "min": -1081.7244873046875, "p10": -377.7824157714843, "median": 432.16905212402344, "p90": 1134.827734375, "max": 1724.590087890625, "pos_frac": 0.84375, "sample": [8.009956359863281, 1134.5718994140625, 1359.617919921875, 652.950927734375, 1015.6839599609375, -116.66026306152344, 44.21397399902344, -1081.7244873046875, 561.0891723632812, 1724.590087890625, 1004.9178466796875, 176.5113983154297, 716.5609130859375, 395.8931884765625, 72.63845825195312, 1631.994384765625, 826.946044921875, 598.805908203125, 129.28695678710938, 245.95574951171875, -5.6857452392578125, 933.3368530273438, 136.5625762939453, 96.72319793701172, 585.1815795898438, 147.45828247070312, -728.4397583007812, 660.01953125, 561.0390625, 435.87725830078125, -306.03057861328125, 166.44091796875, 431.08001708984375, -714.8334350585938, 1166.7333984375, 174.56651306152344, 176.88296508789062, 127.968994140625, 659.3817749023438, -432.494140625, -408.533203125, 842.864501953125, 117.47090148925781, 433.2580871582031, -593.8621215820312, 1065.641357421875, 806.3736572265625, 96.72955322265625, 655.6398315429688, 1075.82666015625, 44.851280212402344, 1134.9373779296875, 1010.8515014648438, 140.6875, 1421.1328125, 459.63507080078125, 486.7720642089844, 1268.40234375, -700.7681274414062, 657.6240844726562, 167.0007781982422, 558.3663330078125, 334.78192138671875, 280.3529357910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000591.npy"} +{"epoch": 0.8678414096916299, "step": 592, "batch_size": 64, "mean": 379.54296875, "std": 721.2313232421875, "min": -1352.449462890625, "p10": -556.92373046875, "median": 383.8751525878906, "p90": 1228.0900634765626, "max": 2063.617431640625, "pos_frac": 0.765625, "sample": [372.8094482421875, 1123.708740234375, -61.930633544921875, 458.59088134765625, -899.839111328125, 102.78395080566406, 503.2274475097656, 996.920654296875, 1785.061767578125, 1514.098388671875, -944.9180297851562, 2063.617431640625, 252.09915161132812, 854.39501953125, -1023.5765380859375, -551.8650512695312, -176.031982421875, 338.8658142089844, 130.96441650390625, 1258.032470703125, 117.60419464111328, 1082.9810791015625, 81.53128814697266, 1141.50390625, 443.3526306152344, 103.5950927734375, 206.64450073242188, 959.383056640625, 1613.202880859375, 556.4290161132812, 125.586181640625, -279.96990966796875, 208.86032104492188, 747.863525390625, 1197.650146484375, 305.00067138671875, -559.0917358398438, 379.9553527832031, -434.902587890625, 610.5882568359375, 519.9592895507812, -95.2370376586914, 740.3555908203125, 1609.1591796875, 324.8585205078125, -348.5829162597656, 531.419921875, -1352.449462890625, 873.994873046875, 387.7949523925781, 717.68896484375, 93.15541076660156, 796.0214233398438, 562.5377197265625, -891.438720703125, 961.5790405273438, 686.79541015625, 121.183349609375, 1241.1357421875, -254.44949340820312, -1176.4217529296875, 787.58203125, 723.9195556640625, 25.40778160095215], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000592.npy"} +{"epoch": 0.869309838472834, "step": 593, "batch_size": 64, "mean": 292.6455078125, "std": 635.1069946289062, "min": -1020.4239501953125, "p10": -473.6730285644531, "median": 191.64047241210938, "p90": 1237.0400756835938, "max": 1809.688232421875, "pos_frac": 0.65625, "sample": [-20.490966796875, 721.5218505859375, -278.78204345703125, 105.03228759765625, -626.57666015625, -494.72296142578125, 1530.9227294921875, -537.826416015625, 1237.503662109375, 604.0732421875, -212.749755859375, 1465.8995361328125, 1357.226318359375, 1034.69384765625, 28.94853973388672, -844.4835815429688, 1258.147216796875, 1605.5340576171875, 773.750732421875, 118.99443054199219, -424.5565185546875, 180.14141845703125, 368.9658203125, 78.2035903930664, 57.16666793823242, 1809.688232421875, 516.2894897460938, 131.3280029296875, 373.9190673828125, -856.5426635742188, 3.712331771850586, 544.865966796875, -333.4726867675781, 203.1395263671875, 719.7118530273438, 816.03125, -128.35031127929688, 1235.9583740234375, -245.53988647460938, 391.252197265625, -863.4348754882812, 658.8230590820312, -313.7656555175781, -1020.4239501953125, -19.911771774291992, -121.3004150390625, 5.194421768188477, -6.168159484863281, 506.7860412597656, 822.3096923828125, 527.5746459960938, 409.1116027832031, 893.2279052734375, 464.294677734375, 203.50469970703125, 837.7940673828125, -4.838653564453125, 40.76165008544922, -136.94354248046875, 652.4078369140625, -82.7076416015625, 467.68798828125, -207.3360595703125, 748.137939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000593.npy"} +{"epoch": 0.8707782672540382, "step": 594, "batch_size": 64, "mean": 367.7850036621094, "std": 616.4564819335938, "min": -910.6281127929688, "p10": -411.0436370849609, "median": 317.8326721191406, "p90": 1196.2281494140625, "max": 1777.448486328125, "pos_frac": 0.734375, "sample": [1017.4017333984375, -59.595855712890625, 175.14576721191406, 69.14517211914062, -57.42686462402344, -581.5057983398438, 600.0457153320312, 428.33831787109375, 80.08695220947266, 663.8448486328125, 547.1144409179688, 372.0157165527344, 1777.448486328125, 735.939208984375, 141.55581665039062, 947.939453125, 729.2222290039062, 244.4634552001953, 1135.4344482421875, -465.2552795410156, -574.6304321289062, 1506.6859130859375, -910.6281127929688, 1766.1566162109375, -201.4341583251953, 579.5645751953125, -82.01387786865234, 353.3313903808594, 473.4219665527344, -147.64566040039062, 102.00019836425781, 428.5030517578125, 1367.864013671875, 556.5448608398438, 314.0784912109375, 570.4225463867188, 649.12890625, 29.04336929321289, 219.76370239257812, -743.1791381835938, -111.76596069335938, 944.0581665039062, 168.4257049560547, -260.4160461425781, 1181.7342529296875, 919.30126953125, 908.3939208984375, 593.2127075195312, -425.0493469238281, 476.216796875, -879.76318359375, 41.03038024902344, 252.45416259765625, -196.583251953125, 321.58685302734375, -42.49137496948242, 1202.4398193359375, 892.04541015625, 1605.958251953125, 214.79173278808594, 1209.606201171875, -378.3636474609375, 84.13446044921875, 58.946495056152344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000594.npy"} +{"epoch": 0.8722466960352423, "step": 595, "batch_size": 64, "mean": 404.41473388671875, "std": 577.3847045898438, "min": -541.853515625, "p10": -192.28812866210936, "median": 275.3995056152344, "p90": 1043.8217773437502, "max": 2623.65673828125, "pos_frac": 0.78125, "sample": [953.302490234375, 296.90753173828125, 602.3716430664062, 20.131261825561523, -5.921363830566406, 1061.57861328125, 370.90985107421875, 942.2910766601562, -272.145751953125, 284.12847900390625, 1552.9403076171875, 93.99537658691406, 1070.237060546875, 994.39453125, -203.35023498535156, 347.67962646484375, 336.5187683105469, 1798.876220703125, 232.90968322753906, -157.19509887695312, 115.900146484375, -239.39166259765625, 587.962646484375, 378.5167541503906, -271.550048828125, -256.8160705566406, 255.28842163085938, 109.89969635009766, 237.46241760253906, 296.6717224121094, 151.14080810546875, 206.01824951171875, 265.752685546875, 222.07757568359375, 1002.38916015625, 29.06950569152832, 232.11782836914062, 585.4307250976562, 1635.6298828125, 345.3748779296875, -27.110122680664062, -541.853515625, 491.2309875488281, -345.3625183105469, 367.5914001464844, 370.0890808105469, 266.6705322265625, -166.47654724121094, 717.1514892578125, 743.1280517578125, -42.51490020751953, 675.6220092773438, 101.24102020263672, -137.79750061035156, 417.62030029296875, 120.20106506347656, 946.3353881835938, 99.65989685058594, -130.3641357421875, 2.067554473876953, 1772.771240234375, 502.32757568359375, 847.1513061523438, 2623.65673828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000595.npy"} +{"epoch": 0.8737151248164464, "step": 596, "batch_size": 64, "mean": 296.4541015625, "std": 617.1494750976562, "min": -1073.6947021484375, "p10": -469.6156677246093, "median": 201.5656280517578, "p90": 1249.7732910156258, "max": 2032.6763916015625, "pos_frac": 0.734375, "sample": [-296.375732421875, 22.75445556640625, 675.090576171875, 55.98075866699219, 849.72998046875, 598.7153930664062, 200.46408081054688, 516.3522338867188, 266.3258972167969, 566.8270263671875, 2032.6763916015625, -66.91171264648438, -567.4591064453125, 464.22625732421875, 1034.71728515625, 1498.1697998046875, 144.98634338378906, 149.0124053955078, 492.6776428222656, 665.5847778320312, -161.79335021972656, 552.7933349609375, 81.15202331542969, 207.70704650878906, -492.6347351074219, 153.8571319580078, 607.8423461914062, 396.29351806640625, 260.73272705078125, 140.7297821044922, -390.26104736328125, 110.67964935302734, -598.20068359375, 1422.368408203125, 60.47850799560547, 1051.248779296875, 1470.409423828125, 568.8892211914062, -23.274477005004883, -87.8838119506836, 202.66717529296875, 548.9676513671875, 141.2019500732422, 355.3854675292969, -218.4068603515625, 336.677978515625, 1334.855224609375, 211.12132263183594, -298.9563293457031, -149.62977600097656, 1819.030517578125, 59.95734405517578, -1073.6947021484375, 184.53176879882812, 346.0568542480469, -648.1614379882812, 151.81227111816406, -630.8915405273438, 695.5557861328125, 57.995086669921875, 1511.018310546875, -415.9045104980469, 426.8732604980469, -609.6782836914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000596.npy"} +{"epoch": 0.8751835535976505, "step": 597, "batch_size": 64, "mean": 506.2996826171875, "std": 706.5131225585938, "min": -1208.86181640625, "p10": -139.30214385986326, "median": 409.5483856201172, "p90": 1460.5961669921878, "max": 2420.082763671875, "pos_frac": 0.78125, "sample": [893.646484375, 593.830810546875, 1126.813232421875, 339.2828674316406, 468.82452392578125, 1534.0821533203125, -134.4252471923828, 1029.568603515625, 1485.183349609375, -396.39501953125, -50.331626892089844, -52.31353759765625, 259.1396789550781, 50.36715316772461, 777.82958984375, 108.96024322509766, 1290.513671875, 136.5440216064453, 502.1507873535156, 1754.060302734375, 446.9464111328125, 464.34515380859375, -16.660858154296875, 1046.840087890625, 244.6443328857422, 296.9124450683594, 38.33146286010742, 211.47581481933594, 54.03825378417969, -1040.7568359375, -1208.86181640625, -83.5982666015625, 1330.0147705078125, 411.54327392578125, 557.6253662109375, 2376.988525390625, 1268.1690673828125, -54.79209899902344, -442.5210266113281, 69.11266326904297, 695.8720092773438, 1289.7216796875, 87.6712417602539, 270.32861328125, 407.5534973144531, -141.39224243164062, 623.0213012695312, 960.241455078125, 61.94615173339844, 1403.22607421875, 173.76629638671875, -276.65093994140625, -235.888916015625, 2420.082763671875, 1886.2109375, 37.43095397949219, 593.603271484375, 483.849365234375, 731.2327880859375, 775.0724487304688, 113.49992370605469, 1576.70751953125, -13.945823669433594, 792.89208984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000597.npy"} +{"epoch": 0.8766519823788547, "step": 598, "batch_size": 64, "mean": 397.79205322265625, "std": 600.700927734375, "min": -1268.64697265625, "p10": -167.12311859130858, "median": 413.224853515625, "p90": 1153.1157958984377, "max": 2151.881103515625, "pos_frac": 0.796875, "sample": [505.92498779296875, 645.6026611328125, 367.9899597167969, 1050.4078369140625, -1268.64697265625, 384.6061096191406, 1289.9119873046875, 715.7299194335938, 179.43096923828125, 1181.8826904296875, 35.404937744140625, -675.8978271484375, -939.25634765625, 204.57130432128906, 1233.946533203125, -154.6655731201172, 703.43994140625, 736.2399291992188, 1227.7435302734375, 1923.4222412109375, 258.80419921875, 1085.9930419921875, 684.9552001953125, -326.994140625, 372.25482177734375, 460.7839050292969, 540.1650390625, 354.54949951171875, -956.1549682617188, 143.5821990966797, 647.0755615234375, 630.6931762695312, 538.8916625976562, -126.6283950805664, -504.10614013671875, 585.9325561523438, 579.71875, 1195.64697265625, 198.4126434326172, 321.95782470703125, 222.80032348632812, 207.53912353515625, -133.535400390625, 435.2254943847656, -56.134185791015625, 676.0458984375, 470.34307861328125, 482.57257080078125, 21.956342697143555, 110.85208892822266, 391.2242126464844, 2151.881103515625, 823.7029418945312, 825.5302124023438, 511.50262451171875, 285.7261047363281, 30.18212890625, 1006.3673095703125, 574.5138549804688, -172.46206665039062, 13.188522338867188, -132.69622802734375, 716.4052734375, -37.36314010620117], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000598.npy"} +{"epoch": 0.8781204111600588, "step": 599, "batch_size": 64, "mean": 336.5788879394531, "std": 522.909423828125, "min": -1972.6463623046875, "p10": -138.88133239746088, "median": 301.7605895996094, "p90": 1039.9950500488285, "max": 1639.525634765625, "pos_frac": 0.84375, "sample": [435.2416076660156, 966.5208129882812, 284.79168701171875, 491.77618408203125, 787.8282470703125, 121.27008056640625, 1639.525634765625, 285.02655029296875, 186.0487060546875, 394.7696533203125, 462.606201171875, -702.1502685546875, 6.606121063232422, 94.81317138671875, 12.076528549194336, 355.14892578125, 938.9642333984375, 1071.4840087890625, 518.4083251953125, 1319.3355712890625, 189.95172119140625, 580.0748291015625, 649.5970458984375, 485.2157287597656, 348.6092529296875, -19.416330337524414, 105.90540313720703, 220.18763732910156, 300.2301025390625, -1972.6463623046875, 611.5831298828125, 303.29107666015625, 548.728271484375, 198.30284118652344, 1083.550537109375, 705.1429443359375, 532.0853881835938, -94.69094848632812, 1173.0792236328125, 281.2596740722656, 475.6258544921875, 204.38717651367188, 915.7574462890625, -289.79876708984375, -27.649795532226562, 329.0362854003906, 513.1882934570312, 180.47293090820312, 334.7714538574219, 114.73251342773438, 1114.8568115234375, 252.61827087402344, 108.47364044189453, 15.475822448730469, -314.4839172363281, 10.371395111083984, 281.57611083984375, 396.7861328125, -326.69305419921875, 1216.79150390625, -157.820068359375, -407.64227294921875, 278.8836364746094, 421.1977844238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000599.npy"} +{"epoch": 0.8795888399412628, "step": 600, "batch_size": 64, "mean": 324.20037841796875, "std": 617.0860595703125, "min": -1090.9415283203125, "p10": -512.173193359375, "median": 224.2927703857422, "p90": 1117.5945434570312, "max": 2093.43896484375, "pos_frac": 0.75, "sample": [124.2890396118164, 668.2526245117188, 621.9989624023438, -1090.9415283203125, 857.1494750976562, 561.8839111328125, -52.227962493896484, 554.4614868164062, -610.7509155273438, 508.9811096191406, 82.17253875732422, 1.3584861755371094, 989.1849365234375, -241.29629516601562, 294.43231201171875, 705.447509765625, -718.0370483398438, 1447.5224609375, 72.6039047241211, 346.43402099609375, 851.0516357421875, 986.6224365234375, -184.7368927001953, 285.311279296875, 32.95033264160156, -349.76885986328125, 742.1693115234375, 1090.3135986328125, 1129.286376953125, 877.7597045898438, 177.26461791992188, -15.680389404296875, 988.3028564453125, 46.37676239013672, 1155.0860595703125, 638.862548828125, 101.76560974121094, 252.61973571777344, -444.68975830078125, 1488.28125, 2093.43896484375, 217.60276794433594, -146.38047790527344, 58.44384002685547, 1447.197998046875, 166.96859741210938, 823.813232421875, 55.2473258972168, -749.3628540039062, 550.675537109375, 70.44038391113281, 1147.999755859375, 251.27239990234375, -60.68292999267578, 230.98277282714844, 828.0333862304688, -702.853759765625, 214.26638793945312, 23.439260482788086, -541.0946655273438, -603.622314453125, 478.236328125, -209.95925903320312, 132.65484619140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000600.npy"} +{"epoch": 0.8810572687224669, "step": 601, "batch_size": 64, "mean": 411.2587585449219, "std": 545.4044189453125, "min": -1081.9791259765625, "p10": -252.3417724609375, "median": 364.5184783935547, "p90": 1175.9253417968753, "max": 1725.138671875, "pos_frac": 0.78125, "sample": [738.671875, 1216.964599609375, 1725.138671875, 708.6356201171875, -100.35909271240234, 991.6216430664062, 363.4901428222656, 152.4652099609375, -255.64642333984375, 248.00836181640625, -244.63092041015625, -152.6100311279297, 1329.4010009765625, 419.93450927734375, 313.3651428222656, 698.8235473632812, 359.45062255859375, 627.32666015625, -271.8490905761719, 1362.5504150390625, 751.0856323242188, 625.5240478515625, 746.3798217773438, -124.58695983886719, 565.4600830078125, 209.34117126464844, -56.155364990234375, 556.3352661132812, 773.634033203125, 244.83541870117188, 387.0704040527344, 344.76568603515625, -937.39013671875, 662.8922119140625, -376.9301452636719, 535.5676879882812, 142.7657470703125, -62.32981872558594, 151.97756958007812, 271.5646057128906, 644.4591064453125, 792.9754638671875, 96.78063201904297, -1081.9791259765625, 1195.6224365234375, -530.8606567382812, 47.290313720703125, 1383.422607421875, 625.0029907226562, -279.064697265625, 819.8258056640625, 1006.976318359375, 149.47093200683594, 320.4667663574219, 210.14710998535156, 1031.961181640625, 323.5115051269531, 1338.1119384765625, 1129.9654541015625, 441.301025390625, -59.439247131347656, 248.56427001953125, 457.9429931640625, 365.54681396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000601.npy"} +{"epoch": 0.882525697503671, "step": 602, "batch_size": 64, "mean": 502.82830810546875, "std": 547.1575317382812, "min": -502.1641540527344, "p10": -70.72713470458984, "median": 414.23191833496094, "p90": 1132.7218994140626, "max": 2308.622802734375, "pos_frac": 0.84375, "sample": [490.5845031738281, 159.12872314453125, -120.47627258300781, 308.3271484375, 44.972267150878906, 154.7093505859375, 976.0250854492188, 687.4931640625, 202.36720275878906, 245.38243103027344, 438.449951171875, 107.86195373535156, 673.0833129882812, 993.655029296875, 1146.9149169921875, 49.54969024658203, 493.599365234375, 443.734619140625, 390.0138854980469, 1227.117919921875, -17.288177490234375, 222.08184814453125, -35.21728515625, -502.1641540527344, 1099.6048583984375, 735.9557495117188, 344.884521484375, 494.4038391113281, 150.28143310546875, -73.43748474121094, 64.6153793334961, 984.8013916015625, 928.6312866210938, 143.95069885253906, 717.3374633789062, 1248.4324951171875, 253.89083862304688, 496.486328125, -465.4532775878906, -348.48974609375, 822.5199584960938, 1085.0030517578125, 766.1282958984375, 1022.90625, 792.090087890625, 886.9556884765625, 2308.622802734375, -139.2758331298828, 1206.9891357421875, -412.6666259765625, 935.2852783203125, 911.3963623046875, 228.18609619140625, 164.72689819335938, 1364.635498046875, -64.40298461914062, 311.26666259765625, 801.3735961914062, 154.81704711914062, 116.34799194335938, 796.8422241210938, 2063.60205078125, 298.8268127441406, 203.0312957763672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000602.npy"} +{"epoch": 0.8839941262848752, "step": 603, "batch_size": 64, "mean": 450.83575439453125, "std": 661.5194091796875, "min": -831.5108032226562, "p10": -364.47956237792965, "median": 362.2799072265625, "p90": 1307.265380859375, "max": 2432.076171875, "pos_frac": 0.734375, "sample": [-388.857666015625, -268.0185241699219, 378.355712890625, 43.220481872558594, 1313.8333740234375, -20.92230224609375, 1221.743896484375, 409.86981201171875, 801.01611328125, 1233.300048828125, -72.02618408203125, 53.739219665527344, -704.1395874023438, 845.8458251953125, 50.90873336791992, 433.2872619628906, 43.153076171875, 1470.7479248046875, 1932.858642578125, 537.4513549804688, 980.2394409179688, 836.9805297851562, 670.8883056640625, 64.62391662597656, 307.198974609375, 279.5506896972656, 456.71844482421875, 156.32669067382812, -59.80996322631836, 261.14434814453125, 1578.509521484375, -831.5108032226562, 1718.358642578125, -144.36929321289062, -9.555702209472656, 346.2041015625, 957.1509399414062, -342.769287109375, 721.1527709960938, 552.4209594726562, 916.1152954101562, -403.06756591796875, -97.00541687011719, -23.94852066040039, -132.74609375, 533.830078125, 724.1527709960938, 116.42350769042969, -373.7839660644531, 418.8948974609375, 489.70843505859375, 1291.9400634765625, 890.6121826171875, -431.12506103515625, 246.4976806640625, -399.1663818359375, 2432.076171875, 328.2256164550781, 1687.101806640625, 36.74420166015625, 826.7662353515625, 1075.35595703125, 885.0625610351562, 0.003849029541015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000603.npy"} +{"epoch": 0.8854625550660793, "step": 604, "batch_size": 64, "mean": 501.3348693847656, "std": 690.4627685546875, "min": -836.5044555664062, "p10": -264.22925720214846, "median": 433.0321960449219, "p90": 1590.6401489257814, "max": 2377.7607421875, "pos_frac": 0.734375, "sample": [-383.8493957519531, 192.1807861328125, -264.9084777832031, -149.2384490966797, 672.0595092773438, 981.5582275390625, 956.4500732421875, 1100.8385009765625, 893.2445678710938, 2310.53857421875, -836.5044555664062, 973.9596557617188, -47.78746032714844, -236.08157348632812, 907.727294921875, 1086.841552734375, 888.51123046875, 115.27340698242188, 396.3074951171875, 994.2548217773438, 177.35775756835938, -490.43121337890625, 116.98054504394531, 1624.8983154296875, 588.154296875, -693.3535766601562, 1728.421875, -262.6444091796875, 133.78256225585938, 212.454833984375, 7.366371154785156, 391.0645446777344, 566.4827880859375, 481.2610778808594, -78.34805297851562, 1667.3785400390625, 639.8219604492188, 35.46497344970703, -225.07945251464844, 843.85400390625, 584.9415283203125, 414.9312438964844, 509.8826599121094, 259.40673828125, 416.1139221191406, 2377.7607421875, -477.41595458984375, 1573.4697265625, 449.9504699707031, -56.92192077636719, 606.8930053710938, 1107.23876953125, 819.3297119140625, -77.93055725097656, 1862.2720947265625, 531.8185424804688, -373.261474609375, -74.96499633789062, 359.6096496582031, 714.1862182617188, -42.58868408203125, 588.01611328125, 1597.9989013671875, 398.4314880371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000604.npy"} +{"epoch": 0.8869309838472834, "step": 605, "batch_size": 64, "mean": 365.2569885253906, "std": 620.79296875, "min": -1035.4881591796875, "p10": -287.6330841064453, "median": 256.8729934692383, "p90": 1082.4372436523443, "max": 2735.85302734375, "pos_frac": 0.75, "sample": [-57.23228454589844, -335.63006591796875, -22.6514892578125, 216.251953125, 71.0066146850586, 90.45259094238281, 845.8928833007812, 679.9901733398438, 882.27880859375, 242.7049560546875, 672.8538208007812, -56.653778076171875, 73.42962646484375, -306.106201171875, 440.3497009277344, 466.627197265625, 135.0741424560547, 824.8309936523438, 1550.9564208984375, -190.7967071533203, 2735.85302734375, 33.466033935546875, 589.87255859375, -29.455900192260742, 211.3767547607422, 965.0234375, -145.30511474609375, -292.6588439941406, 358.73468017578125, 329.580810546875, 77.11522674560547, -275.90631103515625, 879.4883422851562, -492.5296936035156, -169.94732666015625, 420.3753356933594, 1187.1771240234375, 88.78316497802734, 258.91265869140625, 750.1544189453125, 102.58372497558594, 2305.0693359375, -735.621337890625, -1035.4881591796875, 503.43817138671875, 426.4117431640625, -309.50616455078125, 1147.3304443359375, 93.65576934814453, 6.30925178527832, 497.1920471191406, 346.41607666015625, 1230.9727783203125, 244.93167114257812, 240.48741149902344, 317.29010009765625, 440.7279357910156, 1132.7574462890625, 452.9875183105469, 254.8333282470703, 950.65966796875, 376.9054870605469, -147.63775634765625, 830.0008544921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000605.npy"} +{"epoch": 0.8883994126284875, "step": 606, "batch_size": 64, "mean": 401.52313232421875, "std": 565.43896484375, "min": -569.9078369140625, "p10": -335.14705505371086, "median": 301.57373046875, "p90": 1083.7690063476566, "max": 2078.86962890625, "pos_frac": 0.765625, "sample": [-499.17071533203125, 113.17623901367188, 110.0853500366211, -358.5435485839844, 364.0718078613281, 1001.5360107421875, 239.12399291992188, 201.346923828125, 1160.3226318359375, 808.8602905273438, 176.4040069580078, 2078.86962890625, 890.35498046875, -85.6390380859375, -447.8006896972656, 232.90838623046875, 519.6370849609375, 247.53604125976562, 330.7175598144531, 1326.1552734375, 650.84912109375, -448.5374755859375, 1638.9859619140625, 21.691795349121094, 985.1436767578125, 625.7420654296875, 557.7347412109375, 174.05177307128906, -102.4946060180664, -280.55523681640625, 539.5078735351562, 715.75439453125, -418.6226806640625, 276.47210693359375, -499.3150329589844, 322.18389892578125, 200.10357666015625, 746.5303344726562, -115.89884185791016, -102.06184387207031, -569.9078369140625, -116.45417022705078, 1889.508544921875, 1119.01171875, 237.66403198242188, 656.658935546875, 36.18451690673828, 608.2791748046875, 872.911865234375, 356.21759033203125, -73.92668151855469, 842.3983154296875, -61.22352600097656, 244.25233459472656, 904.6841430664062, 1444.763671875, 272.3145751953125, 700.3281860351562, 26.38372039794922, 280.96356201171875, 609.799072265625, 580.0504760742188, 560.199951171875, 379.2005920410156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000606.npy"} +{"epoch": 0.8898678414096917, "step": 607, "batch_size": 64, "mean": 342.0454406738281, "std": 577.7899169921875, "min": -1162.990478515625, "p10": -234.09727478027344, "median": 310.7872619628906, "p90": 998.582232666016, "max": 2503.743896484375, "pos_frac": 0.796875, "sample": [474.059326171875, -104.46229553222656, 517.6988525390625, 242.56358337402344, 134.734619140625, 170.95814514160156, 1136.4833984375, 350.3511962890625, 2.556396484375, 1243.645751953125, -309.380615234375, 648.8860473632812, 442.3092346191406, -235.29721069335938, 100.93222045898438, 1039.48779296875, -614.3320922851562, 185.88031005859375, 308.0406494140625, 89.56781768798828, -172.7587890625, 146.7886505126953, 335.4808654785156, 482.4007568359375, -452.2463684082031, 609.1273803710938, -166.81951904296875, 1537.8538818359375, 735.2274169921875, 58.71918869018555, 641.8756103515625, 112.67530059814453, 735.0079956054688, 313.53387451171875, 495.56634521484375, 333.509521484375, 903.1359252929688, 18.096923828125, 1501.104736328125, 387.5562438964844, -988.7169189453125, 581.5763549804688, -231.29742431640625, 511.7252197265625, 734.4823608398438, 92.30810546875, -5.475730895996094, 156.75262451171875, -1162.990478515625, 260.71026611328125, 528.436767578125, 363.0108947753906, -177.94168090820312, 619.332763671875, 246.58273315429688, 78.51930236816406, 211.3023681640625, 2503.743896484375, 1493.8924560546875, 726.216552734375, 303.3755187988281, -370.25982666015625, 571.7153930664062, 463.3869934082031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000607.npy"} +{"epoch": 0.8913362701908958, "step": 608, "batch_size": 64, "mean": 400.59295654296875, "std": 503.2538146972656, "min": -489.6466369628906, "p10": -192.36787414550776, "median": 411.6615447998047, "p90": 943.5206481933595, "max": 1763.410888671875, "pos_frac": 0.78125, "sample": [732.4363403320312, 792.90966796875, 369.5085144042969, 529.8427734375, -489.6466369628906, 1574.451171875, -352.5489196777344, 150.8716583251953, -133.6028594970703, 669.3125, 770.9982299804688, 0.6926097869873047, 7.569068908691406, 694.7987670898438, -138.6513671875, 161.87530517578125, -440.6174011230469, 443.5408935546875, -100.92927551269531, 326.9819030761719, 498.1739501953125, 682.5678100585938, 87.81706237792969, 744.9830322265625, 101.7397232055664, 431.09344482421875, 697.2303466796875, 468.7112121582031, 957.4470825195312, -140.5523223876953, 58.347713470458984, -47.17436218261719, 250.96958923339844, 392.2296447753906, 700.7119140625, 272.19317626953125, 456.20843505859375, 465.661376953125, 1201.694580078125, -354.87310791015625, 1739.4039306640625, 253.6502685546875, 642.471923828125, 1099.9442138671875, 1240.3641357421875, 166.22576904296875, 84.6270751953125, 812.52099609375, -49.381752014160156, -214.5745391845703, 662.6527099609375, -1.0912399291992188, 549.6829833984375, 783.7323608398438, -302.3079528808594, 911.025634765625, -428.2005310058594, 66.79385375976562, 1763.410888671875, 745.0848999023438, 786.677001953125, 473.15008544921875, 235.52719116210938, 121.584716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000608.npy"} +{"epoch": 0.8928046989720999, "step": 609, "batch_size": 64, "mean": 506.5679016113281, "std": 787.3883056640625, "min": -1195.086181640625, "p10": -276.63756866455066, "median": 431.0681610107422, "p90": 1230.3578369140628, "max": 3893.398193359375, "pos_frac": 0.796875, "sample": [133.62387084960938, 53.46540069580078, 109.09008026123047, -669.1119384765625, 222.81382751464844, 439.2563171386719, 1256.990234375, 525.6204223632812, 610.5444946289062, -404.53961181640625, 791.3964233398438, 1712.9637451171875, 104.45106506347656, 439.2138671875, 822.365478515625, -1195.086181640625, -131.45388793945312, 536.9012451171875, 922.9047241210938, 422.9224548339844, 775.8509521484375, -87.8424072265625, 717.9173583984375, 706.7376098632812, -379.7940673828125, 596.9168701171875, 1322.5006103515625, 235.41909790039062, -501.4877014160156, 682.9729614257812, 1609.234130859375, 708.6707763671875, 2484.779541015625, 517.6950073242188, 2741.432373046875, 985.9937744140625, -49.16132354736328, 741.34375, -78.00926971435547, 311.0040588378906, 306.9465637207031, 175.33973693847656, 825.9443359375, 3893.398193359375, 692.9315185546875, 99.70452880859375, 384.1425476074219, -137.61082458496094, -743.2090454101562, 180.47080993652344, 299.54119873046875, 294.6091613769531, 1082.6998291015625, 21.177993774414062, -3.8038253784179688, 1082.5458984375, 295.6608581542969, 671.7144165039062, 634.714111328125, 1168.215576171875, 260.8251953125, 490.39825439453125, -336.220458984375, 33.7038459777832], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000609.npy"} +{"epoch": 0.8942731277533039, "step": 610, "batch_size": 64, "mean": 320.3234558105469, "std": 583.2960815429688, "min": -1910.7852783203125, "p10": -386.4082977294922, "median": 288.99049377441406, "p90": 903.921667480469, "max": 1747.5306396484375, "pos_frac": 0.765625, "sample": [635.81298828125, 696.453369140625, 218.36854553222656, 189.2549285888672, -392.15234375, 725.2078857421875, 78.14106750488281, 597.1239013671875, 548.2081909179688, -503.13214111328125, 19.03339385986328, 530.5128173828125, -11.607109069824219, 1306.714599609375, 730.1431884765625, 382.0540771484375, 249.22042846679688, 188.894287109375, -566.292724609375, -1910.7852783203125, 577.3522338867188, 1747.5306396484375, 182.9849853515625, 1042.32470703125, -186.9803466796875, 371.424072265625, 175.59292602539062, -403.23663330078125, 704.4076538085938, 704.44873046875, 1560.6236572265625, 10.089065551757812, -1.796142578125, 713.3140869140625, 343.73223876953125, 1455.0706787109375, -373.0055236816406, 203.67092895507812, 328.5577087402344, 612.0339965820312, 666.4228515625, 417.14068603515625, -54.36823654174805, 743.7675170898438, 134.38876342773438, 227.85580444335938, 929.0647583007812, 136.88717651367188, -98.5546875, 288.40576171875, 34.92506408691406, 704.63720703125, 244.2660369873047, -95.58909606933594, 627.63525390625, 78.4269790649414, 342.1847229003906, 363.36016845703125, -159.94850158691406, 289.5752258300781, -459.4740905761719, -715.8232421875, 845.2544555664062, 1530.947265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000610.npy"} +{"epoch": 0.895741556534508, "step": 611, "batch_size": 64, "mean": 539.90380859375, "std": 750.0967407226562, "min": -786.6029052734375, "p10": -278.99327392578124, "median": 428.5040283203125, "p90": 1434.107958984375, "max": 2637.351806640625, "pos_frac": 0.765625, "sample": [1216.6851806640625, 424.1744689941406, 2585.72021484375, 1654.0096435546875, 1158.8975830078125, 567.433349609375, -68.53357696533203, 1324.323486328125, 34.350730895996094, 264.7084045410156, 319.8925476074219, 748.8709106445312, 16.479290008544922, -141.0272674560547, -774.497314453125, -283.5169677734375, -398.5435791015625, -786.6029052734375, 234.52822875976562, 1414.9671630859375, -711.6329956054688, 1248.72314453125, 2300.783935546875, -443.684814453125, 1442.3111572265625, 584.8079223632812, 54.481422424316406, 1054.5909423828125, 573.6869506835938, 152.7568817138672, -39.5628662109375, 1115.0626220703125, 429.87640380859375, -39.101234436035156, 427.13165283203125, 1114.4359130859375, 69.69256591796875, 171.7734375, -191.38833618164062, 281.4446105957031, 974.0927124023438, 357.1716613769531, 3.6814117431640625, 1752.3702392578125, 438.10382080078125, 499.39801025390625, -300.3877868652344, 544.30419921875, 1084.0419921875, -117.00394439697266, 639.117431640625, -268.43798828125, 245.33837890625, 496.8497314453125, 774.483154296875, 510.74468994140625, 358.8536682128906, 1713.5860595703125, 480.32000732421875, 1406.3597412109375, 1181.159912109375, -4.600837707519531, 2637.351806640625, 38.43734359741211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000611.npy"} +{"epoch": 0.8972099853157122, "step": 612, "batch_size": 64, "mean": 408.61627197265625, "std": 560.021484375, "min": -818.4468383789062, "p10": -241.06194000244136, "median": 343.74322509765625, "p90": 1019.7358337402344, "max": 2348.142333984375, "pos_frac": 0.796875, "sample": [997.4669799804688, 270.1341857910156, 334.7679748535156, -414.3048400878906, -93.8505859375, 3.7978763580322266, 997.3853149414062, 289.1054992675781, 414.95501708984375, 432.53826904296875, -621.86962890625, 830.2635498046875, 126.51736450195312, 1014.75341796875, 851.95166015625, 1021.8711547851562, 592.161865234375, 259.83740234375, 522.1005249023438, 1188.16455078125, 855.5201416015625, 92.42630767822266, -818.4468383789062, 1277.0406494140625, 735.5875854492188, -255.457275390625, 799.3256225585938, -305.30010986328125, 583.6823120117188, 749.3851318359375, 540.6344604492188, -793.4696044921875, 136.32289123535156, 352.7184753417969, 174.67388916015625, -327.6884765625, 621.7327270507812, 401.6341857910156, 113.822998046875, -194.20166015625, 137.17701721191406, 174.0575714111328, 143.25509643554688, 193.66232299804688, 2348.142333984375, 39.439762115478516, 791.3021240234375, 985.2260131835938, 1203.6552734375, 240.82861328125, 130.2857208251953, 731.95947265625, 1388.7078857421875, 141.52145385742188, -47.70085144042969, -207.4728240966797, 526.7369384765625, -4.9988861083984375, 931.4917602539062, -157.91136169433594, 1123.7667236328125, 592.4022216796875, 904.8382568359375, 83.377197265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000612.npy"} +{"epoch": 0.8986784140969163, "step": 613, "batch_size": 64, "mean": 507.1754150390625, "std": 609.2411499023438, "min": -860.1251220703125, "p10": -38.087606811523415, "median": 449.3294982910156, "p90": 1239.3911132812502, "max": 2455.6318359375, "pos_frac": 0.84375, "sample": [594.0606689453125, 1260.07177734375, 1296.3511962890625, -152.0593719482422, 1079.9168701171875, 1191.13623046875, 28.214069366455078, 1076.1204833984375, 935.1968994140625, 23.90171241760254, 1499.2196044921875, 200.69985961914062, 618.2977905273438, 288.36712646484375, 1618.985595703125, -342.74932861328125, 5.4236907958984375, 400.54608154296875, 455.09210205078125, -47.33174133300781, 377.6143798828125, 95.97570037841797, 690.6640014648438, 665.51953125, 1120.9765625, 300.7166748046875, 1112.224609375, 1028.723388671875, -5.99066162109375, 575.0891723632812, 766.43994140625, 21.727630615234375, 10.711858749389648, 656.0686645507812, 443.56689453125, 1774.360107421875, 676.2818603515625, 705.7840576171875, 2455.6318359375, 3.838184356689453, -845.4503784179688, 1120.9405517578125, 682.2012329101562, 857.02294921875, 690.713134765625, -860.1251220703125, 127.84956359863281, 1506.33935546875, -1.0611610412597656, 238.9654541015625, 854.4072265625, -271.561767578125, -445.4597473144531, 169.67431640625, 1013.8145751953125, 533.8615112304688, 162.1612548828125, 343.8906555175781, 52.50102996826172, 641.1845703125, -16.517959594726562, 79.1946792602539, 223.65963745117188, 95.63581848144531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000613.npy"} +{"epoch": 0.9001468428781204, "step": 614, "batch_size": 64, "mean": 489.30645751953125, "std": 681.6434326171875, "min": -887.33056640625, "p10": -244.16184387207022, "median": 465.4331970214844, "p90": 1329.2904174804687, "max": 2588.90185546875, "pos_frac": 0.75, "sample": [207.60525512695312, 421.43182373046875, -590.4451904296875, 941.5919189453125, -110.69786071777344, 1319.906982421875, 21.939212799072266, 404.2627868652344, 550.0628662109375, -20.09023666381836, 1290.034912109375, 194.4571075439453, 516.851318359375, 887.9043579101562, 283.90032958984375, -167.87557983398438, 1389.366455078125, 742.312255859375, 148.3512420654297, 1.6247367858886719, 1083.598388671875, -16.873291015625, 709.8820190429688, 1001.888671875, -567.4324340820312, -113.45838165283203, 509.4345703125, -134.66970825195312, 614.7520751953125, -276.85595703125, 575.8195190429688, 1764.68603515625, -158.60955810546875, 804.9462280273438, -583.9814453125, -157.02764892578125, 719.0694580078125, -698.3860473632812, -5.98805046081543, 726.82861328125, 1561.355712890625, 389.5596008300781, 1597.227294921875, 311.9756164550781, 1066.2601318359375, 1091.9749755859375, 1282.358154296875, 309.385009765625, 660.9852905273438, 723.6709594726562, 3.7560043334960938, 333.1338806152344, 1333.3118896484375, 103.03155517578125, 656.1838989257812, 2588.90185546875, 1844.0360107421875, 121.59918212890625, -652.7322998046875, -887.33056640625, 1193.1990966796875, 157.70166015625, 528.5172119140625, 767.433349609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000614.npy"} +{"epoch": 0.9016152716593245, "step": 615, "batch_size": 64, "mean": 411.6612854003906, "std": 655.2713623046875, "min": -1777.503173828125, "p10": -238.64943695068357, "median": 268.02159118652344, "p90": 1090.75703125, "max": 2531.9345703125, "pos_frac": 0.828125, "sample": [151.34048461914062, 726.9489135742188, 168.39901733398438, 1431.7626953125, 254.40553283691406, 1261.7706298828125, -21.707748413085938, 5.903755187988281, 8.54327392578125, 99.32138061523438, 923.5103759765625, 270.060791015625, 248.44300842285156, -312.883056640625, 715.2901000976562, -504.9622497558594, 999.8927612304688, -1777.503173828125, 265.9823913574219, 400.94647216796875, 287.558837890625, 160.771484375, 1959.5391845703125, 143.26123046875, 71.02983856201172, 1015.5159912109375, -212.38656616210938, 449.0341796875, 563.3411865234375, 73.95462036132812, 409.69024658203125, 1017.4242553710938, 650.13427734375, 1061.830078125, 1321.6124267578125, 63.415992736816406, 103.54118347167969, 114.34921264648438, -517.5985107421875, -105.60968780517578, -720.7353515625, 1062.2474365234375, 1094.987060546875, 58.91596984863281, 1643.6085205078125, 534.7306518554688, 565.0786743164062, 1080.886962890625, 158.59283447265625, 460.9177551269531, 960.9036254882812, -249.9049530029297, 224.4571075439453, 10.105682373046875, 756.2160034179688, -386.7135314941406, 217.61529541015625, 433.96466064453125, 504.2506408691406, 815.8069458007812, -177.1867218017578, 2531.9345703125, 684.4345092773438, 135.33412170410156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000615.npy"} +{"epoch": 0.9030837004405287, "step": 616, "batch_size": 64, "mean": 334.37677001953125, "std": 578.4204711914062, "min": -823.6965942382812, "p10": -376.19680175781247, "median": 313.39817810058594, "p90": 1135.2195556640627, "max": 1737.42822265625, "pos_frac": 0.734375, "sample": [922.39208984375, 67.40362548828125, 142.88052368164062, 745.5133056640625, 545.6558837890625, -1.971944808959961, 259.3746337890625, -206.53451538085938, 350.5400390625, 1175.499267578125, 1375.1192626953125, 624.6495971679688, 158.04104614257812, 84.44633483886719, -730.3785400390625, 1152.5050048828125, 463.3286437988281, 276.2563171386719, 110.95812225341797, -123.2085189819336, 1496.619873046875, 573.5361328125, 511.9345397949219, 399.2583312988281, -366.4715270996094, -641.6398315429688, 550.7854614257812, 360.29443359375, -79.8786849975586, 1233.4359130859375, 1624.0482177734375, 157.56788635253906, -497.11553955078125, -6.131080627441406, 82.10606384277344, -823.6965942382812, -168.74472045898438, 240.85592651367188, 905.3576049804688, 633.5519409179688, 389.84173583984375, 370.85321044921875, 356.8626708984375, 568.0393676757812, -747.6434936523438, 1002.7584228515625, 142.1366729736328, -335.7039794921875, 135.20452880859375, 773.403564453125, 457.0076904296875, -380.3647766113281, 1737.42822265625, 357.592041015625, 725.49609375, 160.98919677734375, 243.27439880371094, 170.3175811767578, -314.315673828125, 630.6082153320312, 1065.3553466796875, -364.33966064453125, 1094.8868408203125, -417.7200622558594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000616.npy"} +{"epoch": 0.9045521292217328, "step": 617, "batch_size": 64, "mean": 241.14703369140625, "std": 576.4742431640625, "min": -1234.5537109375, "p10": -407.1669067382812, "median": 236.93768310546875, "p90": 780.1889160156253, "max": 2127.309814453125, "pos_frac": 0.75, "sample": [278.3953857421875, 203.82611083984375, -185.01834106445312, 303.6658935546875, 379.1107482910156, -250.72613525390625, 1.7092227935791016, -627.0670776367188, 659.4664916992188, 185.53416442871094, -286.6872253417969, 378.8561096191406, 369.3089904785156, 439.7112731933594, 207.88534545898438, 242.43075561523438, 475.17779541015625, 155.11680603027344, 63.80085754394531, 1383.23486328125, 669.3270263671875, 574.929443359375, 205.441650390625, 43.93834686279297, 421.4794616699219, 457.4638977050781, 93.0941390991211, -280.0281677246094, 332.1282958984375, -1234.5537109375, 83.48202514648438, 407.0497741699219, -378.02667236328125, -348.4054260253906, 683.3892822265625, 284.941650390625, 1128.109619140625, 811.6265869140625, 557.093505859375, 454.7384033203125, 536.8114013671875, -493.59967041015625, 842.1473999023438, 517.9638061523438, 1933.3126220703125, 440.53387451171875, 706.8343505859375, -647.00390625, 196.97567749023438, 589.3384399414062, -1172.1162109375, 172.95567321777344, 98.03862762451172, 210.21710205078125, 231.44461059570312, -112.82424926757812, 882.1236572265625, -151.8603515625, -34.28997039794922, -419.65557861328125, 2127.309814453125, 310.62628173828125, -778.5784912109375, 101.75479125976562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000617.npy"} +{"epoch": 0.9060205580029369, "step": 618, "batch_size": 64, "mean": 522.3434448242188, "std": 838.8261108398438, "min": -1727.7322998046875, "p10": -456.5450408935547, "median": 440.16827392578125, "p90": 1321.3440307617188, "max": 3020.472412109375, "pos_frac": 0.765625, "sample": [-1727.7322998046875, 849.544921875, 288.18377685546875, 278.4665832519531, 124.4007339477539, 254.7455291748047, 147.37237548828125, 67.49171447753906, 2392.373291015625, 883.5899047851562, 249.96470642089844, -893.4192504882812, 801.2232055664062, 213.1176300048828, 1243.54833984375, 313.7468566894531, -362.91510009765625, 1208.2171630859375, 720.7251586914062, -391.2889099121094, -183.337646484375, -462.0552062988281, -443.68798828125, 90.43913269042969, -796.9523315429688, -607.3505249023438, 102.29537200927734, 1334.5704345703125, 1032.845458984375, 122.63735961914062, 583.0867919921875, 440.7642822265625, 707.7798461914062, 991.15966796875, 979.6806640625, -928.2387084960938, -467.7392272949219, 1392.669677734375, 342.1920471191406, 528.2789306640625, 897.6275024414062, 1290.482421875, 972.305419921875, 2470.1806640625, 999.1846313476562, -87.19635772705078, 439.572265625, 659.7249145507812, 380.055419921875, 3020.472412109375, 856.5701904296875, -15.269296646118164, 1476.86669921875, 1228.9622802734375, 909.2271728515625, 490.9530029296875, -169.3470916748047, 1190.7579345703125, 2577.331298828125, -61.073394775390625, 427.2431640625, 1065.5706787109375, 599.8961181640625, 389.48870849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000618.npy"} +{"epoch": 0.9074889867841409, "step": 619, "batch_size": 64, "mean": 400.05615234375, "std": 811.0994262695312, "min": -1132.9000244140625, "p10": -549.6720031738281, "median": 303.95091247558594, "p90": 1448.1886352539068, "max": 3109.224609375, "pos_frac": 0.671875, "sample": [-1017.5045166015625, -668.17431640625, 11.0892333984375, 1209.2410888671875, 1516.435791015625, -102.6094741821289, 342.5334167480469, 235.41580200195312, 313.40765380859375, -477.9174499511719, 593.9485473632812, 715.3480224609375, 431.35723876953125, 1783.7529296875, -678.1210327148438, 198.8665771484375, 1197.533935546875, 460.85626220703125, 1163.9422607421875, 1145.4378662109375, -823.524169921875, -733.0370483398438, 1274.3974609375, 1316.20556640625, -20.341079711914062, -481.82391357421875, -103.7030258178711, 558.3375244140625, 665.988037109375, -93.87542724609375, 99.58193969726562, 623.6165161132812, -395.17095947265625, 28.719970703125, 1503.46484375, -379.4341125488281, -57.21539306640625, -187.7476043701172, 798.0177001953125, 653.7457885742188, -53.31777572631836, 1696.59375, 593.1343383789062, 570.0443725585938, 1039.05419921875, 77.48104858398438, -194.8236846923828, 2603.99755859375, 1546.5592041015625, 311.0364990234375, 112.5700454711914, 204.15695190429688, 434.8150939941406, 3109.224609375, -1132.9000244140625, -578.749755859375, 650.2149658203125, -116.7957763671875, 1319.2108154296875, -31.61166763305664, 296.8653259277344, 155.98056030273438, 1.0006542205810547, 368.8121643066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000619.npy"} +{"epoch": 0.908957415565345, "step": 620, "batch_size": 64, "mean": 380.65301513671875, "std": 610.8950805664062, "min": -1066.031982421875, "p10": -257.8278106689453, "median": 354.47142028808594, "p90": 1252.6873291015627, "max": 1934.740234375, "pos_frac": 0.75, "sample": [396.3836364746094, -336.3392333984375, 495.0657958984375, 651.1436767578125, 482.80426025390625, 810.173095703125, 349.47589111328125, 271.08306884765625, -76.91516876220703, 1328.2882080078125, -249.93698120117188, 381.61505126953125, -261.2095947265625, 1206.0518798828125, 452.3660888671875, 141.9721221923828, 1056.737060546875, 537.4061279296875, -88.89224243164062, 1385.6072998046875, 1934.740234375, 208.774658203125, 402.2976989746094, 1097.2777099609375, -500.9939270019531, 197.85238647460938, 670.9050903320312, 255.90823364257812, 222.1776885986328, 82.22782897949219, -35.39379119873047, 1791.5865478515625, 413.72161865234375, 1803.218017578125, -140.0882568359375, 523.6105346679688, 43.703636169433594, 552.37646484375, -48.721012115478516, 403.5787353515625, 376.6315002441406, 463.583251953125, -200.46258544921875, -109.10160827636719, 1272.6739501953125, 762.204833984375, 714.86767578125, -1065.250244140625, 143.30792236328125, 200.49349975585938, -218.44012451171875, -1066.031982421875, 1776.976318359375, 359.4669494628906, 98.32383728027344, 235.11495971679688, 499.8456726074219, 161.18942260742188, -440.3984375, 331.9354248046875, 314.7726135253906, 931.48583984375, -357.7216491699219, 364.6845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000620.npy"} +{"epoch": 0.9104258443465492, "step": 621, "batch_size": 64, "mean": 551.19140625, "std": 720.2650146484375, "min": -1603.9794921875, "p10": -243.41492462158197, "median": 507.34156799316406, "p90": 1408.5875122070318, "max": 3010.297119140625, "pos_frac": 0.84375, "sample": [661.6509399414062, -429.9609375, -757.996826171875, -192.18849182128906, 1598.338623046875, 20.074562072753906, 123.82413482666016, 675.9281616210938, 646.503173828125, 940.5148315429688, 862.5390014648438, 327.1602783203125, 315.1707763671875, 878.04443359375, 210.24163818359375, -498.0923767089844, -265.3691101074219, 891.2470092773438, -1603.9794921875, 193.21368408203125, 1533.9583740234375, 190.52667236328125, 952.2142333984375, 536.5043334960938, 1200.7481689453125, 3010.297119140625, 170.04159545898438, 1072.5943603515625, 1065.807861328125, -63.88876724243164, 1297.1072998046875, 796.263671875, 906.8276977539062, 304.30218505859375, 856.4963989257812, 1726.69091796875, 178.67926025390625, 721.7078857421875, 427.5782165527344, 478.1788024902344, -119.95750427246094, 1265.140625, -281.5199890136719, 610.032470703125, 210.15872192382812, 158.7706298828125, 1456.36474609375, 353.5538330078125, 1256.343017578125, 1202.2518310546875, -1032.6812744140625, 368.86798095703125, 83.68367767333984, 133.47186279296875, 255.2814178466797, 641.0772705078125, 931.0358276367188, 1654.3448486328125, 244.25308227539062, 339.21136474609375, 1716.045166015625, 934.14501953125, 725.476806640625, 211.39739990234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000621.npy"} +{"epoch": 0.9118942731277533, "step": 622, "batch_size": 64, "mean": 397.30218505859375, "std": 627.1004638671875, "min": -1102.440185546875, "p10": -177.88167114257806, "median": 386.6011657714844, "p90": 979.6090881347659, "max": 2692.66552734375, "pos_frac": 0.8125, "sample": [669.7999877929688, 335.9076232910156, -210.07736206054688, 403.70574951171875, 574.5360717773438, 388.9584045410156, -725.183837890625, 387.4468994140625, 2488.486328125, 560.0323486328125, 65.8954849243164, -401.10552978515625, 107.91224670410156, -317.22601318359375, -29.131134033203125, 561.6260986328125, 199.87696838378906, 385.75543212890625, 84.83425903320312, 192.40565490722656, 1180.8992919921875, -65.97052001953125, 615.7379150390625, 2692.66552734375, 791.4605102539062, 69.85116577148438, 553.1773681640625, 736.13330078125, 175.16615295410156, -1022.949951171875, 208.7265625, 17.2222900390625, 1003.6431274414062, 149.99452209472656, 275.40362548828125, 923.5296630859375, 451.939697265625, -513.7972412109375, 1009.1773071289062, 525.370849609375, 865.137451171875, 836.6810302734375, 19.122962951660156, 647.726806640625, 237.00173950195312, -64.25128173828125, 428.8727111816406, -102.75839233398438, 57.72926330566406, 314.1024169921875, 1451.3753662109375, 532.0452270507812, 721.782470703125, 646.331787109375, 581.5738525390625, 732.579833984375, -1102.440185546875, -5.143013000488281, 126.61445617675781, 1460.7012939453125, 660.6132202148438, 606.171630859375, 20.65860366821289, 253.27337646484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000622.npy"} +{"epoch": 0.9133627019089574, "step": 623, "batch_size": 64, "mean": 313.7362060546875, "std": 612.8697509765625, "min": -1067.54248046875, "p10": -257.08003234863276, "median": 226.86019897460938, "p90": 1068.154351806641, "max": 2710.0458984375, "pos_frac": 0.71875, "sample": [-11.58685302734375, 231.39639282226562, -809.7929077148438, 1368.300048828125, -95.6932373046875, -53.17536544799805, 685.0505981445312, 158.72781372070312, 787.2691040039062, 34.2000732421875, 1438.8984375, 267.86474609375, 597.5066528320312, 178.9045867919922, 656.8507690429688, -465.1283874511719, 707.591064453125, 331.35394287109375, -226.4786376953125, 50.5767822265625, -150.65379333496094, 443.9722900390625, 1227.8953857421875, -165.7896728515625, -1067.54248046875, 130.8037109375, 348.0404052734375, -690.86962890625, 423.011474609375, 602.7999877929688, -270.1949157714844, 499.5755310058594, -129.9547119140625, -39.925697326660156, -170.75299072265625, 95.37477111816406, 958.4530639648438, 690.3949584960938, 942.1585693359375, 200.79452514648438, 61.12550354003906, -589.135986328125, 355.4526062011719, 1360.4090576171875, 57.612335205078125, -38.51042556762695, 2710.0458984375, 518.9251098632812, -40.12003707885742, -384.6198425292969, 1115.169189453125, 589.9215087890625, 285.83111572265625, 380.73602294921875, 222.32400512695312, 1765.2451171875, 85.152099609375, 87.89915466308594, 277.23468017578125, 6.360485076904297, 513.1676025390625, 275.033935546875, 720.700439453125, 32.9295654296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000623.npy"} +{"epoch": 0.9148311306901615, "step": 624, "batch_size": 64, "mean": 467.7041015625, "std": 573.8270874023438, "min": -1199.674560546875, "p10": -218.0212677001953, "median": 498.77928161621094, "p90": 1192.6002807617188, "max": 1461.87744140625, "pos_frac": 0.828125, "sample": [585.7987060546875, -374.06890869140625, 702.032470703125, 360.522705078125, 497.08648681640625, 1006.6058349609375, 1187.0516357421875, 1461.87744140625, 1376.560546875, 196.49468994140625, 186.36328125, 10.72601318359375, 126.90152740478516, 1194.978271484375, 950.644287109375, 8.2674560546875, 584.8799438476562, 741.2752075195312, 1217.859375, -1103.3006591796875, 75.89552307128906, 234.1411590576172, 1.1917800903320312, 908.763427734375, 415.2911682128906, 211.2344970703125, 906.5816040039062, 800.4738159179688, 1092.9976806640625, -223.98196411132812, 167.88465881347656, 1092.6793212890625, -204.11297607421875, 285.0674133300781, -312.58551025390625, 941.7162475585938, 391.3602294921875, -93.78302001953125, 914.7318115234375, 410.3443603515625, -1199.674560546875, 78.07425689697266, -66.598876953125, 720.5785522460938, 979.377197265625, 1002.2903442382812, 280.92193603515625, 682.9310302734375, 1016.7503662109375, 500.4720764160156, 117.05793762207031, -415.4503173828125, 8.81859016418457, 1226.3748779296875, 603.2825927734375, 711.751220703125, -321.3438415527344, 659.4384765625, -108.36262512207031, 652.3182373046875, 1286.4886474609375, 161.50006103515625, 1339.92822265625, 1081.68994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000624.npy"} +{"epoch": 0.9162995594713657, "step": 625, "batch_size": 64, "mean": 524.8773193359375, "std": 563.9940185546875, "min": -447.3465881347656, "p10": -23.857870483398415, "median": 437.4485778808594, "p90": 1236.61767578125, "max": 2252.187744140625, "pos_frac": 0.875, "sample": [58.311485290527344, 148.68057250976562, -327.8778381347656, 939.1499633789062, 357.56622314453125, 37.13433837890625, 819.707763671875, 1175.654541015625, 1045.120849609375, 1000.3526611328125, 39.681243896484375, 618.161376953125, 275.26177978515625, 62.00004577636719, 1982.22998046875, 630.3933715820312, 234.3680419921875, 107.14454650878906, 2252.187744140625, 474.72149658203125, 1945.6917724609375, 251.92994689941406, 91.27823638916016, 744.9859008789062, 155.88113403320312, 714.52783203125, 1767.0196533203125, -32.88374328613281, 331.97930908203125, 314.56903076171875, 412.17889404296875, 709.8387451171875, 490.5126647949219, -436.89031982421875, 523.8927612304688, -111.98355102539062, 84.9991226196289, 604.0260009765625, 296.2891540527344, 794.88818359375, 1247.3560791015625, 420.49066162109375, 1386.6279296875, 454.406494140625, 118.25514221191406, 475.7296447753906, -81.60154724121094, 473.19647216796875, 670.8212890625, 798.2130737304688, 106.83316802978516, -2.7975006103515625, 926.8648681640625, 359.89337158203125, 136.75660705566406, 141.91290283203125, 186.3415069580078, 495.1096496582031, 1211.5614013671875, -447.3465881347656, 733.5698852539062, -132.40548706054688, 1393.030029296875, 936.6465454101562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000625.npy"} +{"epoch": 0.9177679882525698, "step": 626, "batch_size": 64, "mean": 284.48504638671875, "std": 644.917724609375, "min": -2501.293212890625, "p10": -322.6523956298827, "median": 224.28958892822266, "p90": 943.5091552734377, "max": 1912.6240234375, "pos_frac": 0.734375, "sample": [665.6715087890625, 61.000732421875, 465.3983154296875, 389.27191162109375, 177.16873168945312, 211.4592742919922, 723.5122680664062, 213.81149291992188, 879.4739379882812, -2501.293212890625, 1594.0601806640625, 234.76768493652344, 1531.9600830078125, 1912.6240234375, -587.6716918945312, 176.20993041992188, 254.373291015625, -369.6172180175781, -5.45361328125, -156.28631591796875, 15.78875732421875, 210.8525848388672, 577.7120361328125, 118.83306121826172, 72.60978698730469, 1.3457489013671875, 156.00885009765625, -618.8582763671875, 467.6172180175781, 634.0107421875, -126.70097351074219, 648.39013671875, -535.9703369140625, -759.9549560546875, 608.0884399414062, 1191.396240234375, 759.9655151367188, 370.1361389160156, 334.4771728515625, 643.6995239257812, 558.4501342773438, -213.06781005859375, 970.9528198242188, 499.30328369140625, 1675.477294921875, 722.1942138671875, -104.69904327392578, 128.53976440429688, 124.40499877929688, 391.97027587890625, -116.2906494140625, 814.7535400390625, 330.1058349609375, 1100.085205078125, -19.073013305664062, 211.20664978027344, 662.6919555664062, 398.6949462890625, -717.765625, -93.15690612792969, 93.86589050292969, -122.7044906616211, 292.16302490234375, -20.947856903076172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000626.npy"} +{"epoch": 0.9192364170337739, "step": 627, "batch_size": 64, "mean": 586.0703125, "std": 831.2222900390625, "min": -701.0098266601562, "p10": -149.28670654296874, "median": 430.69078063964844, "p90": 1477.503259277344, "max": 3805.1328125, "pos_frac": 0.75, "sample": [642.0808715820312, 267.16204833984375, 1143.2010498046875, 484.4393615722656, -183.14535522460938, -48.41560363769531, 105.29297637939453, 700.6536865234375, 576.71484375, 494.1810607910156, 1336.0003662109375, -533.0965576171875, 235.6429443359375, 459.9217224121094, -107.3928451538086, 981.7846069335938, -149.24539184570312, 719.9708862304688, 979.3817138671875, 1192.9957275390625, -670.790283203125, -149.30441284179688, 659.0436401367188, -18.535017013549805, 1820.363037109375, 171.28018188476562, 426.4788513183594, 2571.013671875, 1022.7535400390625, 384.1142578125, 1917.981201171875, 74.11483764648438, 557.4380493164062, 1279.71875, 552.3609619140625, 1747.1712646484375, 1431.7041015625, 162.95584106445312, 531.8719482421875, -140.33590698242188, 110.8882827758789, -53.715179443359375, 3256.9130859375, 1497.1314697265625, 706.3056640625, -237.70535278320312, 3805.1328125, -133.5091552734375, 467.7856140136719, -77.25833129882812, 1040.12451171875, 280.5548400878906, 911.6541748046875, -701.0098266601562, 266.554443359375, 314.54229736328125, 434.9027099609375, 238.5137481689453, -401.0476379394531, 1321.8115234375, -67.4804458618164, 273.4107666015625, 299.37969970703125, 325.09576416015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000627.npy"} +{"epoch": 0.920704845814978, "step": 628, "batch_size": 64, "mean": 442.3868408203125, "std": 661.0192260742188, "min": -911.684814453125, "p10": -283.934455871582, "median": 368.77464294433594, "p90": 1341.9218872070312, "max": 2209.055908203125, "pos_frac": 0.703125, "sample": [342.4196472167969, -21.788375854492188, 423.7236328125, 1356.9688720703125, 1465.46337890625, 164.62246704101562, 712.8681030273438, 197.52154541015625, -254.4638214111328, -7.424009323120117, 188.70956420898438, 135.209228515625, -911.684814453125, 1049.29345703125, 1040.130126953125, 1223.52978515625, -91.84088134765625, -237.9674072265625, 1685.228271484375, 487.54132080078125, 2209.055908203125, 566.8507080078125, 244.00912475585938, -192.40087890625, -61.438316345214844, -296.5647277832031, 470.15704345703125, 855.9994506835938, -528.7650146484375, -472.7088317871094, 1000.4214477539062, 296.7882080078125, 802.5504760742188, 1465.199951171875, 1345.8887939453125, 901.79150390625, 434.8383483886719, 758.8524780273438, 170.6616668701172, 198.04751586914062, -210.42593383789062, 568.7161254882812, 467.850341796875, 76.44306182861328, 1574.789306640625, 1282.724853515625, 1329.61181640625, 225.55084228515625, -411.14569091796875, -775.4671630859375, -142.17019653320312, -207.03175354003906, 1332.665771484375, 837.87744140625, -675.5681762695312, -43.18122100830078, 1084.722412109375, 395.129638671875, 497.6628112792969, 164.14093017578125, 1195.424560546875, 550.0558471679688, -71.85688781738281, 148.94419860839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000628.npy"} +{"epoch": 0.922173274596182, "step": 629, "batch_size": 64, "mean": 406.26995849609375, "std": 548.5282592773438, "min": -1083.5233154296875, "p10": -222.77543029785153, "median": 416.4658203125, "p90": 1199.638122558594, "max": 1766.9041748046875, "pos_frac": 0.8125, "sample": [220.1490478515625, 1266.464599609375, -350.6226501464844, -560.2236938476562, 418.33935546875, 411.271484375, 359.60760498046875, 791.808349609375, 1326.0274658203125, 567.6954956054688, 535.8810424804688, 824.3603515625, -98.67996978759766, 434.20501708984375, 1231.041748046875, -125.52436065673828, 163.58843994140625, 60.262699127197266, 36.950111389160156, 606.8677978515625, 31.689376831054688, 1132.7301025390625, 483.5870056152344, -178.58656311035156, 156.65834045410156, 556.0630493164062, -848.9744262695312, -148.9613037109375, 1026.03173828125, 729.7093505859375, 85.42191314697266, 845.4683227539062, 1403.929443359375, 1766.9041748046875, 290.3919372558594, 163.5512237548828, -1083.5233154296875, 1241.16162109375, 1045.0537109375, -270.05352783203125, -241.71351623535156, -596.8275756835938, 354.19732666015625, 381.72137451171875, 589.053466796875, 1228.31298828125, 299.330078125, 520.8953857421875, 681.3818969726562, 7.193960189819336, 843.946533203125, 321.6199951171875, 82.70098876953125, 665.515625, 220.63381958007812, 496.28509521484375, 19.081369400024414, 661.6664428710938, 972.6351318359375, 597.5764770507812, 431.1988525390625, 627.7608642578125, 414.59228515625, -125.20437622070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000629.npy"} +{"epoch": 0.9236417033773862, "step": 630, "batch_size": 64, "mean": 232.0568084716797, "std": 694.9452514648438, "min": -1352.0950927734375, "p10": -474.1148376464844, "median": 161.2292709350586, "p90": 995.3790466308594, "max": 3376.850830078125, "pos_frac": 0.6875, "sample": [1167.38916015625, -1103.1722412109375, -548.8484497070312, 347.5641784667969, -195.34454345703125, 740.5198364257812, 10.983573913574219, -190.05035400390625, 2307.14501953125, -210.1197509765625, 659.2659912109375, -117.01942443847656, 254.33547973632812, 1019.8630981445312, 756.8065795898438, -1352.0950927734375, 145.21450805664062, -576.4849853515625, 302.21368408203125, 211.70103454589844, 456.1562194824219, -218.44712829589844, 145.39682006835938, 1364.6103515625, -145.3109893798828, 3376.850830078125, 39.83903503417969, 982.38525390625, 151.40293884277344, -219.4566192626953, 33.58494567871094, -396.26580810546875, 292.9401550292969, -527.8054809570312, 363.5702209472656, -474.6610412597656, -221.1196746826172, 431.7424621582031, 159.5906524658203, 402.90716552734375, 162.66357421875, -260.24969482421875, 293.7672119140625, -538.56201171875, 172.598876953125, 376.28900146484375, 133.5665283203125, 284.6669006347656, 643.4732666015625, -48.999855041503906, 65.91706848144531, 1000.9478149414062, 634.3969116210938, 159.7949676513672, -338.40216064453125, 58.74470520019531, 344.0293273925781, 181.3184051513672, 636.75439453125, -472.8403625488281, 1165.97021484375, 215.6890869140625, 346.23638916015625, 6.087522506713867], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000630.npy"} +{"epoch": 0.9251101321585903, "step": 631, "batch_size": 64, "mean": 365.2381591796875, "std": 647.4708251953125, "min": -1795.395751953125, "p10": -395.4131805419922, "median": 319.0447082519531, "p90": 1112.1002075195313, "max": 1989.2781982421875, "pos_frac": 0.71875, "sample": [1104.9781494140625, 983.66259765625, -407.8382263183594, 299.20074462890625, -393.3685302734375, 433.98089599609375, -107.86325073242188, -133.05740356445312, -237.99551391601562, -308.73828125, 1670.095703125, 1246.92578125, -467.7764587402344, 471.5436096191406, -543.2791137695312, 183.5072479248047, 501.4818115234375, 869.4251708984375, 42.67719268798828, -396.2894592285156, 139.4055633544922, -16.990262985229492, 263.9694519042969, -693.9998168945312, 1006.059814453125, 4.745273590087891, 151.66256713867188, -165.16844177246094, 781.5924682617188, 769.769287109375, -1795.395751953125, 516.9493408203125, 1989.2781982421875, 720.25341796875, 383.32745361328125, 740.2631225585938, 274.2832336425781, -167.5368194580078, -124.61380767822266, 1113.259521484375, 226.17752075195312, 539.938232421875, 1532.6239013671875, 860.2294311523438, 610.529296875, -787.8186645507812, 1109.3951416015625, 338.888671875, 1461.3623046875, 458.83380126953125, 72.63610076904297, 638.1190795898438, -156.42926025390625, 393.4567565917969, 233.52557373046875, 202.41552734375, 1230.78271484375, 630.8908081054688, 937.740234375, -100.07820892333984, 264.503173828125, 959.6199951171875, 234.78305053710938, 780.7291870117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000631.npy"} +{"epoch": 0.9265785609397944, "step": 632, "batch_size": 64, "mean": 388.82421875, "std": 636.9307250976562, "min": -1301.375, "p10": -324.9979248046875, "median": 373.52227783203125, "p90": 1099.9728637695314, "max": 2343.582275390625, "pos_frac": 0.75, "sample": [708.7247314453125, -713.7784423828125, 295.71728515625, 380.2610778808594, 632.6234741210938, -1301.375, 1080.9833984375, 1108.1112060546875, 1328.4949951171875, 1012.2675170898438, 787.8331909179688, 682.62255859375, 226.17449951171875, 896.468994140625, 267.7918395996094, 222.80953979492188, 580.2607421875, 386.66448974609375, 33.690792083740234, 9.509498596191406, 1572.132568359375, 366.7834777832031, 727.041748046875, 508.10052490234375, -480.86309814453125, 1155.380859375, 614.13525390625, -336.7883605957031, 444.0684509277344, -177.73187255859375, -1150.061279296875, 324.2839660644531, 1032.615478515625, 1036.6854248046875, 588.89306640625, 786.3016967773438, 960.789306640625, 759.0704956054688, 323.4460754394531, -297.4869079589844, 1284.7197265625, -109.37189483642578, -209.71234130859375, -131.69674682617188, 695.452880859375, 302.58062744140625, 667.3423461914062, 138.21392822265625, 1139.28125, 2343.582275390625, 1016.2818603515625, -146.67379760742188, -438.413330078125, 166.951171875, 154.0819549560547, -230.9341278076172, 403.6037902832031, 290.5865173339844, -652.01123046875, 81.47454833984375, 93.19248962402344, -230.91603088378906, -124.91793060302734, 999.3990478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000632.npy"} +{"epoch": 0.9280469897209985, "step": 633, "batch_size": 64, "mean": 287.41412353515625, "std": 583.1665649414062, "min": -1075.1495361328125, "p10": -393.2981475830078, "median": 267.5705108642578, "p90": 1143.4038208007812, "max": 1566.03173828125, "pos_frac": 0.71875, "sample": [-399.6371765136719, 1324.922607421875, 763.9129638671875, -671.2568359375, 416.79815673828125, 369.6087646484375, -424.02178955078125, -112.05386352539062, 1216.975830078125, 77.66405487060547, 268.50018310546875, 67.92047119140625, 400.59326171875, -1075.1495361328125, 741.6509399414062, 89.95240020751953, 187.07518005371094, 1149.0831298828125, 355.2119445800781, -233.98214721679688, -153.40252685546875, 331.2972106933594, 313.7763671875, -40.12781524658203, 705.556640625, 165.03945922851562, -12.243635177612305, -134.08319091796875, -935.3988037109375, 1154.5899658203125, 371.23187255859375, 250.73040771484375, 1130.152099609375, 424.9729309082031, 750.8865966796875, -82.75508117675781, -255.76522827148438, -1029.547607421875, -378.507080078125, 175.6746826171875, 1385.361083984375, 475.6354675292969, 185.39019775390625, -797.5985107421875, 590.1078491210938, 646.9515380859375, 558.034912109375, 1566.03173828125, 1041.438232421875, 1079.3446044921875, 149.75741577148438, 266.6408386230469, -246.40159606933594, 892.1798706054688, 326.93023681640625, -24.717025756835938, 154.1809844970703, 367.25604248046875, 1350.73876953125, 88.86083984375, 607.571533203125, 72.53652954101562, 107.01795196533203, 285.408935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000633.npy"} +{"epoch": 0.9295154185022027, "step": 634, "batch_size": 64, "mean": 242.82333374023438, "std": 587.0032958984375, "min": -975.2646484375, "p10": -373.63626098632807, "median": 159.19013214111328, "p90": 1035.0707519531254, "max": 2238.637939453125, "pos_frac": 0.625, "sample": [192.82374572753906, 878.2640380859375, -280.0065612792969, 1560.354736328125, -530.7860717773438, -23.208009719848633, 29.193056106567383, 233.71298217773438, 302.3634948730469, 1251.4669189453125, 34.14752197265625, 294.26019287109375, -90.49866485595703, 293.78326416015625, 642.5752563476562, 146.96920776367188, 243.02496337890625, -238.98065185546875, -717.863525390625, 956.121826171875, 1099.9158935546875, -146.8829803466797, -123.54847717285156, 102.42974853515625, -36.077545166015625, 54.26795196533203, 152.23117065429688, 691.4075317382812, 512.638916015625, 47.61519241333008, 2238.637939453125, 358.5101318359375, 635.416015625, -975.2646484375, 1068.906005859375, 1174.9642333984375, 608.669677734375, -329.2960205078125, -392.63922119140625, 241.53640747070312, 737.3475952148438, -150.05726623535156, -82.76526641845703, -933.8589477539062, 166.1490936279297, -533.0383911132812, -4.033832550048828, -501.087158203125, -190.6705322265625, 862.4177856445312, -312.6827392578125, 731.931396484375, 388.130859375, 1325.71728515625, 365.2767028808594, 88.31852722167969, 270.1934509277344, 650.3116455078125, -93.08727264404297, 254.08511352539062, -7.48382568359375, -96.84611511230469, -257.5126647949219, 702.782958984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000634.npy"} +{"epoch": 0.9309838472834068, "step": 635, "batch_size": 64, "mean": 351.2920837402344, "std": 629.7105712890625, "min": -1083.65625, "p10": -298.73904266357414, "median": 335.932861328125, "p90": 1013.8996582031253, "max": 2300.81298828125, "pos_frac": 0.71875, "sample": [872.781005859375, 101.19508361816406, 2300.81298828125, -197.8743896484375, 440.0773620605469, -28.081932067871094, 188.45526123046875, 1.5118522644042969, -11.581680297851562, 523.5579223632812, 41.93238067626953, 1546.396728515625, 16.360916137695312, 819.431884765625, -520.6024169921875, 362.71551513671875, 555.115966796875, -30.797454833984375, 772.253173828125, 27.147994995117188, 410.03192138671875, 185.6389617919922, 741.6790771484375, 928.952880859375, 355.09783935546875, 108.4185562133789, 9.117645263671875, 436.02215576171875, -220.11585998535156, 1780.61181640625, -108.42487335205078, 380.44342041015625, 381.71636962890625, 1211.9698486328125, 772.3646850585938, -136.8638916015625, 92.51506805419922, -21.664936065673828, 454.992919921875, 1050.305419921875, -167.48182678222656, 894.5594482421875, 928.2599487304688, 355.5432434082031, -745.55224609375, -155.01718139648438, 642.2408447265625, -50.67176055908203, -332.4346923828125, 605.2857666015625, -478.39483642578125, -400.77227783203125, -1083.65625, 847.9536743164062, 1176.697021484375, 272.4023742675781, 442.9036560058594, 698.8607177734375, -913.2926025390625, 2110.23193359375, 187.76980590820312, 459.6087951660156, 316.76788330078125, 277.2659912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000635.npy"} +{"epoch": 0.9324522760646109, "step": 636, "batch_size": 64, "mean": 425.38995361328125, "std": 661.10986328125, "min": -903.7350463867188, "p10": -239.64664764404296, "median": 326.12953186035156, "p90": 976.4569091796875, "max": 3726.567626953125, "pos_frac": 0.75, "sample": [44.77899169921875, -362.2305908203125, 90.21383666992188, -116.13717651367188, 286.3384704589844, -423.1743469238281, 392.36761474609375, 360.00811767578125, 470.7170715332031, 10.112205505371094, 797.005126953125, -362.22955322265625, 355.3615417480469, 1167.795654296875, 23.166831970214844, 977.0690307617188, -92.94189453125, 2161.21337890625, 1250.12158203125, 490.52069091796875, 254.86647033691406, 1009.8402709960938, -70.89041137695312, -277.722900390625, 790.8763427734375, 296.3069152832031, 888.352783203125, 411.27044677734375, 146.4088592529297, 3726.567626953125, 17.763687133789062, -222.3966827392578, 874.7587280273438, 1251.3564453125, -313.9031677246094, 964.0804443359375, 973.1080322265625, 372.6677551269531, 530.5968017578125, -221.14895629882812, 655.032958984375, 211.42225646972656, 822.4559326171875, 798.5870361328125, 733.1190795898438, 292.78887939453125, 833.55078125, -5.199310302734375, 289.49298095703125, -247.03948974609375, 184.09181213378906, -18.516387939453125, -903.7350463867188, 396.6439208984375, 264.3058776855469, -195.6703643798828, 975.0286254882812, 721.5850830078125, 290.3109436035156, 449.39111328125, -8.082168579101562, 296.89752197265625, 534.9832153320312, 930.6765747070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000636.npy"} +{"epoch": 0.933920704845815, "step": 637, "batch_size": 64, "mean": 459.3734130859375, "std": 641.8457641601562, "min": -996.6302490234375, "p10": -90.57667846679686, "median": 322.39808654785156, "p90": 1401.0846923828126, "max": 2329.317138671875, "pos_frac": 0.828125, "sample": [113.15288543701172, 60.729820251464844, 557.363525390625, -8.996467590332031, 52.43899917602539, 1008.6630859375, 312.9375, 70.486083984375, 642.9221801757812, 1292.425048828125, 720.6455688476562, -941.8338012695312, 92.93870544433594, 138.34805297851562, -186.0057830810547, 596.5505981445312, 878.5612182617188, 1401.51513671875, 1029.4764404296875, 615.3289794921875, 1613.27197265625, 330.05126953125, 206.45443725585938, 2329.317138671875, 109.00942993164062, 466.39691162109375, -358.7233581542969, 13.35775375366211, 325.1553649902344, -34.401947021484375, 326.5348815917969, -209.14035034179688, -151.09600830078125, 616.14306640625, 421.5556335449219, -98.70878601074219, 319.64080810546875, 695.1168823242188, 819.5908203125, 221.40643310546875, 561.695556640625, 429.10931396484375, 181.64767456054688, 922.015869140625, 638.037109375, 577.1876831054688, 1400.080322265625, 65.06647491455078, 1762.8887939453125, -996.6302490234375, 1134.9346923828125, 60.87742614746094, 287.3077392578125, 1633.614990234375, 343.1349182128906, 62.15275573730469, 74.64381408691406, 1470.8905029296875, -71.60176086425781, 2254.898681640625, 14.874252319335938, 106.62428283691406, 139.32470703125, -61.45636749267578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000637.npy"} +{"epoch": 0.9353891336270191, "step": 638, "batch_size": 64, "mean": 358.84271240234375, "std": 700.8246459960938, "min": -3408.7744140625, "p10": -85.08853530883788, "median": 275.27667236328125, "p90": 1157.8909423828127, "max": 1784.43212890625, "pos_frac": 0.765625, "sample": [1231.2763671875, -680.7144165039062, -3408.7744140625, 979.8616333007812, 284.627197265625, 152.23313903808594, 48.441070556640625, 39.114013671875, 1134.5301513671875, 342.8979797363281, 725.3726806640625, 1364.149169921875, 82.52488708496094, 734.7677612304688, 263.48486328125, 508.412353515625, 612.7484130859375, -528.4863891601562, 514.3150634765625, 35.85463333129883, 388.9867858886719, 43.54631805419922, -211.2803955078125, 134.87298583984375, 281.6094055175781, 913.8837890625, 1353.2113037109375, -18.048480987548828, 684.570068359375, 252.64041137695312, 755.3021850585938, 252.51361083984375, 605.0286254882812, -44.039024353027344, -104.01365661621094, 750.6278076171875, 932.1982421875, -54.718997955322266, 328.6360778808594, -347.6410827636719, -49.98826599121094, 499.8889465332031, 268.9439392089844, 1784.43212890625, -14.518180847167969, -27.1197452545166, 784.2496948242188, 106.07221984863281, 596.9769897460938, 1127.3509521484375, -50.757415771484375, -65.02081298828125, 1020.0609130859375, 474.61669921875, 147.44265747070312, 67.48330688476562, 67.3558120727539, 57.544288635253906, 786.0110473632812, 1167.9027099609375, -93.6889877319336, 95.34752655029297, 1561.8126220703125, 1319.0125732421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000638.npy"} +{"epoch": 0.9368575624082232, "step": 639, "batch_size": 64, "mean": 165.82308959960938, "std": 706.2058715820312, "min": -1354.9571533203125, "p10": -690.8281677246093, "median": 161.29566955566406, "p90": 1059.0478271484376, "max": 1679.7008056640625, "pos_frac": 0.609375, "sample": [1521.30224609375, -910.359619140625, -472.98724365234375, 1179.149169921875, 952.101806640625, -433.15203857421875, 263.9401550292969, 399.4985656738281, -573.626953125, 887.0599975585938, 1657.321533203125, -33.182987213134766, 350.5717468261719, 651.2546997070312, -1052.0428466796875, 304.5664367675781, -454.1153564453125, -376.100341796875, 113.57113647460938, 474.6794738769531, 643.7440185546875, -1298.48681640625, 879.2092895507812, 589.61572265625, -636.8079833984375, -1354.9571533203125, -1125.3299560546875, 263.0681457519531, 111.48184967041016, 108.24702453613281, 629.204345703125, 559.2329711914062, -3.1359329223632812, -608.4450073242188, 1074.36767578125, 1216.7247314453125, -357.3814697265625, 844.694091796875, -445.9742431640625, -30.887123107910156, 55.788841247558594, -110.12478637695312, 683.6160888671875, 382.2156982421875, 1679.7008056640625, -862.0282592773438, -118.38431549072266, 72.35140228271484, -320.86456298828125, -386.98809814453125, 167.72328186035156, 785.4510498046875, 312.3291015625, 1023.301513671875, -713.9796752929688, -433.6787414550781, -458.7596740722656, 154.86805725097656, 144.93397521972656, 678.5560302734375, 312.3809509277344, 1298.992431640625, 583.8072509765625, 173.83493041992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000639.npy"} +{"epoch": 0.9383259911894273, "step": 640, "batch_size": 64, "mean": 558.8187255859375, "std": 758.8751831054688, "min": -890.9835815429688, "p10": -198.78300781249996, "median": 410.15126037597656, "p90": 1372.1029418945313, "max": 2911.274658203125, "pos_frac": 0.78125, "sample": [763.6207885742188, 564.2176513671875, -460.0438232421875, -146.15489196777344, 159.17898559570312, 413.3760681152344, 296.24609375, 1335.90087890625, 1300.88330078125, 204.45391845703125, 1375.873779296875, 1574.2933349609375, 936.6246337890625, 82.84634399414062, -266.4834289550781, 498.9050598144531, 897.1806030273438, -114.3709716796875, -79.38874816894531, 1334.572021484375, -776.4193115234375, 984.0185546875, 418.82989501953125, 681.8533935546875, -212.86141967773438, -890.9835815429688, 139.52963256835938, 406.92645263671875, 2911.274658203125, 888.2738037109375, 573.1224365234375, 1035.1285400390625, -702.6045532226562, 583.1723022460938, 2640.999267578125, 323.61260986328125, -165.93338012695312, 254.51942443847656, 61.5882568359375, 365.8349609375, -83.26274108886719, 251.26564025878906, 36.49101257324219, 1258.0743408203125, 1028.8359375, 188.59201049804688, 965.3036499023438, 677.965576171875, -465.621337890625, 1363.3043212890625, 2066.86767578125, 352.9799499511719, 26.922515869140625, 1183.0853271484375, -0.9454498291015625, 774.2893676757812, 397.886962890625, 797.2839965820312, 77.41963195800781, 755.0075073242188, 1407.7293701171875, 2369.18896484375, -145.48143005371094, 289.6014099121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000640.npy"} +{"epoch": 0.9397944199706314, "step": 641, "batch_size": 64, "mean": 358.13702392578125, "std": 724.6242065429688, "min": -1272.158935546875, "p10": -498.09977416992183, "median": 345.08201599121094, "p90": 1083.1233276367188, "max": 2912.272705078125, "pos_frac": 0.78125, "sample": [136.86544799804688, 322.5809326171875, 567.4153442382812, 1043.438232421875, -518.7921142578125, -163.36282348632812, 351.6484375, 1527.9464111328125, 429.9732666015625, 95.37043762207031, -1081.47216796875, 484.0974426269531, 18.287002563476562, -435.8550720214844, 2912.272705078125, -506.99066162109375, -420.37347412109375, -477.3543701171875, 39.83299255371094, 933.4498291015625, 11.688323974609375, 116.91947174072266, 100.55841064453125, 615.4165649414062, 229.76211547851562, 582.9776611328125, -1211.9385986328125, 934.4119873046875, 587.7705078125, 47.59782409667969, 879.7605590820312, 505.5023193359375, 660.2635498046875, 129.2554931640625, 85.57959747314453, 49.75997543334961, 1265.6295166015625, 331.1490478515625, 593.845703125, 613.6611328125, 158.5316162109375, 34.62896728515625, -72.1594009399414, -662.5099487304688, 212.99737548828125, 833.1632690429688, -1272.158935546875, 1100.1312255859375, 914.0247192382812, 957.2174682617188, 2243.492431640625, 785.1492309570312, -1035.741943359375, 759.857177734375, -30.100257873535156, 365.8275451660156, 647.4083251953125, -261.87835693359375, 506.29351806640625, 338.5155944824219, 950.56982421875, 1207.3187255859375, 656.139404296875, 1195.50439453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000641.npy"} +{"epoch": 0.9412628487518355, "step": 642, "batch_size": 64, "mean": 439.28436279296875, "std": 889.5961303710938, "min": -1764.3668212890625, "p10": -308.2948364257812, "median": 329.50494384765625, "p90": 1275.9433837890629, "max": 4663.4541015625, "pos_frac": 0.75, "sample": [101.24665832519531, 565.1837158203125, 726.5223388671875, 559.495849609375, 269.1412353515625, -1764.3668212890625, 771.0169677734375, 266.8716735839844, -68.94979095458984, -539.7617797851562, -121.67346954345703, 101.01754760742188, 1416.5380859375, -320.1402587890625, 863.8590087890625, 1002.4963989257812, 519.7920532226562, 753.4541015625, -833.7213745117188, 45.28493881225586, 240.16635131835938, -171.74066162109375, 361.270751953125, 36.18821716308594, 289.02569580078125, 750.3217163085938, -101.7890396118164, 233.62353515625, 96.57545471191406, 420.1101379394531, 891.9217529296875, 662.2723388671875, 712.5255126953125, 2423.8994140625, -243.58523559570312, 83.70883178710938, 371.5128173828125, 736.7763671875, -605.797119140625, 232.65347290039062, 847.3750610351562, 165.9320068359375, 490.7391052246094, 1309.655517578125, -280.655517578125, 1197.28173828125, 815.0665893554688, 1607.47802734375, -205.72169494628906, 5.022705078125, 4663.4541015625, 1021.7982788085938, -631.5464477539062, 469.982177734375, -209.18467712402344, -168.63856506347656, 363.90673828125, 2424.236328125, 44.158172607421875, -1047.0587158203125, 504.8916015625, 1726.3203125, 297.7391357421875, 969.0184936523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000642.npy"} +{"epoch": 0.9427312775330396, "step": 643, "batch_size": 64, "mean": 344.084228515625, "std": 546.4957885742188, "min": -762.9945068359375, "p10": -397.6305938720703, "median": 310.5269775390625, "p90": 1000.9250305175782, "max": 1817.4775390625, "pos_frac": 0.78125, "sample": [312.506591796875, 383.2859802246094, 159.87477111816406, 910.3665771484375, 809.7388305664062, 634.267822265625, 525.1923828125, -88.00188446044922, 245.50270080566406, 851.4468383789062, 16.672313690185547, -680.4737548828125, 341.4474182128906, 295.9775695800781, -158.32130432128906, 913.26171875, 810.2815551757812, 1010.0427856445312, 1097.631103515625, 479.6331481933594, 168.44593811035156, 40.71665573120117, -312.8045959472656, 187.80667114257812, 34.18904495239258, 180.16607666015625, -762.9945068359375, 676.3914184570312, 443.8004455566406, 974.4320068359375, -465.0970458984375, 276.2764587402344, 308.54736328125, 218.57861328125, 956.776123046875, 1200.81689453125, 417.6880187988281, -442.93408203125, 730.4409790039062, 369.3453063964844, -477.56341552734375, 1793.304931640625, 1079.335693359375, 341.31024169921875, 656.5663452148438, 94.39026641845703, 2.8523731231689453, 428.5230712890625, 1817.4775390625, 55.407474517822266, 979.6502685546875, 645.8768310546875, 738.2200927734375, -325.95343017578125, 1060.0604248046875, 41.442474365234375, -373.29168701171875, -408.0615539550781, 417.08013916015625, 26.771881103515625, -155.9517822265625, -117.64608764648438, -512.5817260742188, 143.2470703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000643.npy"} +{"epoch": 0.9441997063142438, "step": 644, "batch_size": 64, "mean": 358.000244140625, "std": 727.659912109375, "min": -1717.1717529296875, "p10": -552.1825927734375, "median": 232.5782012939453, "p90": 1155.8793457031252, "max": 2502.4169921875, "pos_frac": 0.734375, "sample": [2502.4169921875, 52.553550720214844, 906.4190673828125, -1717.1717529296875, 487.6593017578125, 387.67034912109375, -758.114013671875, 31.230606079101562, 963.128662109375, 388.0243835449219, 43.883365631103516, -29.302902221679688, 65.28424835205078, 469.6314697265625, 115.56736755371094, 639.9632568359375, 175.91107177734375, -20.37237548828125, 260.8763732910156, -567.353759765625, 162.88796997070312, 1964.0245361328125, 59.84815979003906, -591.083984375, 779.6832275390625, 915.6143798828125, -1.8432769775390625, 495.89801025390625, -273.10040283203125, 1590.9627685546875, -33.87328338623047, -346.1415710449219, 777.79296875, -62.64909362792969, 76.14906311035156, -36.56415557861328, 1175.30419921875, 705.930419921875, 715.5520629882812, 285.019775390625, 126.24349212646484, -576.7393798828125, 526.3230590820312, 56.97295379638672, 1098.829345703125, 561.7139282226562, -593.6614379882812, -294.46588134765625, 118.96470642089844, 1054.720947265625, 1612.9195556640625, 820.426025390625, 96.13197326660156, -1086.859130859375, 1072.9501953125, 1110.5546875, 369.58184814453125, 1328.325927734375, 25.787445068359375, 435.0265197753906, 653.5620727539062, 1949.8919677734375, 204.280029296875, -516.783203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000644.npy"} +{"epoch": 0.9456681350954479, "step": 645, "batch_size": 64, "mean": 177.82760620117188, "std": 677.1175537109375, "min": -1249.979248046875, "p10": -700.8367065429687, "median": 93.66887664794922, "p90": 1070.8259887695312, "max": 2201.6015625, "pos_frac": 0.609375, "sample": [335.0145568847656, 1050.2314453125, -676.4718017578125, 931.6392211914062, -1249.979248046875, -878.084228515625, -669.883544921875, 39.734161376953125, 413.9723815917969, -157.89845275878906, -130.71913146972656, -41.10650634765625, 54.712852478027344, -7.993259429931641, -88.37411499023438, -347.93341064453125, 404.92364501953125, 301.6349792480469, 979.7998657226562, 1082.7174072265625, 663.625732421875, -388.283203125, 71.71514892578125, 38.68467712402344, 128.0531768798828, 64.82823181152344, 1079.6522216796875, -32.728919982910156, -729.074462890625, -1041.8594970703125, 189.26148986816406, 402.0859069824219, -711.27880859375, 1345.94091796875, 868.8745727539062, 24.045928955078125, 1880.068359375, 788.0162353515625, -1139.435546875, -634.7494506835938, 1306.0191650390625, -42.865692138671875, 115.62260437011719, 186.2506866455078, 397.2547607421875, 544.189208984375, 392.682861328125, 306.0013732910156, 228.4609375, 632.9100952148438, -62.48333740234375, -285.03021240234375, 1206.2705078125, 31.28954315185547, 387.3134765625, 2201.6015625, -97.60942840576172, 341.71575927734375, -864.195556640625, -117.1977310180664, 552.5585327148438, 202.6358184814453, -100.97557067871094, -294.83197021484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000645.npy"} +{"epoch": 0.947136563876652, "step": 646, "batch_size": 64, "mean": 305.16351318359375, "std": 579.70361328125, "min": -1013.6762084960938, "p10": -348.19539489746086, "median": 214.67088317871094, "p90": 1046.6956481933594, "max": 2064.737060546875, "pos_frac": 0.71875, "sample": [901.6470947265625, 1018.6847534179688, -146.54736328125, 1451.76708984375, 624.446044921875, 221.13087463378906, 499.10357666015625, 659.2590942382812, 910.496826171875, 26.151687622070312, 372.9458923339844, 503.9674072265625, -934.1474609375, -9.571380615234375, 1016.2304077148438, 121.10035705566406, -282.6957702636719, 286.54864501953125, 1058.7003173828125, -41.93938446044922, 429.36627197265625, -140.90087890625, -165.62030029296875, 642.5993041992188, 97.45486450195312, 355.7160339355469, 436.17919921875, 237.25917053222656, 85.02008056640625, 881.2291259765625, 104.81158447265625, 463.6533508300781, 88.40756225585938, 166.6356964111328, -209.1509246826172, 208.2108917236328, 63.40728759765625, 1621.417236328125, -216.5934600830078, -1013.6762084960938, 397.7105712890625, 626.2470703125, 119.63850402832031, -425.43780517578125, -68.25126647949219, -560.2987060546875, -74.90464782714844, 69.08019256591797, -510.2503662109375, 2064.737060546875, 1059.551025390625, 279.39483642578125, 577.7482299804688, -149.5766143798828, 1339.676513671875, -376.26666259765625, -593.7366943359375, 143.50900268554688, 200.0680389404297, 139.20346069335938, 1129.482421875, 975.1339721679688, 486.83782958984375, 288.465576171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000646.npy"} +{"epoch": 0.9486049926578561, "step": 647, "batch_size": 64, "mean": 382.257080078125, "std": 538.4498901367188, "min": -1129.6923828125, "p10": -168.6048126220703, "median": 375.04701232910156, "p90": 1059.4821044921875, "max": 1976.7203369140625, "pos_frac": 0.78125, "sample": [789.3316650390625, 601.5383911132812, 308.6999816894531, 247.7792510986328, 305.3194274902344, 1131.8878173828125, 259.0339050292969, 407.5088806152344, 542.62646484375, 595.0865478515625, 1541.20703125, 134.72207641601562, 43.802154541015625, 1771.784912109375, 682.0543212890625, 517.3172607421875, -68.62379455566406, 1051.2813720703125, 650.3433227539062, -170.8995819091797, 216.28952026367188, 198.14849853515625, -44.60499572753906, -163.25035095214844, 564.7417602539062, 431.64752197265625, -74.27580261230469, 1976.7203369140625, -840.8810424804688, 843.2601318359375, -181.73480224609375, -221.9837646484375, 116.13113403320312, 791.8502807617188, 1054.775390625, 744.5596923828125, 472.71112060546875, 371.87847900390625, -537.4277954101562, 216.16111755371094, 531.6823120117188, 95.05278778076172, 1146.6878662109375, -145.408203125, -98.91275024414062, 433.4908447265625, 423.76861572265625, 404.1878662109375, -36.528526306152344, 1061.499267578125, 192.13340759277344, 94.4824447631836, 1159.258056640625, 279.4815673828125, 30.535507202148438, 532.863037109375, -1129.6923828125, 744.52001953125, -278.3913269042969, 337.21697998046875, 506.4195861816406, 391.86639404296875, 378.2155456542969, 133.5057373046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000647.npy"} +{"epoch": 0.9500734214390602, "step": 648, "batch_size": 64, "mean": 572.3054809570312, "std": 779.3780517578125, "min": -1028.548828125, "p10": -184.86049499511716, "median": 505.5013732910156, "p90": 1480.1082153320315, "max": 3475.093994140625, "pos_frac": 0.78125, "sample": [502.9968566894531, 283.010009765625, 154.3126220703125, 540.6661987304688, 298.9195251464844, 694.0574340820312, 1993.05078125, 1508.58349609375, 838.4375, 1412.24853515625, 53.778228759765625, -720.0283203125, 53.401031494140625, 908.1950073242188, 263.149169921875, -135.52845764160156, 1323.9124755859375, 497.26275634765625, 18.442245483398438, -60.97802734375, -23.270734786987305, 515.1357421875, -106.59577178955078, 884.1903076171875, -229.02793884277344, 1887.1505126953125, 561.1665649414062, 116.03932189941406, -23.161026000976562, -170.18115234375, 805.9617919921875, 867.161865234375, -191.15164184570312, 3475.093994140625, 654.4171142578125, 569.9282836914062, -1028.548828125, 575.5435791015625, 35.00286865234375, -665.7108154296875, -246.00250244140625, 316.8125, 2308.181884765625, 734.796630859375, 667.6630859375, 403.4342346191406, 1413.6658935546875, -86.5208740234375, 580.9730834960938, 668.5239868164062, 1078.4990234375, 372.34173583984375, 341.2449645996094, 628.9539794921875, 1314.1265869140625, 104.0240707397461, 508.0058898925781, 751.635986328125, 425.7130126953125, 2328.87255859375, 1386.7301025390625, 228.0634765625, -420.8195495605469, 1881.597900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000648.npy"} +{"epoch": 0.9515418502202643, "step": 649, "batch_size": 64, "mean": 264.7760009765625, "std": 660.9012451171875, "min": -908.830810546875, "p10": -461.84951477050777, "median": 204.0474624633789, "p90": 1008.770135498047, "max": 3133.153076171875, "pos_frac": 0.609375, "sample": [510.12335205078125, 13.395942687988281, 459.3455810546875, -108.08938598632812, 502.7803039550781, -349.6244812011719, -10.63491439819336, 198.17913818359375, 761.92724609375, 310.98175048828125, 532.9247436523438, -147.8111572265625, -471.8529968261719, 56.86106872558594, -661.3300170898438, -130.16897583007812, -44.64701843261719, 440.5649108886719, -328.4031066894531, 246.7823486328125, 673.403564453125, 172.4519500732422, -511.2366027832031, -180.49212646484375, 970.7756958007812, -782.7811889648438, 683.8201293945312, 866.9270629882812, -131.27833557128906, 598.0294189453125, 360.8858947753906, -382.7571105957031, 82.46917724609375, -112.50093841552734, 805.5933837890625, 554.584716796875, 857.5703125, 377.23480224609375, 867.4490356445312, -426.4493408203125, 1187.10986328125, 441.3556823730469, 5.298259735107422, 1161.26953125, 874.1651611328125, -225.19235229492188, 1357.1810302734375, 3133.153076171875, 410.29583740234375, 377.5641174316406, -99.21170806884766, 209.91578674316406, 10.538223266601562, 848.4255981445312, -378.35699462890625, 1238.0567626953125, 1125.943115234375, -474.0793762207031, -19.737903594970703, -178.01910400390625, -908.830810546875, -862.728271484375, 1025.053466796875, -438.508056640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000649.npy"} +{"epoch": 0.9530102790014684, "step": 650, "batch_size": 64, "mean": 421.4519348144531, "std": 729.4365234375, "min": -1144.8297119140625, "p10": -459.9873504638672, "median": 304.30992126464844, "p90": 1298.3082153320313, "max": 2469.744873046875, "pos_frac": 0.765625, "sample": [55.68177032470703, -558.3043823242188, 808.0812377929688, 816.9378662109375, 177.21261596679688, 931.2064208984375, 302.519775390625, -1068.714599609375, 2259.47509765625, 594.4686279296875, 1171.1292724609375, -763.5836181640625, 1365.939697265625, 982.2311401367188, 938.7184448242188, 1282.097900390625, 290.26031494140625, 1680.7132568359375, 1153.8260498046875, 2469.744873046875, 795.1802368164062, -133.4947967529297, 1694.0960693359375, 589.2399291992188, 264.07244873046875, 41.37092208862305, -149.72625732421875, 459.9504089355469, 195.10488891601562, 437.3715515136719, 724.7384033203125, -169.62989807128906, -324.2664794921875, -917.2987060546875, -452.73431396484375, 279.3492736816406, -25.331340789794922, 1816.005615234375, -99.35533142089844, 48.73822021484375, 551.3616333007812, 189.88259887695312, -463.0957946777344, 663.5734252929688, 554.2548217773438, 520.931640625, 85.48777770996094, 830.4189453125, 156.64129638671875, 1305.2554931640625, 306.1000671386719, 299.3114929199219, 595.1578369140625, 976.6513671875, 877.44287109375, 61.59160614013672, 474.42193603515625, -691.1246948242188, -1144.8297119140625, 245.050537109375, 717.7197265625, 77.49213409423828, -384.7240905761719, 204.92886352539062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000650.npy"} +{"epoch": 0.9544787077826725, "step": 651, "batch_size": 64, "mean": 542.8966064453125, "std": 595.1066284179688, "min": -402.380615234375, "p10": -23.079699134826658, "median": 411.27577209472656, "p90": 1317.7631469726564, "max": 2236.120849609375, "pos_frac": 0.859375, "sample": [868.7456665039062, 684.3140258789062, 2236.120849609375, 210.61505126953125, -402.380615234375, -5.96563720703125, 365.6160583496094, 612.9776611328125, 38.18241882324219, 152.3362579345703, 1287.7567138671875, 2165.19580078125, 319.494140625, -222.08148193359375, 111.5654296875, -45.300498962402344, 75.01083374023438, 522.058837890625, -301.8362121582031, 868.3485107421875, 1603.4664306640625, 330.473876953125, 843.8733520507812, 280.6153564453125, 724.1307983398438, 56.44459533691406, -22.622800827026367, -23.2755126953125, 154.6485595703125, 711.8936767578125, 194.88401794433594, 33.37525177001953, 1051.43701171875, 153.48202514648438, 1214.5753173828125, 971.33251953125, 577.71337890625, 36.352569580078125, 760.62109375, 3.924182891845703, 443.6881408691406, 1097.2213134765625, -366.6716003417969, 500.6736145019531, 698.34228515625, 672.6025390625, 25.453125, 946.9321899414062, 122.50096130371094, 606.3108520507812, 164.33682250976562, 386.0636291503906, 1330.623046875, 1786.3956298828125, 349.9613037109375, -370.38189697265625, 1094.842041015625, 388.615966796875, 1681.8912353515625, 433.9355773925781, 1110.1519775390625, 1454.5, 876.2086181640625, 113.06413269042969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000651.npy"} +{"epoch": 0.9559471365638766, "step": 652, "batch_size": 64, "mean": 413.9796142578125, "std": 687.5950927734375, "min": -1476.0555419921875, "p10": -304.3278289794921, "median": 307.14662170410156, "p90": 1287.2681152343753, "max": 2406.256591796875, "pos_frac": 0.765625, "sample": [376.94134521484375, 141.31277465820312, 763.4293823242188, -983.0536499023438, 598.7353515625, 459.6040344238281, 227.3008270263672, -521.5259399414062, 746.9813232421875, 164.8402099609375, 1324.7388916015625, 1423.0426025390625, 1199.8363037109375, -1476.0555419921875, 72.88848876953125, 592.4815673828125, 54.670867919921875, 816.2744140625, 1421.69677734375, -404.33642578125, 1159.3974609375, 302.6325988769531, 88.4432601928711, -150.40216064453125, 37.370155334472656, -4.86346435546875, 932.7701416015625, -112.84825134277344, 344.62615966796875, 65.15448760986328, 41.54924392700195, 2406.256591796875, -342.8028259277344, 2380.5732421875, 312.9744567871094, 1554.1419677734375, 47.02116394042969, 88.76190185546875, -25.192319869995117, 30.969635009765625, 476.40313720703125, -43.54193878173828, 203.39675903320312, -133.55935668945312, 964.2344970703125, 878.484130859375, 882.7322998046875, -230.28994750976562, 630.490966796875, 804.899658203125, 1737.1329345703125, 74.40669250488281, 872.7713012695312, 257.6728210449219, -542.77099609375, 428.0392150878906, 1146.87939453125, -336.058349609375, 389.06207275390625, 764.8616943359375, 247.53370666503906, 577.3750610351562, 311.66064453125, -23.459434509277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000652.npy"} +{"epoch": 0.9574155653450808, "step": 653, "batch_size": 64, "mean": 372.2716369628906, "std": 656.0138549804688, "min": -1172.58349609375, "p10": -359.26174621582027, "median": 263.69715118408203, "p90": 1220.8676513671876, "max": 2155.76220703125, "pos_frac": 0.703125, "sample": [876.41748046875, -0.4653167724609375, -135.93421936035156, 1853.749755859375, 247.02552795410156, -395.2382507324219, 109.47486877441406, -508.20166015625, 45.9444580078125, 836.7427368164062, 545.181640625, -263.3050537109375, -68.46923065185547, 1301.9490966796875, 845.800048828125, 1223.1474609375, 833.9996337890625, 1215.548095703125, 1204.5552978515625, -89.45819091796875, 185.85128784179688, 280.3687744140625, 1670.99609375, 424.8192443847656, 532.4805297851562, 715.7302856445312, 1074.3453369140625, 297.80194091796875, -19.733726501464844, -452.0826721191406, 1362.251953125, 244.0735626220703, 603.4237670898438, -1172.58349609375, -245.36764526367188, -111.40431213378906, 423.26837158203125, 481.6846008300781, -663.5184936523438, 2124.272705078125, 2155.76220703125, 577.5957641601562, 12.988157272338867, -44.087562561035156, 618.97265625, 524.2274169921875, 19.203338623046875, -285.7615966796875, 227.05853271484375, 231.90536499023438, 525.258544921875, 141.14199829101562, 783.7711791992188, 180.11459350585938, 470.8638916015625, -730.5678100585938, -390.7618103027344, 323.14306640625, 714.6455688476562, 86.07734680175781, 461.5754699707031, -228.18153381347656, 234.0517578125, -218.75413513183594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000653.npy"} +{"epoch": 0.9588839941262849, "step": 654, "batch_size": 64, "mean": 359.82257080078125, "std": 593.821044921875, "min": -592.7657470703125, "p10": -329.3725982666016, "median": 294.88124084472656, "p90": 1265.577490234375, "max": 1844.3818359375, "pos_frac": 0.671875, "sample": [24.304283142089844, 337.9696044921875, -196.22552490234375, 167.2120361328125, 411.59832763671875, -46.49015808105469, -330.2413330078125, 411.93017578125, -49.34403610229492, 734.1141357421875, 767.14892578125, -327.3455505371094, 92.67781829833984, 424.4267578125, -479.85321044921875, -237.06536865234375, -73.23821258544922, 1844.3818359375, 1002.0714721679688, 552.2706298828125, 547.693603515625, 1653.1002197265625, 500.49139404296875, 305.1964111328125, 1362.510009765625, 602.0093383789062, -104.94047546386719, 667.7714233398438, 133.78036499023438, -108.59466552734375, -150.4820098876953, 1177.3243408203125, 405.83013916015625, 65.30038452148438, 32.383880615234375, -13.147697448730469, 875.25927734375, 197.99057006835938, 284.5660705566406, 403.8204040527344, 492.8372802734375, 706.4310913085938, 710.2256469726562, -233.14382934570312, -592.7657470703125, 853.4788208007812, 310.89923095703125, 1652.282470703125, 1246.57666015625, -246.82630920410156, 23.18035888671875, 1410.265869140625, 580.0844116210938, 270.2342834472656, -60.65392303466797, -400.50201416015625, 571.474609375, -369.3717041015625, 1814.9443359375, 1273.720703125, 77.71139526367188, -356.27569580078125, -391.11199951171875, -183.21551513671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000654.npy"} +{"epoch": 0.960352422907489, "step": 655, "batch_size": 64, "mean": 377.6003723144531, "std": 614.0530395507812, "min": -756.1159057617188, "p10": -313.75294189453126, "median": 279.36878967285156, "p90": 1177.7396484375001, "max": 2197.7119140625, "pos_frac": 0.703125, "sample": [-314.46588134765625, -756.1159057617188, 1303.354736328125, 145.83291625976562, 84.55945587158203, 1218.6226806640625, 630.1790771484375, 887.7733154296875, 374.1467590332031, -227.81341552734375, 791.1677856445312, 52.64292907714844, -145.89581298828125, -549.40771484375, -27.790624618530273, 1191.428466796875, -174.05026245117188, 849.53759765625, -247.8730926513672, 1049.248779296875, -367.095703125, 413.1106872558594, 890.3428955078125, -619.6116943359375, -312.08941650390625, 585.6138916015625, 312.6670837402344, 83.45645141601562, -98.39878845214844, 259.9561767578125, -76.8446273803711, 1612.9022216796875, 197.01063537597656, 273.9009094238281, 592.691650390625, 847.8341674804688, -629.1616821289062, 756.1807861328125, 631.5032958984375, 1039.9708251953125, 347.4626159667969, 73.21195220947266, 587.300048828125, 2197.7119140625, 1588.2838134765625, -107.0450439453125, 360.9333801269531, -510.0290832519531, 1039.105224609375, 508.31689453125, -121.76786804199219, 92.756591796875, 24.36505126953125, 220.09971618652344, 1145.799072265625, 284.836669921875, 1587.9156494140625, -193.0015411376953, -86.12800598144531, 554.9601440429688, 741.1234741210938, 227.6324920654297, 158.92706298828125, 914.6333618164062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000655.npy"} +{"epoch": 0.9618208516886931, "step": 656, "batch_size": 64, "mean": 647.600830078125, "std": 710.2678833007812, "min": -492.44647216796875, "p10": -81.57827377319336, "median": 440.85150146484375, "p90": 1740.7328247070313, "max": 2639.280029296875, "pos_frac": 0.828125, "sample": [1500.8692626953125, 804.521728515625, -492.44647216796875, -38.90004348754883, 124.54914093017578, 244.2481689453125, 356.49530029296875, 574.1377563476562, 278.9208679199219, 768.6184692382812, 895.41943359375, 766.2289428710938, 304.48602294921875, 129.71975708007812, 478.3598327636719, 1691.42529296875, -147.81939697265625, 83.18978881835938, 1761.8646240234375, 170.5831298828125, 845.6357421875, -22.2315673828125, 2101.3623046875, -157.34307861328125, -115.6038818359375, 264.10113525390625, 951.3740234375, 1017.0642700195312, 449.0430908203125, 1104.761962890625, 397.845458984375, 2639.280029296875, 706.0693359375, -80.54698181152344, 2317.357177734375, 849.2672729492188, 432.659912109375, 1096.5533447265625, 2400.112060546875, 1.983367919921875, 2083.12255859375, -172.58311462402344, 1305.8323974609375, 52.138427734375, 1097.218505859375, 991.2721557617188, 341.65802001953125, 2129.10400390625, 308.2835693359375, 190.06973266601562, 759.4057006835938, 118.11515808105469, -127.43204498291016, 311.17547607421875, 925.933837890625, 146.15969848632812, -21.692642211914062, 279.95068359375, -82.02025604248047, 1062.6339111328125, 851.7325439453125, 37.34796905517578, 654.6924438476562, 751.11865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000656.npy"} +{"epoch": 0.9632892804698973, "step": 657, "batch_size": 64, "mean": 519.62646484375, "std": 690.0695190429688, "min": -1566.7039794921875, "p10": -180.05298004150382, "median": 454.6181945800781, "p90": 1328.7445556640625, "max": 2481.67431640625, "pos_frac": 0.84375, "sample": [1161.83349609375, 29.56366729736328, 852.8792114257812, 491.48358154296875, 1490.2408447265625, 1544.71728515625, 690.0067749023438, 128.6355438232422, 1300.7529296875, -105.997314453125, -321.00018310546875, 464.42724609375, 1340.740966796875, -211.79112243652344, 253.5760955810547, 501.16534423828125, 61.59025573730469, 387.9606628417969, 128.3812255859375, 1625.538330078125, 2274.475830078125, 442.50250244140625, 69.35417938232422, 2481.67431640625, 1254.9295654296875, 1183.504150390625, 251.61500549316406, 74.61163330078125, 737.4156494140625, 632.7320556640625, 850.7255249023438, 486.89996337890625, 34.012733459472656, -1566.7039794921875, 573.8799438476562, 489.3936767578125, 268.06695556640625, -262.54876708984375, -6.82476806640625, 315.41314697265625, 614.8302001953125, -862.8682861328125, -277.31573486328125, 578.3494262695312, 859.9892578125, 57.544151306152344, 252.9574432373047, 613.4559936523438, 437.502685546875, 18.19416046142578, 720.01416015625, 763.8618774414062, 213.5899200439453, 835.9649658203125, 444.80914306640625, 351.3070373535156, 1256.2091064453125, 2437.70751953125, 419.4740905761719, -276.0984802246094, 274.2052917480469, -32.80952453613281, 628.9171142578125, 526.4705200195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000657.npy"} +{"epoch": 0.9647577092511013, "step": 658, "batch_size": 64, "mean": 418.6898193359375, "std": 605.3524780273438, "min": -1120.515380859375, "p10": -363.9357055664062, "median": 425.72630310058594, "p90": 1244.3431152343753, "max": 1948.28759765625, "pos_frac": 0.75, "sample": [732.5283813476562, 820.34765625, 744.5323486328125, -301.688720703125, 490.8882141113281, -254.25408935546875, 1171.1846923828125, 268.328125, -378.400634765625, 440.01385498046875, 430.005126953125, 1400.020751953125, 85.01170349121094, 237.66366577148438, 289.44232177734375, -512.8408203125, 421.4474792480469, 353.5807800292969, 93.2510757446289, -82.66596984863281, 624.754638671875, 147.1123046875, 459.81085205078125, 154.36056518554688, -1120.515380859375, 919.2032470703125, -355.4351501464844, 592.6397705078125, 573.6879272460938, -153.06649780273438, -125.19688415527344, 293.66375732421875, -57.34207534790039, 404.75634765625, 280.201416015625, 1539.7550048828125, -56.80762481689453, 1130.8048095703125, 666.869384765625, 1144.4637451171875, 20.770099639892578, 410.7756042480469, 678.7032470703125, 1497.67431640625, 1273.6573486328125, 648.0431518554688, -444.9978332519531, 534.5177612304688, 860.4110717773438, 856.631103515625, -477.7371826171875, 1358.1143798828125, -363.82086181640625, 1948.28759765625, 712.0125122070312, 1175.9432373046875, 634.503173828125, 1418.7822265625, -363.98492431640625, 699.0048828125, 139.8687286376953, 504.4674987792969, 202.464111328125, -640.0594482421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000658.npy"} +{"epoch": 0.9662261380323054, "step": 659, "batch_size": 64, "mean": 402.4836120605469, "std": 595.201171875, "min": -889.4042358398438, "p10": -280.9352325439453, "median": 370.58692932128906, "p90": 1110.7233764648438, "max": 2501.0693359375, "pos_frac": 0.734375, "sample": [609.943603515625, -63.885955810546875, 215.304443359375, 815.6600341796875, -261.4331359863281, -289.29327392578125, 1211.8475341796875, -395.74676513671875, -504.4663391113281, -143.7119903564453, 1063.1451416015625, 385.6800537109375, 569.017578125, -150.8011932373047, 123.40211486816406, 156.48089599609375, 602.3291625976562, 166.49710083007812, 241.5020751953125, 197.46194458007812, 352.3764953613281, 566.2406005859375, -484.3272399902344, 2501.0693359375, -889.4042358398438, 1332.700927734375, 367.0505676269531, 659.916259765625, 1120.587158203125, 326.61785888671875, 406.2739562988281, 490.6435852050781, 0.190948486328125, -64.80232238769531, 678.9066162109375, 347.024658203125, 82.26860046386719, -19.067501068115234, 60.735198974609375, 1141.6959228515625, -550.9495849609375, -43.20232391357422, 660.6723022460938, -154.0517120361328, 760.9125366210938, -42.208377838134766, -27.87834930419922, 680.590087890625, 1717.472412109375, 1905.730224609375, 876.5530395507812, 232.01734924316406, 810.7420654296875, 499.8519592285156, 535.54638671875, 374.123291015625, 903.2140502929688, 1087.7078857421875, 542.24560546875, -332.7541809082031, 785.0807495117188, 81.94496154785156, 467.5821533203125, 462.37939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000659.npy"} +{"epoch": 0.9676945668135095, "step": 660, "batch_size": 64, "mean": 525.3436279296875, "std": 653.158203125, "min": -1113.2904052734375, "p10": -231.09403076171873, "median": 500.4145965576172, "p90": 1461.6256958007818, "max": 2336.880126953125, "pos_frac": 0.765625, "sample": [-142.36973571777344, 228.3193359375, 1129.8052978515625, 509.6623229980469, 455.269775390625, 500.193115234375, 634.2451171875, 1521.7564697265625, 1086.62548828125, 821.7816772460938, 310.818359375, -331.54638671875, 1231.32470703125, 508.54559326171875, 926.3031005859375, 1705.068115234375, -199.5081329345703, -26.437088012695312, -262.95562744140625, -95.54757690429688, 1115.8438720703125, 1522.9049072265625, 51.40656280517578, 500.6360778808594, 805.8099975585938, 1944.558837890625, 381.0941162109375, -189.79246520996094, 783.6351318359375, 448.3289794921875, 214.54934692382812, 1321.320556640625, -469.5140380859375, 2336.880126953125, 351.6147155761719, 626.5858764648438, 784.3666381835938, 7.8192901611328125, 392.9866638183594, 820.7202758789062, 149.00193786621094, 203.00732421875, 657.0288696289062, 815.0504150390625, -153.03668212890625, -57.838104248046875, 164.43157958984375, 1638.29248046875, 910.0960083007812, 1131.2828369140625, -244.63084411621094, -0.6828994750976562, -568.8862915039062, 676.525634765625, 1014.55419921875, 563.4973754882812, 140.14187622070312, 319.2079772949219, 591.156982421875, 1837.8868408203125, -247.68408203125, -1113.2904052734375, 419.1101989746094, 514.656494140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000660.npy"} +{"epoch": 0.9691629955947136, "step": 661, "batch_size": 64, "mean": 465.8528137207031, "std": 661.1007080078125, "min": -806.78857421875, "p10": -113.97747268676753, "median": 391.78802490234375, "p90": 1149.876892089845, "max": 3360.220458984375, "pos_frac": 0.8125, "sample": [162.77122497558594, -68.1358413696289, 818.8956298828125, -12.292924880981445, 407.2716064453125, 236.46507263183594, 512.14892578125, -172.34945678710938, 686.2684326171875, 356.85870361328125, 1393.1763916015625, 401.154296875, 690.7369995117188, 599.38720703125, 3360.220458984375, 869.552734375, 163.53781127929688, 36.2469482421875, -806.78857421875, 143.44276428222656, -27.104839324951172, 807.7804565429688, 161.91848754882812, 408.2455749511719, 209.897705078125, 234.8673858642578, 323.05889892578125, 895.97265625, 1315.66064453125, 57.66717529296875, 367.88226318359375, 691.99658203125, 537.517822265625, 1258.6929931640625, 501.1600646972656, 339.1458740234375, 207.6007080078125, 344.4885559082031, 238.94424438476562, 451.55816650390625, -520.1796264648438, 382.4217529296875, -133.62388610839844, 818.0401000976562, 759.2701416015625, -675.5877685546875, 743.263916015625, 677.346923828125, 42.04106903076172, 735.41259765625, 2769.395751953125, -671.5199584960938, 297.2652587890625, -2.4284229278564453, 752.4703369140625, 1468.397705078125, 118.37065887451172, 612.6241455078125, -219.36358642578125, 1323.5006103515625, 483.4501953125, 516.7893676757812, 432.1614990234375, -0.45738983154296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000661.npy"} +{"epoch": 0.9706314243759178, "step": 662, "batch_size": 64, "mean": 366.068359375, "std": 672.4151000976562, "min": -1134.043701171875, "p10": -433.985546875, "median": 388.0619201660156, "p90": 1119.0428222656253, "max": 2386.697509765625, "pos_frac": 0.71875, "sample": [37.404762268066406, 810.7509765625, 1154.850341796875, -1134.043701171875, 609.3429565429688, -433.87933349609375, 456.5693359375, 874.48876953125, 1830.900634765625, 27.181678771972656, 862.9450073242188, -434.03106689453125, -1081.177001953125, -200.3328399658203, 1452.6690673828125, 72.48168182373047, -19.423593521118164, 324.9965515136719, 2386.697509765625, 811.4171752929688, 1242.976318359375, -173.5159912109375, 392.2413330078125, -178.31683349609375, 185.31399536132812, 856.116455078125, 126.58085632324219, 992.426025390625, 428.4107971191406, 211.57952880859375, 308.82171630859375, -173.41717529296875, 401.1600341796875, -208.71636962890625, 568.6658325195312, 1035.491943359375, 583.774658203125, 383.88250732421875, 674.5719604492188, 665.3795166015625, 35.79814147949219, 111.03547668457031, 846.21630859375, 1915.4981689453125, 778.1521606445312, 1271.099853515625, -766.6363525390625, 462.443115234375, 1006.0492553710938, 338.72015380859375, -589.453369140625, 67.8302001953125, -97.06517028808594, 552.78369140625, -545.7623291015625, -233.49710083007812, 559.5217895507812, 566.7042236328125, 362.6252746582031, 592.4962158203125, 632.6000366210938, -747.2924194335938, -332.2283935546875, -92.49946594238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000662.npy"} +{"epoch": 0.9720998531571219, "step": 663, "batch_size": 64, "mean": 433.2557373046875, "std": 737.7041015625, "min": -1685.197509765625, "p10": -215.95414581298823, "median": 291.51329040527344, "p90": 1184.59794921875, "max": 3112.15087890625, "pos_frac": 0.75, "sample": [834.301025390625, 333.255126953125, 828.6139526367188, 43.39265441894531, 357.2908935546875, 1069.7467041015625, 217.46897888183594, 258.4734191894531, -60.03662109375, 412.4274597167969, 117.75676727294922, 154.00714111328125, 1523.943359375, 842.69921875, 64.02555847167969, 891.4303588867188, 692.3046875, 77.18970489501953, -1685.197509765625, 176.51995849609375, 372.01031494140625, 617.6212768554688, -242.55120849609375, 717.1488647460938, 696.568115234375, 380.46588134765625, 595.028564453125, 175.39663696289062, 455.409423828125, 3112.15087890625, -382.73968505859375, 1147.1094970703125, 1672.810791015625, -8.743881225585938, 610.2477416992188, 2244.854248046875, 1615.9678955078125, 114.84428405761719, -4.827848434448242, 1832.4339599609375, 1081.554931640625, -372.0769348144531, 84.366943359375, 324.55316162109375, -116.58294677734375, 562.4750366210938, 1143.718017578125, 156.48207092285156, 159.14718627929688, 1070.382568359375, 174.46141052246094, -25.971412658691406, 1200.6644287109375, -1115.621337890625, 946.5753173828125, -27.236469268798828, -518.997314453125, -153.8943328857422, 92.73822021484375, 677.0507202148438, -484.18017578125, -89.22595977783203, -83.09671020507812, 170.26446533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000663.npy"} +{"epoch": 0.973568281938326, "step": 664, "batch_size": 64, "mean": 498.7710266113281, "std": 679.9046020507812, "min": -832.9625854492188, "p10": -190.6662628173828, "median": 428.28868103027344, "p90": 1297.7966918945315, "max": 2371.5048828125, "pos_frac": 0.75, "sample": [1117.899169921875, 38.604949951171875, 582.326904296875, 688.5014038085938, 867.0596923828125, 14.798484802246094, 306.5361633300781, 2012.0172119140625, -166.17019653320312, 1495.677001953125, -437.248779296875, 766.913330078125, 459.49755859375, 289.4939270019531, 601.9948120117188, 240.2811737060547, -726.7860107421875, 487.626708984375, -54.285423278808594, 537.98291015625, 1260.02685546875, 1313.9837646484375, 61.4161376953125, 1171.17138671875, -226.06204223632812, 175.90951538085938, 2021.60888671875, -200.81820678710938, 523.82666015625, -472.3240661621094, 1169.8836669921875, 120.77902221679688, -37.68414306640625, 1151.30517578125, 533.7971801757812, 399.3593444824219, -832.9625854492188, 426.9016418457031, 707.2197265625, 1158.084228515625, 99.61964416503906, 559.88134765625, 774.3766479492188, -166.9783935546875, 1006.9566040039062, 2292.681640625, 837.5374755859375, 242.7294921875, 2371.5048828125, -162.97430419921875, -148.2977294921875, 1343.597412109375, 1154.06201171875, 231.2456512451172, 249.33419799804688, 541.670166015625, 173.38931274414062, 304.1992492675781, -126.03211975097656, -93.82382202148438, 429.67572021484375, -16.982635498046875, 876.8949584960938, -401.06427001953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000664.npy"} +{"epoch": 0.9750367107195301, "step": 665, "batch_size": 64, "mean": 413.22760009765625, "std": 532.580078125, "min": -677.846923828125, "p10": -163.00472640991208, "median": 356.2386474609375, "p90": 1072.6174316406255, "max": 1917.069580078125, "pos_frac": 0.796875, "sample": [-179.24978637695312, 7.847499847412109, 435.0376281738281, 326.3917541503906, -125.0995864868164, 467.73150634765625, 835.5186767578125, -72.05072021484375, 1506.523681640625, 452.78424072265625, 334.6875915527344, 560.227294921875, 150.76730346679688, -599.2620239257812, 659.3967895507812, 5.936616897583008, -19.4991455078125, 190.62698364257812, 13.484634399414062, 565.5139770507812, 701.5594482421875, 138.03077697753906, 475.91265869140625, 359.3812561035156, -29.401105880737305, 267.1313171386719, -677.846923828125, 1384.493408203125, 768.9273681640625, -27.00156021118164, 718.2984619140625, 369.2063293457031, 693.760498046875, 410.2970886230469, 1131.80224609375, 177.47296142578125, 1173.316650390625, 550.8421630859375, 353.0960388183594, 252.17637634277344, 270.0343322753906, 1791.08740234375, 487.3249816894531, -367.93231201171875, 549.7288208007812, 875.4979248046875, 335.87066650390625, -394.36224365234375, 484.6865234375, 934.51953125, 99.15875244140625, 172.85069274902344, 902.7578735351562, 212.28707885742188, 1744.757080078125, -241.0749053955078, 704.0020751953125, -233.54263305664062, 236.62060546875, 881.2057495117188, 1917.069580078125, 73.87843322753906, 417.1480712890625, -115.77664184570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000665.npy"} +{"epoch": 0.9765051395007343, "step": 666, "batch_size": 64, "mean": 422.8899230957031, "std": 609.143798828125, "min": -1034.518310546875, "p10": -308.56541748046874, "median": 345.4820251464844, "p90": 1065.6788696289066, "max": 2593.01220703125, "pos_frac": 0.765625, "sample": [775.6533203125, 418.56365966796875, 697.0806274414062, 437.3017272949219, -444.9573974609375, 699.5975341796875, 1144.1810302734375, 1010.9683837890625, 153.15957641601562, 1006.2235107421875, -37.08326721191406, 260.1238708496094, 393.9250183105469, -473.45263671875, -321.80401611328125, 275.085205078125, 283.1988525390625, -221.38629150390625, -52.76092529296875, -72.22357177734375, 1417.5543212890625, 8.756904602050781, 749.498291015625, 515.2109375, 2093.678466796875, -43.61775207519531, 790.9752197265625, 319.6485595703125, -113.49686431884766, -684.5401000976562, 322.9299621582031, 670.7093505859375, 369.71734619140625, 1089.126220703125, 915.83740234375, 428.6846923828125, 877.5343017578125, -365.6817321777344, 345.5111083984375, 463.7142639160156, 64.48275756835938, 794.76025390625, -277.67535400390625, 1297.70849609375, 492.4940185546875, 224.0245361328125, -1034.518310546875, 1006.166015625, 374.4557189941406, 92.40243530273438, 345.45294189453125, 728.0211791992188, 977.8480224609375, -13.111812591552734, 155.28131103515625, 176.42617797851562, 257.16900634765625, 336.0851745605469, 2593.01220703125, 217.0197296142578, -389.6885681152344, 895.3175659179688, 321.2774963378906, 1327.3997802734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000666.npy"} +{"epoch": 0.9779735682819384, "step": 667, "batch_size": 64, "mean": 500.7984313964844, "std": 651.6002197265625, "min": -723.3258056640625, "p10": -286.63959960937495, "median": 467.1950988769531, "p90": 1241.1739746093754, "max": 2263.717529296875, "pos_frac": 0.78125, "sample": [430.1700134277344, -539.2333374023438, 679.6380615234375, -699.628662109375, -69.12619018554688, 975.6906127929688, 1009.771484375, 576.1102294921875, 558.2239379882812, 1279.5003662109375, 256.11767578125, 98.81576538085938, 106.1041030883789, 483.6962890625, 298.54327392578125, -723.3258056640625, 720.4022216796875, 582.46044921875, 1740.6884765625, 902.9332885742188, 796.1853637695312, 221.91024780273438, 1151.7457275390625, 121.13065338134766, 586.9299926757812, 446.23907470703125, 1013.1357421875, -104.82848358154297, 1058.6290283203125, 760.6329956054688, 858.501953125, 640.8128051757812, 923.6598510742188, 671.6336669921875, 354.3490905761719, -267.0516052246094, 385.03887939453125, 450.69390869140625, 369.0933532714844, -41.68457794189453, -85.88975524902344, 1579.275634765625, 1848.2384033203125, 1.8789443969726562, 741.6116943359375, 1510.9017333984375, 661.2941284179688, 236.24972534179688, 166.5206298828125, 749.27587890625, -2.1271133422851562, 882.8641357421875, -418.7113037109375, 205.78309631347656, 2263.717529296875, 2195.3623046875, 80.23662567138672, 1025.251953125, -657.5350952148438, 359.5194396972656, -701.2521362304688, -295.0344543457031, -57.751007080078125, 697.1080932617188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000667.npy"} +{"epoch": 0.9794419970631424, "step": 668, "batch_size": 64, "mean": 340.3535461425781, "std": 753.9414672851562, "min": -1319.647705078125, "p10": -508.7701965332031, "median": 289.470458984375, "p90": 1208.374841308594, "max": 2700.5869140625, "pos_frac": 0.703125, "sample": [507.1441955566406, 281.54974365234375, 1343.2490234375, -1218.08740234375, 209.0114288330078, 875.5473022460938, 1392.456298828125, 281.9114990234375, 1072.53759765625, 480.01904296875, -650.4995727539062, 657.4541625976562, 43.94194793701172, 86.03765869140625, 400.55279541015625, 1065.9527587890625, 211.2790069580078, 80.91395568847656, -477.5869140625, -549.7451782226562, -1018.1416015625, 1040.379150390625, -470.332275390625, 914.5549926757812, -143.12338256835938, 649.4835815429688, 351.6813049316406, 436.7830505371094, 38.70695495605469, 674.6400756835938, 320.27294921875, 645.2105102539062, 1186.486083984375, -457.9447937011719, 974.5076293945312, 404.06488037109375, -917.4804077148438, 1030.3824462890625, 112.27350616455078, -231.21798706054688, 146.07464599609375, -73.22016143798828, 1397.879638671875, -84.29747772216797, -522.1344604492188, 1134.8077392578125, -93.80889892578125, -424.31878662109375, 336.9375915527344, -42.612831115722656, 382.05755615234375, 204.9959716796875, 297.0294189453125, 272.1805725097656, -212.77383422851562, 1217.7557373046875, 390.7718200683594, 344.46612548828125, 2295.02783203125, -1319.647705078125, -73.0693588256836, 1815.113037109375, 58.0006103515625, 2700.5869140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000668.npy"} +{"epoch": 0.9809104258443465, "step": 669, "batch_size": 64, "mean": 482.85858154296875, "std": 653.193603515625, "min": -1325.0335693359375, "p10": -215.3625427246093, "median": 414.95762634277344, "p90": 1405.1581787109378, "max": 2485.71533203125, "pos_frac": 0.8125, "sample": [176.34275817871094, 54.5469970703125, -69.8462142944336, 1008.4764404296875, -875.6448364257812, 286.45318603515625, 626.2324829101562, 649.8972778320312, 390.2933654785156, -61.99340057373047, 2485.71533203125, -239.46255493164062, 26.818618774414062, 305.8250732421875, -298.3246765136719, 442.2265930175781, 1467.6376953125, -84.43798065185547, 53.14398956298828, 195.8389129638672, 559.12353515625, 795.220703125, 1442.7408447265625, 1288.10107421875, 115.91105651855469, 1726.802001953125, 78.6292724609375, 1428.4749755859375, 1012.31103515625, 55.75821304321289, 589.1480712890625, 1208.2276611328125, -159.12918090820312, 564.0281982421875, 1478.608642578125, -587.4523315429688, -264.1581726074219, 744.7703247070312, 694.564453125, 802.3864135742188, 1350.7523193359375, 360.68731689453125, 172.77069091796875, 349.4142761230469, -1325.0335693359375, 794.1932373046875, 923.6818237304688, 656.771240234375, 417.24755859375, 266.47808837890625, -730.6643676757812, 1013.4323120117188, 412.6676940917969, 475.4321594238281, 676.6248779296875, 769.5625610351562, 49.6303596496582, 274.9455871582031, 499.2525329589844, 1291.216064453125, -12.30593490600586, 1452.7685546875, 349.1826477050781, 300.4357604980469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000669.npy"} +{"epoch": 0.9823788546255506, "step": 670, "batch_size": 64, "mean": 489.8502502441406, "std": 676.6763305664062, "min": -1392.3499755859375, "p10": -300.7358154296875, "median": 355.1375427246094, "p90": 1427.1839965820316, "max": 1819.99658203125, "pos_frac": 0.765625, "sample": [164.29751586914062, 501.4833984375, 247.48565673828125, 1357.416259765625, 451.7076110839844, -193.46017456054688, -202.74624633789062, -111.5252914428711, 1098.7005615234375, 544.1266479492188, 342.17242431640625, -85.94085693359375, -1392.3499755859375, 1819.99658203125, 1268.658935546875, 1751.531494140625, 697.0822143554688, 319.5837707519531, 1217.705078125, 640.0830078125, 118.91871643066406, -551.8788452148438, 1374.1748046875, -320.3846435546875, -562.6629028320312, 368.1026611328125, 1496.9658203125, 53.268009185791016, 1092.9810791015625, -450.6227111816406, 1498.76416015625, 812.7651977539062, -217.17408752441406, 233.23858642578125, 183.30056762695312, 1449.9022216796875, 1180.03173828125, -258.216064453125, 105.87265014648438, 1330.2734375, 423.8980712890625, 1207.1009521484375, 826.507568359375, -301.14483642578125, 1767.54248046875, 879.8587036132812, 297.34771728515625, 337.9877624511719, -299.78143310546875, -319.6204833984375, 14.249664306640625, 1466.77490234375, 176.61175537109375, 262.06658935546875, 1270.7540283203125, 169.58868408203125, 283.6031494140625, 1046.836669921875, -29.0986328125, 841.1270751953125, 448.8236999511719, 559.3953247070312, 45.68023681640625, 600.678466796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000670.npy"} +{"epoch": 0.9838472834067548, "step": 671, "batch_size": 64, "mean": 403.0694885253906, "std": 660.3814086914062, "min": -1592.95166015625, "p10": -218.1745468139648, "median": 378.3988037109375, "p90": 1274.9480224609376, "max": 1948.1341552734375, "pos_frac": 0.75, "sample": [-6.6175689697265625, -1592.95166015625, 458.1715393066406, 83.89578247070312, -131.52203369140625, 702.186279296875, 228.87545776367188, 385.31243896484375, 607.7635498046875, 397.26873779296875, 425.03485107421875, -76.51065826416016, 155.30348205566406, 1095.225341796875, 1027.2119140625, 62.40293884277344, 199.46327209472656, 253.86444091796875, 886.233642578125, 780.4577026367188, 129.84909057617188, 1490.740966796875, -1452.375, 1263.42724609375, 809.0897827148438, -44.17890930175781, 1940.66943359375, 453.3096618652344, 279.123046875, 594.2556762695312, 21.29281997680664, -621.224365234375, -478.08740234375, 505.97137451171875, 1288.4788818359375, -174.70086669921875, 371.48516845703125, 240.47024536132812, 715.063720703125, 1279.885498046875, 1347.950927734375, 415.6697692871094, -230.6935272216797, 91.9177474975586, 811.7938232421875, -18.702835083007812, 302.4859313964844, 933.3397216796875, -334.2149353027344, -65.6666030883789, 797.0186767578125, 428.6191101074219, -96.10980987548828, 255.33148193359375, 1257.9156494140625, 769.6856079101562, -188.96359252929688, -619.4912719726562, 250.87147521972656, 1379.1580810546875, 407.45599365234375, 1948.1341552734375, 1056.130615234375, 343.1943664550781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000671.npy"} +{"epoch": 0.9853157121879589, "step": 672, "batch_size": 64, "mean": 410.8834228515625, "std": 752.448486328125, "min": -1288.2999267578125, "p10": -549.0892730712889, "median": 393.7780456542969, "p90": 1259.2124877929691, "max": 2723.106689453125, "pos_frac": 0.703125, "sample": [395.748046875, -103.96798706054688, -294.0145263671875, 210.60635375976562, -141.99342346191406, 86.56593322753906, 847.4580078125, 451.087158203125, 1744.4349365234375, 400.7062072753906, 91.96709442138672, -125.15869903564453, -389.0892639160156, -345.5241394042969, 514.002685546875, 255.32754516601562, -132.05532836914062, -647.1617431640625, 430.04595947265625, 1720.47607421875, 92.90031433105469, 781.376220703125, 19.01313018798828, -122.42709350585938, 1185.3748779296875, -1288.2999267578125, 1134.6527099609375, -926.572021484375, 498.5713195800781, 2723.106689453125, -141.09730529785156, 976.7485961914062, 977.1478271484375, 1114.489501953125, 1177.431396484375, 724.152587890625, 610.9854125976562, -617.6607055664062, 175.81777954101562, 2048.20556640625, -964.80078125, -11.803550720214844, 378.44061279296875, 1288.6287841796875, 881.6281127929688, 671.031494140625, 923.7061157226562, 205.20172119140625, 882.9114379882812, 663.8466186523438, 1562.4140625, 237.41506958007812, -162.46856689453125, 663.8345947265625, 86.479248046875, 811.9414672851562, 1190.574462890625, 113.72386932373047, -80.25444030761719, -763.0836181640625, 590.841064453125, 391.80804443359375, 1403.285888671875, -782.1412353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000672.npy"} +{"epoch": 0.986784140969163, "step": 673, "batch_size": 64, "mean": 382.5746765136719, "std": 615.8585205078125, "min": -1285.218017578125, "p10": -278.36803283691404, "median": 384.06573486328125, "p90": 1067.8050292968758, "max": 2976.3876953125, "pos_frac": 0.78125, "sample": [-168.8763885498047, -556.5552368164062, 487.8465881347656, 896.39697265625, 287.489501953125, 493.530517578125, 714.571533203125, 355.5478820800781, 1273.83154296875, 476.0246276855469, -147.24717712402344, 1442.9886474609375, 488.0585021972656, 573.9228515625, 112.60514831542969, 167.3570556640625, -462.04150390625, 834.8611450195312, 1556.48046875, 340.4400634765625, -298.28021240234375, 349.64984130859375, 339.34527587890625, -76.32359313964844, 115.02799987792969, -1285.218017578125, 248.76907348632812, 611.4496459960938, -505.3646545410156, 171.85182189941406, 775.254638671875, 1400.9649658203125, 147.64259338378906, -32.47206497192383, 687.9336547851562, 588.5983276367188, 409.80487060546875, -631.5573120117188, 514.184814453125, 779.3914794921875, 795.323974609375, 85.35831451416016, 236.52828979492188, 679.141357421875, 445.45849609375, 72.53900909423828, 495.4972229003906, -146.4430389404297, 434.6453552246094, 2976.3876953125, 132.73696899414062, 107.09164428710938, 536.3706665039062, 633.546142578125, -665.0072021484375, 554.3048095703125, 448.197021484375, 1141.265625, 118.75726318359375, -231.90628051757812, -43.067344665527344, 617.45556640625, 1224.3853759765625, 358.32659912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000673.npy"} +{"epoch": 0.9882525697503671, "step": 674, "batch_size": 64, "mean": 491.29443359375, "std": 670.5330810546875, "min": -585.914794921875, "p10": -274.142431640625, "median": 424.4930114746094, "p90": 1127.567077636719, "max": 3157.691650390625, "pos_frac": 0.75, "sample": [-103.80712890625, 34.95112228393555, 866.1129760742188, 665.927001953125, 595.86962890625, 759.8340454101562, 34.72062683105469, -75.46272277832031, -504.7997741699219, 1142.0809326171875, 1334.7086181640625, 902.6949462890625, 75.78517150878906, 485.09320068359375, -279.18896484375, 419.74432373046875, -220.42893981933594, 2031.13427734375, 692.0784912109375, 3157.691650390625, 495.68341064453125, 454.4702453613281, -21.337787628173828, 1079.0560302734375, 429.24169921875, -262.3671875, 1081.14013671875, 491.032958984375, 609.2598266601562, -135.598388671875, 1093.701416015625, 2043.3104248046875, -126.74111938476562, 252.09503173828125, 363.38629150390625, 824.5908203125, -298.610595703125, -328.69818115234375, -368.4622497558594, 852.9804077148438, 594.8135986328125, 383.8999938964844, 339.234130859375, 1670.75537109375, 350.0727844238281, 875.7078857421875, 815.489990234375, 953.5886840820312, 557.8973388671875, 52.8646354675293, -585.914794921875, 216.73721313476562, 1856.6981201171875, -340.0126037597656, -3.3733768463134766, 660.3894653320312, 174.22518920898438, 712.335205078125, 396.5019836425781, 635.8399047851562, 151.55886840820312, -23.398359298706055, 158.23379516601562, 295.827880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000674.npy"} +{"epoch": 0.9897209985315712, "step": 675, "batch_size": 64, "mean": 507.7829284667969, "std": 684.7032470703125, "min": -721.4531860351562, "p10": -144.97400817871093, "median": 367.55784606933594, "p90": 1369.0150390625001, "max": 2773.9462890625, "pos_frac": 0.78125, "sample": [275.84600830078125, 1376.721435546875, 570.5471801757812, 122.56605529785156, 40.80455780029297, 130.53907775878906, 819.9605102539062, -60.765045166015625, 587.3864135742188, 295.8379821777344, 823.6610717773438, -197.562255859375, 610.9923706054688, 1046.704345703125, -145.1607666015625, -34.87342834472656, 167.44061279296875, 1351.033447265625, 566.4815673828125, 484.4682922363281, 757.88037109375, -42.085731506347656, 2268.2216796875, 794.6336669921875, 38.22838592529297, 408.3672180175781, 8.899776458740234, 735.0967407226562, 568.273193359375, 489.73590087890625, 370.76580810546875, 1613.08056640625, 358.77203369140625, 1764.67431640625, -144.53823852539062, 1317.924560546875, 2773.9462890625, -242.2377471923828, 971.0690307617188, 2072.23681640625, 200.9665069580078, 81.65145874023438, -36.424110412597656, 119.94927978515625, 295.9423828125, -37.16363525390625, -721.4531860351562, 205.57638549804688, 655.4408569335938, 134.41323852539062, 143.7161407470703, 1562.703857421875, 743.5751953125, -563.710693359375, -614.6024169921875, 401.27301025390625, 79.6268310546875, 722.6748046875, -53.313201904296875, 1343.50244140625, 364.3498840332031, 1130.568115234375, 1059.6484375, -436.3777160644531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000675.npy"} +{"epoch": 0.9911894273127754, "step": 676, "batch_size": 64, "mean": 473.14080810546875, "std": 591.5277099609375, "min": -881.29736328125, "p10": -125.45591659545896, "median": 405.97882080078125, "p90": 1348.4051757812501, "max": 2309.787353515625, "pos_frac": 0.75, "sample": [12.081199645996094, 1600.6839599609375, 856.144775390625, 890.0971069335938, -24.715927124023438, -95.0938491821289, 417.3661193847656, 769.2666015625, 33.40665054321289, -881.29736328125, 500.2962646484375, 36.84002685546875, -26.817031860351562, 1264.7967529296875, 1741.3074951171875, 2309.787353515625, -74.14209747314453, -192.4457244873047, -56.17271423339844, -195.20553588867188, -33.764183044433594, 507.9552001953125, 610.9110107421875, -77.37026977539062, -138.46823120117188, 751.22119140625, 100.68170166015625, 1266.619384765625, 394.5915222167969, 1564.14599609375, -36.591949462890625, 691.3763427734375, 265.2863464355469, 713.468994140625, 157.6512451171875, 794.1083374023438, 1489.0048828125, 432.961669921875, 508.7579345703125, 1357.17919921875, 735.5420532226562, -12.999168395996094, 168.30918884277344, -351.82061767578125, 1352.855224609375, 1045.8359375, 65.12466430664062, 873.0034790039062, 520.6793212890625, 350.17596435546875, 794.6827392578125, 482.000732421875, 250.1738739013672, 388.748291015625, 47.359474182128906, 306.1488037109375, 876.2926635742188, -203.265625, 1338.021728515625, 132.1887969970703, 456.4854736328125, 190.71820068359375, -232.9993438720703, 501.83868408203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000676.npy"} +{"epoch": 0.9926578560939795, "step": 677, "batch_size": 64, "mean": 454.3424377441406, "std": 740.900634765625, "min": -888.8284912109375, "p10": -234.83784637451168, "median": 357.67608642578125, "p90": 1206.7333007812501, "max": 3911.780029296875, "pos_frac": 0.75, "sample": [227.358642578125, 2733.983642578125, 619.7821655273438, -37.871620178222656, 515.0523681640625, -250.0948486328125, -133.03729248046875, -338.1531677246094, 423.0880126953125, -96.79296875, 516.1799926757812, 539.905029296875, 37.82206726074219, 1248.915771484375, 312.00616455078125, 877.1361694335938, 758.9479370117188, 27.867996215820312, 278.23895263671875, 193.4103240966797, 1189.746337890625, 274.5358581542969, 1156.944580078125, -147.3566436767578, 347.38482666015625, 1300.04736328125, 48.55079650878906, 535.281982421875, 357.99395751953125, 1170.6090087890625, 357.35821533203125, -70.06588745117188, -199.23817443847656, 220.15924072265625, 267.4933776855469, -678.33642578125, 951.6279296875, 360.28399658203125, 3911.780029296875, 127.73388671875, 104.13370513916016, 510.73529052734375, 843.1228637695312, 707.8933715820312, -144.15408325195312, 1214.013427734375, 424.0644836425781, 233.49813842773438, 735.6649169921875, 129.64466857910156, 1331.950927734375, -40.51301956176758, 748.5584716796875, 390.9254455566406, 566.13330078125, -397.4954528808594, -888.8284912109375, 1752.10888671875, 538.6760864257812, 677.7469482421875, 752.90576171875, -299.64373779296875, -738.741455078125, -10.763275146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000677.npy"} +{"epoch": 0.9941262848751835, "step": 678, "batch_size": 64, "mean": 533.0916748046875, "std": 723.9562377929688, "min": -1719.78564453125, "p10": -146.03075408935544, "median": 431.1685333251953, "p90": 1159.4658447265626, "max": 2627.169677734375, "pos_frac": 0.8125, "sample": [268.0325622558594, 935.142578125, 540.603271484375, 620.8671264648438, 513.5473022460938, -44.203521728515625, 551.2800903320312, -1719.78564453125, 120.21722412109375, -254.10696411132812, -89.17951965332031, 1174.6121826171875, 71.95487976074219, 1124.1243896484375, -52.176719665527344, 568.0963134765625, 881.7728881835938, -302.06201171875, 873.6029663085938, 877.5911254882812, 1051.3853759765625, 1082.251708984375, 15.596214294433594, 2233.02880859375, 1092.927734375, 2627.169677734375, 63.28874206542969, 283.633544921875, -13.404376983642578, 1118.8629150390625, -494.4621276855469, 176.26278686523438, 257.14141845703125, 785.7849731445312, 276.4571838378906, 1036.172607421875, 187.32064819335938, 1100.8282470703125, 293.56341552734375, 892.5184936523438, -155.98558044433594, 823.5193481445312, 1275.651123046875, 1641.9202880859375, 338.1663818359375, 259.3545837402344, 2300.935791015625, 2360.7724609375, 53.599788665771484, 572.7520141601562, 279.1295166015625, -659.1109619140625, 378.55035400390625, 23.467056274414062, 57.32592010498047, 105.43965148925781, 625.3677368164062, 1084.4849853515625, 602.8096313476562, 482.370361328125, 379.9667053222656, -122.80282592773438, 865.665283203125, -181.74252319335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000678.npy"} +{"epoch": 0.9955947136563876, "step": 679, "batch_size": 64, "mean": 382.27972412109375, "std": 605.42822265625, "min": -721.9423828125, "p10": -290.47289886474607, "median": 288.74595642089844, "p90": 1303.9257202148438, "max": 2081.857666015625, "pos_frac": 0.71875, "sample": [576.9481811523438, 60.07221221923828, 1304.033203125, 2081.857666015625, 405.5057067871094, 713.7910766601562, 850.9905395507812, 461.0498352050781, 1303.6749267578125, 350.2202453613281, 462.9739990234375, 445.6465148925781, 421.3553161621094, 265.99810791015625, 994.50732421875, -418.6412658691406, -160.72702026367188, 123.37472534179688, 193.1436767578125, 940.5819091796875, 628.3192138671875, 1550.734619140625, -105.00679016113281, 104.72634887695312, 162.68836975097656, -320.4270324707031, 321.12890625, 1531.2288818359375, -437.1761169433594, 514.226806640625, 838.5325927734375, 64.47564697265625, -68.7821044921875, -32.32598114013672, 941.73486328125, 380.033203125, 1523.31103515625, 146.35865783691406, -198.78350830078125, 16.831275939941406, 183.47598266601562, -89.31379699707031, 474.9818420410156, -11.707000732421875, -307.0335388183594, 1477.572265625, 311.4938049316406, -159.60733032226562, -721.9423828125, -182.45046997070312, 875.4227294921875, -547.3265991210938, 592.132080078125, -251.83140563964844, 473.66302490234375, 262.6438903808594, -77.91958618164062, 192.17138671875, 786.8731689453125, 1927.356201171875, 204.11148071289062, 630.18896484375, -641.869873046875, 126.63379669189453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000679.npy"} +{"epoch": 0.9970631424375918, "step": 680, "batch_size": 64, "mean": 508.6307678222656, "std": 753.7247924804688, "min": -2368.30126953125, "p10": -207.59720916748043, "median": 377.6241455078125, "p90": 1590.0399047851568, "max": 2350.96435546875, "pos_frac": 0.828125, "sample": [1296.3240966796875, 351.5274963378906, 422.98858642578125, 933.2130126953125, 130.03208923339844, 1147.36181640625, 1782.72705078125, 579.7132568359375, 207.71243286132812, 389.7713317871094, 572.5926513671875, 176.1283416748047, 825.5936889648438, -410.1980895996094, 1064.0260009765625, 1476.6328125, 729.8150024414062, -601.4407958984375, -220.77650451660156, 874.3297119140625, 1139.38232421875, 199.80450439453125, -130.1898956298828, 331.3258056640625, -234.71505737304688, 1032.0106201171875, 888.0681762695312, 179.3724822998047, 630.9256591796875, 1713.23193359375, 182.5583953857422, 96.91455841064453, 156.85507202148438, 1747.037353515625, 1087.2939453125, 104.12748718261719, 166.20574951171875, 506.819580078125, 1850.8428955078125, -583.4601440429688, 123.61871337890625, 365.4769592285156, 485.53314208984375, 595.07958984375, 152.3799285888672, -2368.30126953125, 1762.2344970703125, -176.84552001953125, -530.1336059570312, 1470.2154541015625, 544.253662109375, 2350.96435546875, 21.014297485351562, 25.910751342773438, 195.80123901367188, 1432.8173828125, 723.747802734375, 1638.6429443359375, -40.15250015258789, 154.63082885742188, -129.75466918945312, 205.66055297851562, 30.554046630859375, 726.5357666015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000680.npy"} +{"epoch": 0.9985315712187959, "step": 681, "batch_size": 64, "mean": 399.1241760253906, "std": 649.6876831054688, "min": -895.2818603515625, "p10": -391.7209564208984, "median": 424.4937744140625, "p90": 1222.506518554688, "max": 2216.66552734375, "pos_frac": 0.671875, "sample": [748.1847534179688, -339.79937744140625, 582.5130615234375, 138.11358642578125, 901.71630859375, -895.2818603515625, -6.050323486328125, 464.4501647949219, -442.2178649902344, 267.6610107421875, -274.43316650390625, 385.4816589355469, -449.4920654296875, -429.92401123046875, 1278.3673095703125, 1697.7073974609375, 472.12420654296875, -118.93962860107422, -292.421630859375, 1050.0142822265625, -197.4716796875, -10.385345458984375, 819.551025390625, 463.5058898925781, 1749.572998046875, 666.4053344726562, 205.30496215820312, 1013.956787109375, 908.427490234375, 594.1490478515625, 146.7320556640625, 489.4520263671875, 255.5406494140625, -88.36013793945312, -166.4520263671875, 206.1329803466797, 258.12005615234375, -714.7716064453125, 487.2286071777344, -116.41911315917969, -371.00103759765625, 2216.66552734375, -672.9071044921875, 909.4166870117188, -115.46639251708984, 500.8175048828125, -400.6009216308594, 208.71873474121094, 59.83058166503906, -238.00555419921875, 947.5053100585938, -334.4187316894531, 942.0847778320312, 578.6897583007812, 1340.541748046875, 528.7022094726562, 1092.1646728515625, 1035.6976318359375, 1070.17138671875, 711.170166015625, 237.62315368652344, 1305.84033203125, 1425.8939208984375, 856.818603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000681.npy"} diff --git a/margin_logs/step_0000001.npy b/margin_logs/step_0000001.npy new file mode 100644 index 0000000..248c095 --- /dev/null +++ b/margin_logs/step_0000001.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb7ed5e9b5d6de6c4e509dd17cf5d9c91337fabd0c174e116c5e60872823ad93 +size 384 diff --git a/margin_logs/step_0000002.npy b/margin_logs/step_0000002.npy new file mode 100644 index 0000000..984e4c2 --- /dev/null +++ b/margin_logs/step_0000002.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc23171824afa57340cda53f69d83aef67c7c0b95175e9ec4a3a7bc3c221bc4f +size 384 diff --git a/margin_logs/step_0000003.npy b/margin_logs/step_0000003.npy new file mode 100644 index 0000000..b5432e7 --- /dev/null +++ b/margin_logs/step_0000003.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63499eff08740a4dd0033cfeb24771a4a545281ae8a60a0bb50a2a0f2afd4667 +size 384 diff --git a/margin_logs/step_0000004.npy b/margin_logs/step_0000004.npy new file mode 100644 index 0000000..5acfcd9 --- /dev/null +++ b/margin_logs/step_0000004.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3e26ec4de370e55600e77cc5547db54529f7be2650ffaabdecc7187101c5bc2 +size 384 diff --git a/margin_logs/step_0000005.npy b/margin_logs/step_0000005.npy new file mode 100644 index 0000000..b2d8814 --- /dev/null +++ b/margin_logs/step_0000005.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cee3795fe4ea9d4de145c37135b410e049a1b3c5cdb1b9b5850a01a9708c658b +size 384 diff --git a/margin_logs/step_0000006.npy b/margin_logs/step_0000006.npy new file mode 100644 index 0000000..f35eb16 --- /dev/null +++ b/margin_logs/step_0000006.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7010bf993bbea21c4bf27eb1a8857e1caa0afe4861ef12ab639405d121f470a7 +size 384 diff --git a/margin_logs/step_0000007.npy b/margin_logs/step_0000007.npy new file mode 100644 index 0000000..8876b1b --- /dev/null +++ b/margin_logs/step_0000007.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb63cda7c89e748cead1673acc3b83b1789647661f612c5b51aa42542b3a190c +size 384 diff --git a/margin_logs/step_0000008.npy b/margin_logs/step_0000008.npy new file mode 100644 index 0000000..8f721c1 --- /dev/null +++ b/margin_logs/step_0000008.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97e6302fcf9351a2ad71d1cdd528b7b03c2cc38ae8d6b4308343c20d3e64d00f +size 384 diff --git a/margin_logs/step_0000009.npy b/margin_logs/step_0000009.npy new file mode 100644 index 0000000..dd6a042 --- /dev/null +++ b/margin_logs/step_0000009.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5fdadadb55efd6107f9de272ecffc4abdf68ec955a9e3b6798295afbaccbb18 +size 384 diff --git a/margin_logs/step_0000010.npy b/margin_logs/step_0000010.npy new file mode 100644 index 0000000..335de7a --- /dev/null +++ b/margin_logs/step_0000010.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f797e8eb4dd1505c31f06b82cf4420d0216fd8af97f1a99a8de066a7e18889e +size 384 diff --git a/margin_logs/step_0000011.npy b/margin_logs/step_0000011.npy new file mode 100644 index 0000000..1ef463f --- /dev/null +++ b/margin_logs/step_0000011.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4647acb8ee93ff22ee11f816b1391496eb10a219a4e42ef6a3b5e52fb738af59 +size 384 diff --git a/margin_logs/step_0000012.npy b/margin_logs/step_0000012.npy new file mode 100644 index 0000000..db6ab1a --- /dev/null +++ b/margin_logs/step_0000012.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23b5f7e456241277b1c480277a54be514aeb70cd4c0c1fd9504cd51577c21d8e +size 384 diff --git a/margin_logs/step_0000013.npy b/margin_logs/step_0000013.npy new file mode 100644 index 0000000..8ccf28d --- /dev/null +++ b/margin_logs/step_0000013.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab4137b4bc2a17315a8e5edb4d1e0abb3ae70387195d068ddb0810040759d244 +size 384 diff --git a/margin_logs/step_0000014.npy b/margin_logs/step_0000014.npy new file mode 100644 index 0000000..24b74ec --- /dev/null +++ b/margin_logs/step_0000014.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c03d41d403a871ce53469fa80c1ead4473e6a36f9bfad2bff8dfec6a05386a1 +size 384 diff --git a/margin_logs/step_0000015.npy b/margin_logs/step_0000015.npy new file mode 100644 index 0000000..83d4ab1 --- /dev/null +++ b/margin_logs/step_0000015.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e920caddc52edc85e56c6e74c7086fac6b2cb25e8d2cb5663c10722b165fc923 +size 384 diff --git a/margin_logs/step_0000016.npy b/margin_logs/step_0000016.npy new file mode 100644 index 0000000..cf60e5e --- /dev/null +++ b/margin_logs/step_0000016.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3491f1a945ae886098229ce149bae3ecda2fe592121f3a886066a50f7701971 +size 384 diff --git a/margin_logs/step_0000017.npy b/margin_logs/step_0000017.npy new file mode 100644 index 0000000..9fef39f --- /dev/null +++ b/margin_logs/step_0000017.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3359f13888d7043c55ddd8d002b98c1fac77ebbf27713eedda4ec579e2f188f +size 384 diff --git a/margin_logs/step_0000018.npy b/margin_logs/step_0000018.npy new file mode 100644 index 0000000..ace7598 --- /dev/null +++ b/margin_logs/step_0000018.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5c7281238da22eb4a59d6c559f54cab12635f40c035fface52b4c6833cd9937 +size 384 diff --git a/margin_logs/step_0000019.npy b/margin_logs/step_0000019.npy new file mode 100644 index 0000000..5bd6292 --- /dev/null +++ b/margin_logs/step_0000019.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85e88a5b7f682a3186f7e4490df97e651f54e7f1dcd86120aa95c589bb316bf1 +size 384 diff --git a/margin_logs/step_0000020.npy b/margin_logs/step_0000020.npy new file mode 100644 index 0000000..a07d0be --- /dev/null +++ b/margin_logs/step_0000020.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d1135052aa2af97fd2d20548eb14f607bbfbae2d8789081388fe760804d90e4 +size 384 diff --git a/margin_logs/step_0000021.npy b/margin_logs/step_0000021.npy new file mode 100644 index 0000000..b984408 --- /dev/null +++ b/margin_logs/step_0000021.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6046acda95afac392e30c017c4ae54bbbc3d2c44eb84e41c5b1d56432728cef3 +size 384 diff --git a/margin_logs/step_0000022.npy b/margin_logs/step_0000022.npy new file mode 100644 index 0000000..22edc84 --- /dev/null +++ b/margin_logs/step_0000022.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a46f7a670106e6c6c4878e84e2c8c3d5cfdb6d9ff24b7655529fcbe6dd9bb311 +size 384 diff --git a/margin_logs/step_0000023.npy b/margin_logs/step_0000023.npy new file mode 100644 index 0000000..704f88f --- /dev/null +++ b/margin_logs/step_0000023.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3317613dc06884a8b79e1a5d5d788ff9e2b529102de0c7afed3b49de5bc3609 +size 384 diff --git a/margin_logs/step_0000024.npy b/margin_logs/step_0000024.npy new file mode 100644 index 0000000..f656bf0 --- /dev/null +++ b/margin_logs/step_0000024.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d93bd395b23554cd4a8bb0a391df13f7d1f8ec9e361f0c1c7ce198259d7017a2 +size 384 diff --git a/margin_logs/step_0000025.npy b/margin_logs/step_0000025.npy new file mode 100644 index 0000000..d204f5a --- /dev/null +++ b/margin_logs/step_0000025.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee224805a5347f81c18a934b5a1d23eae5f928e6cecb60848e6c4b8e1e75a19d +size 384 diff --git a/margin_logs/step_0000026.npy b/margin_logs/step_0000026.npy new file mode 100644 index 0000000..4d5f6c7 --- /dev/null +++ b/margin_logs/step_0000026.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0004817de7fdaa407509c09b88ffcef70fa53e2fcf046f3dccfd99226196c7b8 +size 384 diff --git a/margin_logs/step_0000027.npy b/margin_logs/step_0000027.npy new file mode 100644 index 0000000..bd6a392 --- /dev/null +++ b/margin_logs/step_0000027.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:584d28a351e9a194a14473c6d2e752ce1078eef2b44608fdfae5e9955a3b4903 +size 384 diff --git a/margin_logs/step_0000028.npy b/margin_logs/step_0000028.npy new file mode 100644 index 0000000..56ee355 --- /dev/null +++ b/margin_logs/step_0000028.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fb244faeb91036bdae140597d5cc0f8da4ab20e61391000e39c1a3404a024e3 +size 384 diff --git a/margin_logs/step_0000029.npy b/margin_logs/step_0000029.npy new file mode 100644 index 0000000..2ead000 --- /dev/null +++ b/margin_logs/step_0000029.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:030f3bebeb7a2ee07523f86bbe6170b0093d9cb606e7ce4fe6f4cb1170628caa +size 384 diff --git a/margin_logs/step_0000030.npy b/margin_logs/step_0000030.npy new file mode 100644 index 0000000..de0102a --- /dev/null +++ b/margin_logs/step_0000030.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8981c9bc2824dc69997190dbd683aefef79f73b2d1e490908b61803ea72d0ca4 +size 384 diff --git a/margin_logs/step_0000031.npy b/margin_logs/step_0000031.npy new file mode 100644 index 0000000..3b6c35c --- /dev/null +++ b/margin_logs/step_0000031.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3b95a6030bd3f9cffa6daba32a33b44fbb953c67257b05d7fb32964adb9ccca +size 384 diff --git a/margin_logs/step_0000032.npy b/margin_logs/step_0000032.npy new file mode 100644 index 0000000..8a67c08 --- /dev/null +++ b/margin_logs/step_0000032.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58221de2f8f7c40af54c32d7fb50555b3dc4489f2cc0110d8ab7bd3aba35a54b +size 384 diff --git a/margin_logs/step_0000033.npy b/margin_logs/step_0000033.npy new file mode 100644 index 0000000..7235f6f --- /dev/null +++ b/margin_logs/step_0000033.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9acb3c7c2a2468fbbbc2deb63901d1423f18f296e20c2068349c549bdce1e8e +size 384 diff --git a/margin_logs/step_0000034.npy b/margin_logs/step_0000034.npy new file mode 100644 index 0000000..df088c2 --- /dev/null +++ b/margin_logs/step_0000034.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4708bbf3df4b15bcb3a5bf2b957175ddba56ab5603f231c45a1dde6c35946fec +size 384 diff --git a/margin_logs/step_0000035.npy b/margin_logs/step_0000035.npy new file mode 100644 index 0000000..0956ee4 --- /dev/null +++ b/margin_logs/step_0000035.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13747f8dee232725844d3ecbb74a7e6a5b924e26298bf811cf6a7da5a225590f +size 384 diff --git a/margin_logs/step_0000036.npy b/margin_logs/step_0000036.npy new file mode 100644 index 0000000..f42f3ac --- /dev/null +++ b/margin_logs/step_0000036.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11225cf441dac5477e973638491de0b0c203e78c323d74b053ed4141d08e02f9 +size 384 diff --git a/margin_logs/step_0000037.npy b/margin_logs/step_0000037.npy new file mode 100644 index 0000000..02d3830 --- /dev/null +++ b/margin_logs/step_0000037.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c0c64c2a3df229b78eae316049c143c999a40b4ddbd5076e4d50786772a9713 +size 384 diff --git a/margin_logs/step_0000038.npy b/margin_logs/step_0000038.npy new file mode 100644 index 0000000..424ee59 --- /dev/null +++ b/margin_logs/step_0000038.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b74a986a9420467dc6e8f9fb93d6f859c0317d3a26319d2c0182dd681c86f7 +size 384 diff --git a/margin_logs/step_0000039.npy b/margin_logs/step_0000039.npy new file mode 100644 index 0000000..dbc20fa --- /dev/null +++ b/margin_logs/step_0000039.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88474811fbc088cc00cb996ca39ff9fc20417f6f031ad8697570584e98b35c1a +size 384 diff --git a/margin_logs/step_0000040.npy b/margin_logs/step_0000040.npy new file mode 100644 index 0000000..01a3a86 --- /dev/null +++ b/margin_logs/step_0000040.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fb266adc0e2cb0747f783b7b4a0b7199e45c31b732147126d3da5a88190f49b +size 384 diff --git a/margin_logs/step_0000041.npy b/margin_logs/step_0000041.npy new file mode 100644 index 0000000..2bb8b19 --- /dev/null +++ b/margin_logs/step_0000041.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e1dd6916b206be49898976fd306f390f22838faaaccc3a254f1ce509d4d1146 +size 384 diff --git a/margin_logs/step_0000042.npy b/margin_logs/step_0000042.npy new file mode 100644 index 0000000..dfa2312 --- /dev/null +++ b/margin_logs/step_0000042.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cea542339ea6d7cc19d141328ce6c02cbbc66e3ebcf79bde31028fb6bafc5a6c +size 384 diff --git a/margin_logs/step_0000043.npy b/margin_logs/step_0000043.npy new file mode 100644 index 0000000..5928105 --- /dev/null +++ b/margin_logs/step_0000043.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbec30ed90cd61166a9c758ae1fa8db7951ed61a64552217ddf2fde31e795cfc +size 384 diff --git a/margin_logs/step_0000044.npy b/margin_logs/step_0000044.npy new file mode 100644 index 0000000..8595096 --- /dev/null +++ b/margin_logs/step_0000044.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f93d872950b343a95089c7b2db5013c239f4175875f284bd33175cfadcd7b36a +size 384 diff --git a/margin_logs/step_0000045.npy b/margin_logs/step_0000045.npy new file mode 100644 index 0000000..108f0ad --- /dev/null +++ b/margin_logs/step_0000045.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1e296285868c53187edd00f4346b56466f0ea67c4899b24054e6432808064ae +size 384 diff --git a/margin_logs/step_0000046.npy b/margin_logs/step_0000046.npy new file mode 100644 index 0000000..a3694d9 --- /dev/null +++ b/margin_logs/step_0000046.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da3d876137dbe4b7de92e9e722076381d1f4ab705cd3fe5e942223013c347a52 +size 384 diff --git a/margin_logs/step_0000047.npy b/margin_logs/step_0000047.npy new file mode 100644 index 0000000..ee71d7d --- /dev/null +++ b/margin_logs/step_0000047.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b7756dfb717cb14383a38aeaa0c02d255b335af3cfcbae8ce6c695f848ca8e2 +size 384 diff --git a/margin_logs/step_0000048.npy b/margin_logs/step_0000048.npy new file mode 100644 index 0000000..a2ef572 --- /dev/null +++ b/margin_logs/step_0000048.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77446c4f843d1c98ad0b5b7260ff72affdba9ed6b556f04eaab8b64c1a4b0481 +size 384 diff --git a/margin_logs/step_0000049.npy b/margin_logs/step_0000049.npy new file mode 100644 index 0000000..eb0b55f --- /dev/null +++ b/margin_logs/step_0000049.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01cdc544a4d05a4917e1bfbd89ac96429fee3e4356fd3aca5ca027d6accf75f1 +size 384 diff --git a/margin_logs/step_0000050.npy b/margin_logs/step_0000050.npy new file mode 100644 index 0000000..f43cec2 --- /dev/null +++ b/margin_logs/step_0000050.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0673c231bd0697e89bd00d30ece1388a5d2877b2e37b3430ec5434019adb4305 +size 384 diff --git a/margin_logs/step_0000051.npy b/margin_logs/step_0000051.npy new file mode 100644 index 0000000..91d15cd --- /dev/null +++ b/margin_logs/step_0000051.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5b4abc3b2e95e2badee0abfc9ef8e546857073cee5c86829e903265c7a3414b +size 384 diff --git a/margin_logs/step_0000052.npy b/margin_logs/step_0000052.npy new file mode 100644 index 0000000..194f810 --- /dev/null +++ b/margin_logs/step_0000052.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57793dfa1190eced7e1b9edd8c14a4ef0f4ca693d759d6b7041a14861128b40d +size 384 diff --git a/margin_logs/step_0000053.npy b/margin_logs/step_0000053.npy new file mode 100644 index 0000000..ef7cb59 --- /dev/null +++ b/margin_logs/step_0000053.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17e82cec1094c824e39c8b6454df673d89266f76b96a1d048909ce84c985b801 +size 384 diff --git a/margin_logs/step_0000054.npy b/margin_logs/step_0000054.npy new file mode 100644 index 0000000..d503761 --- /dev/null +++ b/margin_logs/step_0000054.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a0eb911afaaa227d60aeccbf699c77541175eb0ab4f83148a1f7ec4c7086fb2 +size 384 diff --git a/margin_logs/step_0000055.npy b/margin_logs/step_0000055.npy new file mode 100644 index 0000000..716cb63 --- /dev/null +++ b/margin_logs/step_0000055.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d12ccbae3876496f9b9c72b8c84390d2359aa045a9c012171cfb0dfd1651ef2a +size 384 diff --git a/margin_logs/step_0000056.npy b/margin_logs/step_0000056.npy new file mode 100644 index 0000000..229ab31 --- /dev/null +++ b/margin_logs/step_0000056.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3c166de89e90697915c134af6c03cb936247195e1b968629862d6e9f23acc2a +size 384 diff --git a/margin_logs/step_0000057.npy b/margin_logs/step_0000057.npy new file mode 100644 index 0000000..d7e6146 --- /dev/null +++ b/margin_logs/step_0000057.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b1bb8b628e83e197d86327e078e8f2a5de3da00feb915f1cf72c906e2da496 +size 384 diff --git a/margin_logs/step_0000058.npy b/margin_logs/step_0000058.npy new file mode 100644 index 0000000..88f2936 --- /dev/null +++ b/margin_logs/step_0000058.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af97fbf0191ba3a98d9f04a2605e84154a10ee8477b9fb2bf5c86f0a17962951 +size 384 diff --git a/margin_logs/step_0000059.npy b/margin_logs/step_0000059.npy new file mode 100644 index 0000000..fdd8bbf --- /dev/null +++ b/margin_logs/step_0000059.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f289c9a1a77122d9c97c6195de70d13b3148c60210aa9eef18760f7dc719a40e +size 384 diff --git a/margin_logs/step_0000060.npy b/margin_logs/step_0000060.npy new file mode 100644 index 0000000..c305d41 --- /dev/null +++ b/margin_logs/step_0000060.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d62e2638551f37913158a9325442ab9057149c1c0d1cf702d0a4c6375c16797 +size 384 diff --git a/margin_logs/step_0000061.npy b/margin_logs/step_0000061.npy new file mode 100644 index 0000000..a697d68 --- /dev/null +++ b/margin_logs/step_0000061.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc628e87de952041cdaf53b178576cca2b3f790a9b6fbd5656e9a2b7a71023c7 +size 384 diff --git a/margin_logs/step_0000062.npy b/margin_logs/step_0000062.npy new file mode 100644 index 0000000..feb7d13 --- /dev/null +++ b/margin_logs/step_0000062.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f970e64bc52a9d53adc4e3a9ed371de784d7886164a26f1a29511adf595a14db +size 384 diff --git a/margin_logs/step_0000063.npy b/margin_logs/step_0000063.npy new file mode 100644 index 0000000..606ac7d --- /dev/null +++ b/margin_logs/step_0000063.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f17dabc3257e599258bb23e56358944bb5d0c776cda1a32dce30a2bb2087a693 +size 384 diff --git a/margin_logs/step_0000064.npy b/margin_logs/step_0000064.npy new file mode 100644 index 0000000..2c34cf5 --- /dev/null +++ b/margin_logs/step_0000064.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bc22ab0db72ba523d07999698374a404261f1103f11186d9d88fcbee75d2a7 +size 384 diff --git a/margin_logs/step_0000065.npy b/margin_logs/step_0000065.npy new file mode 100644 index 0000000..376dcff --- /dev/null +++ b/margin_logs/step_0000065.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4adbf9bdc06fda74a4729ce2ab614e938034065c21a9db93ed20b1affa6eabce +size 384 diff --git a/margin_logs/step_0000066.npy b/margin_logs/step_0000066.npy new file mode 100644 index 0000000..b3e25b2 --- /dev/null +++ b/margin_logs/step_0000066.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4af85e089c959002ac573acbdc9f8ccf5c09d8a7558064060c3be8cc0786859f +size 384 diff --git a/margin_logs/step_0000067.npy b/margin_logs/step_0000067.npy new file mode 100644 index 0000000..f0348b4 --- /dev/null +++ b/margin_logs/step_0000067.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ca735c6e6a798fbaf900d8a717626373be07295d1a3a0c76bf1c1db9e20fb18 +size 384 diff --git a/margin_logs/step_0000068.npy b/margin_logs/step_0000068.npy new file mode 100644 index 0000000..2c08f7e --- /dev/null +++ b/margin_logs/step_0000068.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80af0b2437af46601c6a659a8b4d134887cad158fcd98b106f108e0882a421ac +size 384 diff --git a/margin_logs/step_0000069.npy b/margin_logs/step_0000069.npy new file mode 100644 index 0000000..4906c6f --- /dev/null +++ b/margin_logs/step_0000069.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f788d63929e702385e7fe5855c3a4b28cf295baaad886444c6aa49adf2052f +size 384 diff --git a/margin_logs/step_0000070.npy b/margin_logs/step_0000070.npy new file mode 100644 index 0000000..b1554b4 --- /dev/null +++ b/margin_logs/step_0000070.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c880f8fc844fbff7399da9547cd60c9be0332ee8ea6129f0118cdffd76b954d6 +size 384 diff --git a/margin_logs/step_0000071.npy b/margin_logs/step_0000071.npy new file mode 100644 index 0000000..e9da1ed --- /dev/null +++ b/margin_logs/step_0000071.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:222557333f04cb25aac185c6438650a74b60b6708d3affe2a9cc86136c6e34d4 +size 384 diff --git a/margin_logs/step_0000072.npy b/margin_logs/step_0000072.npy new file mode 100644 index 0000000..6273480 --- /dev/null +++ b/margin_logs/step_0000072.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26b5c71c431ea1b5c00bcdca9db6403350abafddf32b875a1a2fb415250962fa +size 384 diff --git a/margin_logs/step_0000073.npy b/margin_logs/step_0000073.npy new file mode 100644 index 0000000..a989d83 --- /dev/null +++ b/margin_logs/step_0000073.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:356994fdaea28a87e5f231a90bd097532b033ecc2796eda0c07ad00b1a330efc +size 384 diff --git a/margin_logs/step_0000074.npy b/margin_logs/step_0000074.npy new file mode 100644 index 0000000..5862b8d --- /dev/null +++ b/margin_logs/step_0000074.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce10f5ff56532586a5496d7419c3df12ff5c595d01aca8117b9bb8064a76588a +size 384 diff --git a/margin_logs/step_0000075.npy b/margin_logs/step_0000075.npy new file mode 100644 index 0000000..885c77f --- /dev/null +++ b/margin_logs/step_0000075.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a38d1e57112ac6d4617aeec868a05f6eaca805afb4d3d9638c5dfe8b8681c1 +size 384 diff --git a/margin_logs/step_0000076.npy b/margin_logs/step_0000076.npy new file mode 100644 index 0000000..adc296c --- /dev/null +++ b/margin_logs/step_0000076.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70c4ae400519d32b18f5dfa795d9b0b38accb8f538827cfe2a20d6ac75a0fdcb +size 384 diff --git a/margin_logs/step_0000077.npy b/margin_logs/step_0000077.npy new file mode 100644 index 0000000..5506c1b --- /dev/null +++ b/margin_logs/step_0000077.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3987cae2f3f039f776ed261f0d8f3a3082ac6e56f726af6b0870a47b050ec5d +size 384 diff --git a/margin_logs/step_0000078.npy b/margin_logs/step_0000078.npy new file mode 100644 index 0000000..76e2d7e --- /dev/null +++ b/margin_logs/step_0000078.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da4eec701f1ef9a9e66f54418d1ae40f1c0d719c457c919a5d2e033ede6aa86 +size 384 diff --git a/margin_logs/step_0000079.npy b/margin_logs/step_0000079.npy new file mode 100644 index 0000000..f3a0e37 --- /dev/null +++ b/margin_logs/step_0000079.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b3067e260e30f5e120ad42a9feb6b19fde9c5973ac1411ab4cec459d4781ec +size 384 diff --git a/margin_logs/step_0000080.npy b/margin_logs/step_0000080.npy new file mode 100644 index 0000000..a766f80 --- /dev/null +++ b/margin_logs/step_0000080.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a657d77c323b33cad93f244a5de962fb7f6ac4160dd9a65c8b19a979fea51808 +size 384 diff --git a/margin_logs/step_0000081.npy b/margin_logs/step_0000081.npy new file mode 100644 index 0000000..d00bdd1 --- /dev/null +++ b/margin_logs/step_0000081.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef5ea462286c9f7e515898142f5a460baf82395ecc3555c4a5444faddfe39bcf +size 384 diff --git a/margin_logs/step_0000082.npy b/margin_logs/step_0000082.npy new file mode 100644 index 0000000..d4e4f3e --- /dev/null +++ b/margin_logs/step_0000082.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9e8b196e4d7a4b124295cf138d586caa1b3013552a0305ddbad3fa1e8222622 +size 384 diff --git a/margin_logs/step_0000083.npy b/margin_logs/step_0000083.npy new file mode 100644 index 0000000..a403814 --- /dev/null +++ b/margin_logs/step_0000083.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a17b91ce4cacafc4b80d6141c028752bdc964e86beabbe2b69029c87d4deb59 +size 384 diff --git a/margin_logs/step_0000084.npy b/margin_logs/step_0000084.npy new file mode 100644 index 0000000..ae3986d --- /dev/null +++ b/margin_logs/step_0000084.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fe698e7c95176a8ae7574d50c4f86319ea70d9d9c181c2a666a3711c2867abe +size 384 diff --git a/margin_logs/step_0000085.npy b/margin_logs/step_0000085.npy new file mode 100644 index 0000000..42f5b02 --- /dev/null +++ b/margin_logs/step_0000085.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c998f0336e1ef59dea6a2834b49aa656f5e59c8d7e78c5c6c22eac0f5a36ac1 +size 384 diff --git a/margin_logs/step_0000086.npy b/margin_logs/step_0000086.npy new file mode 100644 index 0000000..d178562 --- /dev/null +++ b/margin_logs/step_0000086.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c2ff15429668b005a85f1549e9e99bea904e0e21a89d2593d4f512f86052af +size 384 diff --git a/margin_logs/step_0000087.npy b/margin_logs/step_0000087.npy new file mode 100644 index 0000000..705f4ca --- /dev/null +++ b/margin_logs/step_0000087.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3a55d88805f9a81e5806cc8408464785caac35b3c868ad54bced56eda4316f6 +size 384 diff --git a/margin_logs/step_0000088.npy b/margin_logs/step_0000088.npy new file mode 100644 index 0000000..4593d6e --- /dev/null +++ b/margin_logs/step_0000088.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d129f4266b67e01a1ff00de01b614903d00113fd8d403a097e53b5ea6a64e6a9 +size 384 diff --git a/margin_logs/step_0000089.npy b/margin_logs/step_0000089.npy new file mode 100644 index 0000000..9cc7b6b --- /dev/null +++ b/margin_logs/step_0000089.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d76f3032491a95910a9acbfec4a3dfa87265f0129680cd09031d6e27b74e673b +size 384 diff --git a/margin_logs/step_0000090.npy b/margin_logs/step_0000090.npy new file mode 100644 index 0000000..6285393 --- /dev/null +++ b/margin_logs/step_0000090.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd1dc15b90fc4fbf38707832f8f1cbd6230b1e8efa665e5c547ecc113221a6b5 +size 384 diff --git a/margin_logs/step_0000091.npy b/margin_logs/step_0000091.npy new file mode 100644 index 0000000..6663f1f --- /dev/null +++ b/margin_logs/step_0000091.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e5fca03fc113d7e1ac2921fab78db283ba932dddb69a23344cc8ba9d39941bd +size 384 diff --git a/margin_logs/step_0000092.npy b/margin_logs/step_0000092.npy new file mode 100644 index 0000000..fff7dec --- /dev/null +++ b/margin_logs/step_0000092.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e42653992cb6f62a726f0aef434ea57751c4a2d72c078f4adebf9087b5c3b2 +size 384 diff --git a/margin_logs/step_0000093.npy b/margin_logs/step_0000093.npy new file mode 100644 index 0000000..068365e --- /dev/null +++ b/margin_logs/step_0000093.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f940ec11dc69bb40425de59b81d0253aa84ed123dbcbcae83d48b743c4bbd1a7 +size 384 diff --git a/margin_logs/step_0000094.npy b/margin_logs/step_0000094.npy new file mode 100644 index 0000000..a9264b4 --- /dev/null +++ b/margin_logs/step_0000094.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40adcb0d8f7e860fd6ebc527f4186f70576e47531e323f0c7b54365710228ff3 +size 384 diff --git a/margin_logs/step_0000095.npy b/margin_logs/step_0000095.npy new file mode 100644 index 0000000..e5dc9f4 --- /dev/null +++ b/margin_logs/step_0000095.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56efc6cc79b0a4dc7cb3c49f8da56ac77226d6c5d9315acbf2cf1f31a8ed06f3 +size 384 diff --git a/margin_logs/step_0000096.npy b/margin_logs/step_0000096.npy new file mode 100644 index 0000000..ce78cbf --- /dev/null +++ b/margin_logs/step_0000096.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7780a33a49d374434b9971bd45684fba83ed88ad5155ab60648fb0310849e98c +size 384 diff --git a/margin_logs/step_0000097.npy b/margin_logs/step_0000097.npy new file mode 100644 index 0000000..83c74fb --- /dev/null +++ b/margin_logs/step_0000097.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0077056a5477c7d9e102ec95e851153ac34fd7f2a2b0aa08465f81a045f54ff +size 384 diff --git a/margin_logs/step_0000098.npy b/margin_logs/step_0000098.npy new file mode 100644 index 0000000..4e27154 --- /dev/null +++ b/margin_logs/step_0000098.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1af3a8f02d5b8cc1f610fa0e8aab9be64b0e9c3d52f9d70a9dd06931b6c9110 +size 384 diff --git a/margin_logs/step_0000099.npy b/margin_logs/step_0000099.npy new file mode 100644 index 0000000..3b0f197 --- /dev/null +++ b/margin_logs/step_0000099.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:464056991b777459d8f0f068db7c27001681efc838a684d094020099bcd92016 +size 384 diff --git a/margin_logs/step_0000100.npy b/margin_logs/step_0000100.npy new file mode 100644 index 0000000..2773891 --- /dev/null +++ b/margin_logs/step_0000100.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a62306fd91fc4f2134f021958ca35a2782e91677490696d9db48b7f461ef951 +size 384 diff --git a/margin_logs/step_0000101.npy b/margin_logs/step_0000101.npy new file mode 100644 index 0000000..ec2dd34 --- /dev/null +++ b/margin_logs/step_0000101.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:168d2e03054364606ac54c42ef5bbebe9609064049fb37ff98f0f22200e76ed3 +size 384 diff --git a/margin_logs/step_0000102.npy b/margin_logs/step_0000102.npy new file mode 100644 index 0000000..ee7530b --- /dev/null +++ b/margin_logs/step_0000102.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b52cd220c72d3a8995fd8acb4dfd2aadca008f566a7970df98363c20c5f5b3a3 +size 384 diff --git a/margin_logs/step_0000103.npy b/margin_logs/step_0000103.npy new file mode 100644 index 0000000..66bfb2f --- /dev/null +++ b/margin_logs/step_0000103.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee4fe6567bf5c2cfb1229ad3f4b1c248f47804b00e4499d647baa9b8afcec449 +size 384 diff --git a/margin_logs/step_0000104.npy b/margin_logs/step_0000104.npy new file mode 100644 index 0000000..1591f13 --- /dev/null +++ b/margin_logs/step_0000104.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5f6e9c50fd21dceef9bf282c59d5d61f2f38618178a71101d4784dfd42b2f14 +size 384 diff --git a/margin_logs/step_0000105.npy b/margin_logs/step_0000105.npy new file mode 100644 index 0000000..e4c489e --- /dev/null +++ b/margin_logs/step_0000105.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ab3e9f6a97e01f2088bdf2b12215228545488540df71e04f8ed6720627e2442 +size 384 diff --git a/margin_logs/step_0000106.npy b/margin_logs/step_0000106.npy new file mode 100644 index 0000000..0392311 --- /dev/null +++ b/margin_logs/step_0000106.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:639ad639eb80808de8178a3dd74761b593d33f5872585875d34b5266abfb872e +size 384 diff --git a/margin_logs/step_0000107.npy b/margin_logs/step_0000107.npy new file mode 100644 index 0000000..ffa8883 --- /dev/null +++ b/margin_logs/step_0000107.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08c67082d1f4303afea005686f43fb99a4bcae47e12d3360077af1fbb0ac8113 +size 384 diff --git a/margin_logs/step_0000108.npy b/margin_logs/step_0000108.npy new file mode 100644 index 0000000..87b9928 --- /dev/null +++ b/margin_logs/step_0000108.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff8447cb28bf54863ce7ce64cac98933ee53f2025072677101e0df3769e22fe7 +size 384 diff --git a/margin_logs/step_0000109.npy b/margin_logs/step_0000109.npy new file mode 100644 index 0000000..9829c4b --- /dev/null +++ b/margin_logs/step_0000109.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f06c7d46f6379cd6a7027db1fe9817cb0808485756cca7940fb4ddf93584cb4b +size 384 diff --git a/margin_logs/step_0000110.npy b/margin_logs/step_0000110.npy new file mode 100644 index 0000000..a3d50b9 --- /dev/null +++ b/margin_logs/step_0000110.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fa3a4da23bd1e274be83a69351b38a04f0334a5438e9f6d7b05413f5998c044 +size 384 diff --git a/margin_logs/step_0000111.npy b/margin_logs/step_0000111.npy new file mode 100644 index 0000000..dd76137 --- /dev/null +++ b/margin_logs/step_0000111.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5824192ee7c0fbd58acce75dfb1367f17eb9ed1e917c98ad5506a0f449e7171c +size 384 diff --git a/margin_logs/step_0000112.npy b/margin_logs/step_0000112.npy new file mode 100644 index 0000000..a7f0d24 --- /dev/null +++ b/margin_logs/step_0000112.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf1968bb8ac9baa221a8a84eb7b44c05022b89422447399a288a5706c5cf56d +size 384 diff --git a/margin_logs/step_0000113.npy b/margin_logs/step_0000113.npy new file mode 100644 index 0000000..31e3a2b --- /dev/null +++ b/margin_logs/step_0000113.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e678f1caa5f0ffe1f3864a6ada8f96e3ce8ed79395494d3968af886a88d10709 +size 384 diff --git a/margin_logs/step_0000114.npy b/margin_logs/step_0000114.npy new file mode 100644 index 0000000..429ba64 --- /dev/null +++ b/margin_logs/step_0000114.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c452893ff38d20439a787af0649c5e73d9f57fe49114290d518139cffbb2f0b +size 384 diff --git a/margin_logs/step_0000115.npy b/margin_logs/step_0000115.npy new file mode 100644 index 0000000..73f3458 --- /dev/null +++ b/margin_logs/step_0000115.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6324023f78e550c592c7308fdadc8d6031df7b7e6e1033c2be38d312de45e55c +size 384 diff --git a/margin_logs/step_0000116.npy b/margin_logs/step_0000116.npy new file mode 100644 index 0000000..ab9ebac --- /dev/null +++ b/margin_logs/step_0000116.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bc183b2109c51de6ba902a3926ca97ac4dcd414622a97e95fc4df8cae0272e4 +size 384 diff --git a/margin_logs/step_0000117.npy b/margin_logs/step_0000117.npy new file mode 100644 index 0000000..2eb261d --- /dev/null +++ b/margin_logs/step_0000117.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5dece5d036a6dbdc98a4d2ac8377a2838bec525843831513c8d315774d95b35 +size 384 diff --git a/margin_logs/step_0000118.npy b/margin_logs/step_0000118.npy new file mode 100644 index 0000000..07c2ce6 --- /dev/null +++ b/margin_logs/step_0000118.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05034a33a66ff8ed763972996b333349dd9564feddfe8a5e16dcf308a454af28 +size 384 diff --git a/margin_logs/step_0000119.npy b/margin_logs/step_0000119.npy new file mode 100644 index 0000000..8768cca --- /dev/null +++ b/margin_logs/step_0000119.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a8101f13cc6e0ffe6b04e5439337d29be3b90ec1b2ceb05e646ece5f9cff58d +size 384 diff --git a/margin_logs/step_0000120.npy b/margin_logs/step_0000120.npy new file mode 100644 index 0000000..faaf42c --- /dev/null +++ b/margin_logs/step_0000120.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e82beec9eff3190f7175c7dd28a4fc337dd77377448d7bc713f7c7d59bc308 +size 384 diff --git a/margin_logs/step_0000121.npy b/margin_logs/step_0000121.npy new file mode 100644 index 0000000..ca0f06e --- /dev/null +++ b/margin_logs/step_0000121.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17734f75a7aa323d0a339ac96f5dab533ecf3dbf9a24bb27d895d84085834eb3 +size 384 diff --git a/margin_logs/step_0000122.npy b/margin_logs/step_0000122.npy new file mode 100644 index 0000000..1d9cef0 --- /dev/null +++ b/margin_logs/step_0000122.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7627b5f4138dab3619b09bc8ed4b981c1abff6712e10cc417dcb4e4ea7c7196 +size 384 diff --git a/margin_logs/step_0000123.npy b/margin_logs/step_0000123.npy new file mode 100644 index 0000000..57abea2 --- /dev/null +++ b/margin_logs/step_0000123.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40f9034a6a6309e64f0e11644407d9dc5a159c5f1f87d40b5de610297e9b2894 +size 384 diff --git a/margin_logs/step_0000124.npy b/margin_logs/step_0000124.npy new file mode 100644 index 0000000..215d9c9 --- /dev/null +++ b/margin_logs/step_0000124.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9202933d0f1174d9910124651d8bfb3e1e565b3a772ab86d6c5d37931b29941 +size 384 diff --git a/margin_logs/step_0000125.npy b/margin_logs/step_0000125.npy new file mode 100644 index 0000000..a9a06df --- /dev/null +++ b/margin_logs/step_0000125.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a323d7a101bfcc3f8ca146e637144e7e4302e975caa2f0add87114aaa07969e +size 384 diff --git a/margin_logs/step_0000126.npy b/margin_logs/step_0000126.npy new file mode 100644 index 0000000..84f0ba7 --- /dev/null +++ b/margin_logs/step_0000126.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:274ac64792f5fb4af04bd5b7a4415aca26a2a4b19805fb0b71e02888c836d99b +size 384 diff --git a/margin_logs/step_0000127.npy b/margin_logs/step_0000127.npy new file mode 100644 index 0000000..dd18e1e --- /dev/null +++ b/margin_logs/step_0000127.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e682a373da54adb4655be6c946e4e2f879a8664d529a0258fcab06a37f8686f3 +size 384 diff --git a/margin_logs/step_0000128.npy b/margin_logs/step_0000128.npy new file mode 100644 index 0000000..3f22c49 --- /dev/null +++ b/margin_logs/step_0000128.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:389e7f05edacf56bfb5c97e26b2eed696847c8c24c17e8797b45cd28c7e38777 +size 384 diff --git a/margin_logs/step_0000129.npy b/margin_logs/step_0000129.npy new file mode 100644 index 0000000..c2f1362 --- /dev/null +++ b/margin_logs/step_0000129.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:519c69ac0acfbfcfaffdfc8d82d1789f78c43db91d540fddda2d1517dd35439a +size 384 diff --git a/margin_logs/step_0000130.npy b/margin_logs/step_0000130.npy new file mode 100644 index 0000000..bf812ff --- /dev/null +++ b/margin_logs/step_0000130.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd35fe344c36614d3bc70424b0aaa966dc85205f978a91f58b7f105a3b6fa4fc +size 384 diff --git a/margin_logs/step_0000131.npy b/margin_logs/step_0000131.npy new file mode 100644 index 0000000..8d3ad0e --- /dev/null +++ b/margin_logs/step_0000131.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38a7f53cad735cc82cd95621f1eea92b0d5660918542918c9ff73fbaaa8318f4 +size 384 diff --git a/margin_logs/step_0000132.npy b/margin_logs/step_0000132.npy new file mode 100644 index 0000000..f35b0c1 --- /dev/null +++ b/margin_logs/step_0000132.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8b69195a0217bb96507b8e91a7fd2acf0b710800c0143909e992c452683a27a +size 384 diff --git a/margin_logs/step_0000133.npy b/margin_logs/step_0000133.npy new file mode 100644 index 0000000..13f3219 --- /dev/null +++ b/margin_logs/step_0000133.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3df4bc40be68eb8716228b08da698bbf64a08c60ef2d30f35f1b094f0b15636 +size 384 diff --git a/margin_logs/step_0000134.npy b/margin_logs/step_0000134.npy new file mode 100644 index 0000000..982a699 --- /dev/null +++ b/margin_logs/step_0000134.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:091a5a5e834866c0a13f1bbd7092939d8e367449dc5a19d7b65ee4ee7a61edd3 +size 384 diff --git a/margin_logs/step_0000135.npy b/margin_logs/step_0000135.npy new file mode 100644 index 0000000..ef59d83 --- /dev/null +++ b/margin_logs/step_0000135.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2195ef434da345abf1869bbf397a1f321a1f41ee16df8eb43e655d05c9f9f941 +size 384 diff --git a/margin_logs/step_0000136.npy b/margin_logs/step_0000136.npy new file mode 100644 index 0000000..5b74ded --- /dev/null +++ b/margin_logs/step_0000136.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8790d3a93aeb229de7ba5d96297bf1a609d6cfc86787c36939a7191b405e4cc8 +size 384 diff --git a/margin_logs/step_0000137.npy b/margin_logs/step_0000137.npy new file mode 100644 index 0000000..5b40fed --- /dev/null +++ b/margin_logs/step_0000137.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb229145b3c19dccea8c3b5cbd86aa4e045e5723558332de61c37b8d7ec5ac91 +size 384 diff --git a/margin_logs/step_0000138.npy b/margin_logs/step_0000138.npy new file mode 100644 index 0000000..dc9b9cc --- /dev/null +++ b/margin_logs/step_0000138.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b141544d1cc35f86708f0d62b2e3085f6f5396174988ea1e3e2b886814c7952 +size 384 diff --git a/margin_logs/step_0000139.npy b/margin_logs/step_0000139.npy new file mode 100644 index 0000000..add4a35 --- /dev/null +++ b/margin_logs/step_0000139.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f54f6b3e175d736882e0a5cb1a933a39e1782cb220f54af81ef7726f1796d637 +size 384 diff --git a/margin_logs/step_0000140.npy b/margin_logs/step_0000140.npy new file mode 100644 index 0000000..386726c --- /dev/null +++ b/margin_logs/step_0000140.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93201bd077ebbcfd0090ec819b5e88a68857b990365aa6156b2cb0a501fb9655 +size 384 diff --git a/margin_logs/step_0000141.npy b/margin_logs/step_0000141.npy new file mode 100644 index 0000000..b7a1418 --- /dev/null +++ b/margin_logs/step_0000141.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6e43c7aa5b7f0c51c69df27843cc423fe67e7e95c142c4a4c22c7ed94fd8efd +size 384 diff --git a/margin_logs/step_0000142.npy b/margin_logs/step_0000142.npy new file mode 100644 index 0000000..05c2b79 --- /dev/null +++ b/margin_logs/step_0000142.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:861cf5368ffc0ff4d6a861194e3c57429ea0a46fc84901188ec7fa0cca9cf252 +size 384 diff --git a/margin_logs/step_0000143.npy b/margin_logs/step_0000143.npy new file mode 100644 index 0000000..4fd9dc5 --- /dev/null +++ b/margin_logs/step_0000143.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:778622f7dbf5ee2c7cf9b734c374a747bfc6fbf69e553a3187d7eaffee26e70e +size 384 diff --git a/margin_logs/step_0000144.npy b/margin_logs/step_0000144.npy new file mode 100644 index 0000000..8b71a7a --- /dev/null +++ b/margin_logs/step_0000144.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e97b9afe5fe0c2971da4e5637e28f9c7863f183f46582ec2e6f77218f9451da +size 384 diff --git a/margin_logs/step_0000145.npy b/margin_logs/step_0000145.npy new file mode 100644 index 0000000..e140c88 --- /dev/null +++ b/margin_logs/step_0000145.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23f3a230b841a1a419f6f763981db6023ce91937aa96df9076ea5578e1a877b8 +size 384 diff --git a/margin_logs/step_0000146.npy b/margin_logs/step_0000146.npy new file mode 100644 index 0000000..4405637 --- /dev/null +++ b/margin_logs/step_0000146.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b35be1b3f86b42f33074b2592473838bb891e5df03fff50a71b70924390441fb +size 384 diff --git a/margin_logs/step_0000147.npy b/margin_logs/step_0000147.npy new file mode 100644 index 0000000..8d0b651 --- /dev/null +++ b/margin_logs/step_0000147.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e39ae8df07f8c5d374d2582e5e77c12c6bfd7e6a93968bf6dcc184acf690f6b +size 384 diff --git a/margin_logs/step_0000148.npy b/margin_logs/step_0000148.npy new file mode 100644 index 0000000..ef25bb7 --- /dev/null +++ b/margin_logs/step_0000148.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70abcd27b2f77dba999f5d908eaab1bec86eac6fff45861d3d789f97857437bf +size 384 diff --git a/margin_logs/step_0000149.npy b/margin_logs/step_0000149.npy new file mode 100644 index 0000000..a1624ee --- /dev/null +++ b/margin_logs/step_0000149.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7ffe5b4ace49b6add2ec8fa606d763b4ef8dabbaec0393c06ac3a7e31b40483 +size 384 diff --git a/margin_logs/step_0000150.npy b/margin_logs/step_0000150.npy new file mode 100644 index 0000000..e50bbc2 --- /dev/null +++ b/margin_logs/step_0000150.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baa6f6c8db21331bab21f865fb26d0d4d021162de8039e16feda15e0949112cf +size 384 diff --git a/margin_logs/step_0000151.npy b/margin_logs/step_0000151.npy new file mode 100644 index 0000000..99d70c9 --- /dev/null +++ b/margin_logs/step_0000151.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a448fdf78067b19f9dd05649e8eace119eeefaad5d5ebe80acd937aebaefbbe7 +size 384 diff --git a/margin_logs/step_0000152.npy b/margin_logs/step_0000152.npy new file mode 100644 index 0000000..b0323a6 --- /dev/null +++ b/margin_logs/step_0000152.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43ae3943f3ce1c522e657652073394c5699a31f0da76a3c8dd68e110c782e99a +size 384 diff --git a/margin_logs/step_0000153.npy b/margin_logs/step_0000153.npy new file mode 100644 index 0000000..81990e6 --- /dev/null +++ b/margin_logs/step_0000153.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe7245444378bbc7c6cb1160df514cdd56c26904ed1ded22b1f4fa63231e7708 +size 384 diff --git a/margin_logs/step_0000154.npy b/margin_logs/step_0000154.npy new file mode 100644 index 0000000..ca40342 --- /dev/null +++ b/margin_logs/step_0000154.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f325cf470d547219ff92ae5624c52e2e2fd540548b2a8f63c5d6e044dd98215 +size 384 diff --git a/margin_logs/step_0000155.npy b/margin_logs/step_0000155.npy new file mode 100644 index 0000000..bb1f758 --- /dev/null +++ b/margin_logs/step_0000155.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1bdd4129f8c1f7dad5594b3259c571c6989c495841cbc0854c98111e607e48a +size 384 diff --git a/margin_logs/step_0000156.npy b/margin_logs/step_0000156.npy new file mode 100644 index 0000000..070bb25 --- /dev/null +++ b/margin_logs/step_0000156.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a3615bcfe77e6e185d74f6040aa796e036482aaf19f956bef792efe4c4cb4ae +size 384 diff --git a/margin_logs/step_0000157.npy b/margin_logs/step_0000157.npy new file mode 100644 index 0000000..2231a0b --- /dev/null +++ b/margin_logs/step_0000157.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1b35648d8aa602ca02f8a939692afe631b1d515715091a9e3c839ada414efdf +size 384 diff --git a/margin_logs/step_0000158.npy b/margin_logs/step_0000158.npy new file mode 100644 index 0000000..3f8769d --- /dev/null +++ b/margin_logs/step_0000158.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c44010389fedf8ee659337caba47f4d270cae06c7dfaa2729317be99332a0b66 +size 384 diff --git a/margin_logs/step_0000159.npy b/margin_logs/step_0000159.npy new file mode 100644 index 0000000..24cc16e --- /dev/null +++ b/margin_logs/step_0000159.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e10043f632a08582106ebf3118d0b2d3ef19465e2d1dac08c32f2e658e795ae +size 384 diff --git a/margin_logs/step_0000160.npy b/margin_logs/step_0000160.npy new file mode 100644 index 0000000..a0599a6 --- /dev/null +++ b/margin_logs/step_0000160.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47be60d510692504ec481250b0faccb0a243a467dfb8012b523f1f9beb7b1a5b +size 384 diff --git a/margin_logs/step_0000161.npy b/margin_logs/step_0000161.npy new file mode 100644 index 0000000..0496038 --- /dev/null +++ b/margin_logs/step_0000161.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24022e59036d47ae275fa36f6ba957177144277ff24a53693ebb8c4b6c0a09e3 +size 384 diff --git a/margin_logs/step_0000162.npy b/margin_logs/step_0000162.npy new file mode 100644 index 0000000..d18a3fe --- /dev/null +++ b/margin_logs/step_0000162.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5accb3b707a8f07496ed8cbd8277d212490422230588b13692cb6e1d73955fe6 +size 384 diff --git a/margin_logs/step_0000163.npy b/margin_logs/step_0000163.npy new file mode 100644 index 0000000..0592105 --- /dev/null +++ b/margin_logs/step_0000163.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8856bf99079b83ecde22025d324a6f428650aedc67b7830dd8ec7d209e1e9cc +size 384 diff --git a/margin_logs/step_0000164.npy b/margin_logs/step_0000164.npy new file mode 100644 index 0000000..a6f9e72 --- /dev/null +++ b/margin_logs/step_0000164.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c10e3a611dc3e5dedbdc74f75adba2106b2cdab6da07bfc078cada22e2233700 +size 384 diff --git a/margin_logs/step_0000165.npy b/margin_logs/step_0000165.npy new file mode 100644 index 0000000..fd21a27 --- /dev/null +++ b/margin_logs/step_0000165.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a36cfd4b10e218c85a930daad78599aaaa43032971c23115a4846029843e323 +size 384 diff --git a/margin_logs/step_0000166.npy b/margin_logs/step_0000166.npy new file mode 100644 index 0000000..4b74318 --- /dev/null +++ b/margin_logs/step_0000166.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35c53f33d0fafc1d21aadab75ff0cd497f3ead3ebca666256f96bce0a94cdf9f +size 384 diff --git a/margin_logs/step_0000167.npy b/margin_logs/step_0000167.npy new file mode 100644 index 0000000..4716309 --- /dev/null +++ b/margin_logs/step_0000167.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da2e41caac0603cfcb5bab3d7f19a2a644fc8fec7c1afcfbdc15cea907aea61 +size 384 diff --git a/margin_logs/step_0000168.npy b/margin_logs/step_0000168.npy new file mode 100644 index 0000000..40a49d6 --- /dev/null +++ b/margin_logs/step_0000168.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:102a8378db9847083f513fcfad585b808e3fa92e313942a368cdfd290c532ade +size 384 diff --git a/margin_logs/step_0000169.npy b/margin_logs/step_0000169.npy new file mode 100644 index 0000000..18aac1d --- /dev/null +++ b/margin_logs/step_0000169.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12fb3c91c293e4229a13ef2d63c1a0ba2f017fcff21d8b29ccf8ca56b7fb3b96 +size 384 diff --git a/margin_logs/step_0000170.npy b/margin_logs/step_0000170.npy new file mode 100644 index 0000000..f41ec5d --- /dev/null +++ b/margin_logs/step_0000170.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3467e2a1837ee18906e025d9fb72c2f1c03388ff12fdb35acc2b2bfc0d0d5170 +size 384 diff --git a/margin_logs/step_0000171.npy b/margin_logs/step_0000171.npy new file mode 100644 index 0000000..01d4159 --- /dev/null +++ b/margin_logs/step_0000171.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee70225bbded1fd48a6df77042a2b33e25db1d035b5485a026fe700a111c05ed +size 384 diff --git a/margin_logs/step_0000172.npy b/margin_logs/step_0000172.npy new file mode 100644 index 0000000..9f728ee --- /dev/null +++ b/margin_logs/step_0000172.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae014e6a71d56e8de39203f7609b8e867a31f640492bba93a151e3029fa81896 +size 384 diff --git a/margin_logs/step_0000173.npy b/margin_logs/step_0000173.npy new file mode 100644 index 0000000..7beaed7 --- /dev/null +++ b/margin_logs/step_0000173.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17182ea3fbbabab3a48f502dd1e2995ba757549b5470d9f1f366e80aeff7e13e +size 384 diff --git a/margin_logs/step_0000174.npy b/margin_logs/step_0000174.npy new file mode 100644 index 0000000..ad37f09 --- /dev/null +++ b/margin_logs/step_0000174.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297f403d6fc295e354ff7c7dbb07cebaf40ac028af3bd47cea1d4bc5d87e41c2 +size 384 diff --git a/margin_logs/step_0000175.npy b/margin_logs/step_0000175.npy new file mode 100644 index 0000000..7cd5f9d --- /dev/null +++ b/margin_logs/step_0000175.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:377f631908da920c0cc8fa201e8397968e114f101acb0781bd6456bbdf7c3bbb +size 384 diff --git a/margin_logs/step_0000176.npy b/margin_logs/step_0000176.npy new file mode 100644 index 0000000..095a037 --- /dev/null +++ b/margin_logs/step_0000176.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94a39b5f635eccc878037e38693c3154808a9e76a2c25965f7de89f4a83cbb3d +size 384 diff --git a/margin_logs/step_0000177.npy b/margin_logs/step_0000177.npy new file mode 100644 index 0000000..09b87ef --- /dev/null +++ b/margin_logs/step_0000177.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:080b40a88d33fcf6bd56c47a5e47f0ff78d422bc2d1ec3616a91306f68753d35 +size 384 diff --git a/margin_logs/step_0000178.npy b/margin_logs/step_0000178.npy new file mode 100644 index 0000000..e3470c1 --- /dev/null +++ b/margin_logs/step_0000178.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8672e179955f712fd0c98ab5e12bd95a5a8aae74f41958867d9d380fc26829e8 +size 384 diff --git a/margin_logs/step_0000179.npy b/margin_logs/step_0000179.npy new file mode 100644 index 0000000..48d1087 --- /dev/null +++ b/margin_logs/step_0000179.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:488c8d4d40c171d8251d7ccb3a94e514b7a3ba0d919ae7062b1ad44c05a0a86d +size 384 diff --git a/margin_logs/step_0000180.npy b/margin_logs/step_0000180.npy new file mode 100644 index 0000000..86a4378 --- /dev/null +++ b/margin_logs/step_0000180.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba71c0973b1a372616ba677dd5718cbe6c11f803b23b1bb133856993534485b2 +size 384 diff --git a/margin_logs/step_0000181.npy b/margin_logs/step_0000181.npy new file mode 100644 index 0000000..a95a9a1 --- /dev/null +++ b/margin_logs/step_0000181.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7aa6cbf89c4f431bdc97d1625ee3383b1afab23e43f489b462976f06980abb3 +size 384 diff --git a/margin_logs/step_0000182.npy b/margin_logs/step_0000182.npy new file mode 100644 index 0000000..ff9d90e --- /dev/null +++ b/margin_logs/step_0000182.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d36ed45743d4314ef9c91a2c534376196ff4ce00f674258c485c651c0b5b03c +size 384 diff --git a/margin_logs/step_0000183.npy b/margin_logs/step_0000183.npy new file mode 100644 index 0000000..9c52fc3 --- /dev/null +++ b/margin_logs/step_0000183.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13fad3f6e0c921e3b0e21f6bd42a06bb0de8f933039258e5f24d812e7119d480 +size 384 diff --git a/margin_logs/step_0000184.npy b/margin_logs/step_0000184.npy new file mode 100644 index 0000000..357dfeb --- /dev/null +++ b/margin_logs/step_0000184.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b78c206d1d7b70bd967ed0451ffd6a411c6244224b6e5de3c1736715289de3be +size 384 diff --git a/margin_logs/step_0000185.npy b/margin_logs/step_0000185.npy new file mode 100644 index 0000000..733f2df --- /dev/null +++ b/margin_logs/step_0000185.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:389f34deb98da3f6877342e3f98c8ea3a784238a991e2f5be9114b938ae91f9d +size 384 diff --git a/margin_logs/step_0000186.npy b/margin_logs/step_0000186.npy new file mode 100644 index 0000000..4dc522f --- /dev/null +++ b/margin_logs/step_0000186.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87e08bb3afa909ea51d7aba7556dab6f5d8da53615055a6f6e8faaa8c42e3b74 +size 384 diff --git a/margin_logs/step_0000187.npy b/margin_logs/step_0000187.npy new file mode 100644 index 0000000..6f914ce --- /dev/null +++ b/margin_logs/step_0000187.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:942ade6655b69a10708a2a196c5b11394033d8c398325442b53b6d27e016a98f +size 384 diff --git a/margin_logs/step_0000188.npy b/margin_logs/step_0000188.npy new file mode 100644 index 0000000..5b973d5 --- /dev/null +++ b/margin_logs/step_0000188.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77f57502f6e784805ce981643289d500ff364b508d934d9400bfeee93b698018 +size 384 diff --git a/margin_logs/step_0000189.npy b/margin_logs/step_0000189.npy new file mode 100644 index 0000000..23ab60c --- /dev/null +++ b/margin_logs/step_0000189.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa5cf0d860692a92c3729ebe0a5f3e6e56433e231bf6bee95ae961c1b0d4ecc5 +size 384 diff --git a/margin_logs/step_0000190.npy b/margin_logs/step_0000190.npy new file mode 100644 index 0000000..54c835a --- /dev/null +++ b/margin_logs/step_0000190.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dae6e289417033d90d24a918a455d16cadf294bc62118e12096577f646671f6f +size 384 diff --git a/margin_logs/step_0000191.npy b/margin_logs/step_0000191.npy new file mode 100644 index 0000000..c4e3b90 --- /dev/null +++ b/margin_logs/step_0000191.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc350e1de531aa6a6ea45ee619da0838692095387193ae53da74efa147b8b11b +size 384 diff --git a/margin_logs/step_0000192.npy b/margin_logs/step_0000192.npy new file mode 100644 index 0000000..afa8b4b --- /dev/null +++ b/margin_logs/step_0000192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91839087252d660620738b2b9557e5e6da4f06f070d2055b5b79784db8184248 +size 384 diff --git a/margin_logs/step_0000193.npy b/margin_logs/step_0000193.npy new file mode 100644 index 0000000..d1f6cad --- /dev/null +++ b/margin_logs/step_0000193.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac6ea04488184d17830a429c55027d11695427c58045dc16a2ef5fafc8a1458 +size 384 diff --git a/margin_logs/step_0000194.npy b/margin_logs/step_0000194.npy new file mode 100644 index 0000000..07754e2 --- /dev/null +++ b/margin_logs/step_0000194.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e1dd32d9f371cb6851fff4cb0836191816b14ab36d5ebea8c8fed528c3b7807 +size 384 diff --git a/margin_logs/step_0000195.npy b/margin_logs/step_0000195.npy new file mode 100644 index 0000000..463f84d --- /dev/null +++ b/margin_logs/step_0000195.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79a43c2e26a4fde7ea7d94ff7d1a9d13c606f48710fa139dfdf94d385a872319 +size 384 diff --git a/margin_logs/step_0000196.npy b/margin_logs/step_0000196.npy new file mode 100644 index 0000000..a08f766 --- /dev/null +++ b/margin_logs/step_0000196.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc0053e043575e5132049bc8529fb16d3a892a56568590febc38aeb53908d081 +size 384 diff --git a/margin_logs/step_0000197.npy b/margin_logs/step_0000197.npy new file mode 100644 index 0000000..94b621f --- /dev/null +++ b/margin_logs/step_0000197.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6aa44ff964c1c77930fd4deb6693f36d589fe61454f557390081256e4b663bb +size 384 diff --git a/margin_logs/step_0000198.npy b/margin_logs/step_0000198.npy new file mode 100644 index 0000000..a678c40 --- /dev/null +++ b/margin_logs/step_0000198.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:653d0b9934021f763ba1a3bda8b2a2cd99c928df9b44b21107e5649b1bf1d184 +size 384 diff --git a/margin_logs/step_0000199.npy b/margin_logs/step_0000199.npy new file mode 100644 index 0000000..155588e --- /dev/null +++ b/margin_logs/step_0000199.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54ebb1b6ee1b1e8d443565d914653070e50e51cbabc3b245b73ec6354eef8988 +size 384 diff --git a/margin_logs/step_0000200.npy b/margin_logs/step_0000200.npy new file mode 100644 index 0000000..e1f748b --- /dev/null +++ b/margin_logs/step_0000200.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84c255ecf9d46e79f3ff3858f39b97d9ae2e1d13bcb9cb05f732dc56978c1236 +size 384 diff --git a/margin_logs/step_0000201.npy b/margin_logs/step_0000201.npy new file mode 100644 index 0000000..13877b8 --- /dev/null +++ b/margin_logs/step_0000201.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3433dec2ebf2896a04f4cbe6a76fc75e8fb8f2ba29946fba37e677d3b3fd8a2c +size 384 diff --git a/margin_logs/step_0000202.npy b/margin_logs/step_0000202.npy new file mode 100644 index 0000000..4c024a1 --- /dev/null +++ b/margin_logs/step_0000202.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9fc8271403ab76eedbeb97864a84bc3cdb59c8c604b0fa632857bfa65004acf +size 384 diff --git a/margin_logs/step_0000203.npy b/margin_logs/step_0000203.npy new file mode 100644 index 0000000..dd72c82 --- /dev/null +++ b/margin_logs/step_0000203.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6b074d4e6bc6ceb3a9da434f8b5c057ea441f5be2177cb385eace803c793b0d +size 384 diff --git a/margin_logs/step_0000204.npy b/margin_logs/step_0000204.npy new file mode 100644 index 0000000..85fbbc0 --- /dev/null +++ b/margin_logs/step_0000204.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b22881e5eae9018e8d5f6e7d8f9ae985422e54c578194cc05298d2cb210a06b4 +size 384 diff --git a/margin_logs/step_0000205.npy b/margin_logs/step_0000205.npy new file mode 100644 index 0000000..3482f9d --- /dev/null +++ b/margin_logs/step_0000205.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e84e992c635b6ea444b65f965a3aa24b19f4d36356bb08a5b189cce6bc5dcd54 +size 384 diff --git a/margin_logs/step_0000206.npy b/margin_logs/step_0000206.npy new file mode 100644 index 0000000..2ece6ca --- /dev/null +++ b/margin_logs/step_0000206.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9b6cb705149bf6baa18c605799c23a6e3e0bc5a481a4d0a415e07531686af44 +size 384 diff --git a/margin_logs/step_0000207.npy b/margin_logs/step_0000207.npy new file mode 100644 index 0000000..11cd119 --- /dev/null +++ b/margin_logs/step_0000207.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a26ea93285354fc3499cf9117f43fc7855292f94dc65951d83024f15775c5b3d +size 384 diff --git a/margin_logs/step_0000208.npy b/margin_logs/step_0000208.npy new file mode 100644 index 0000000..7252076 --- /dev/null +++ b/margin_logs/step_0000208.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:736c167d22ec82e3b8e54b67cf104138588626e8424ac03d30e28a4747acd243 +size 384 diff --git a/margin_logs/step_0000209.npy b/margin_logs/step_0000209.npy new file mode 100644 index 0000000..11501f8 --- /dev/null +++ b/margin_logs/step_0000209.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c8e103126aa89212ae68e47ced30d419ea2cff06530084f0d4518652f4a43ff +size 384 diff --git a/margin_logs/step_0000210.npy b/margin_logs/step_0000210.npy new file mode 100644 index 0000000..6c33395 --- /dev/null +++ b/margin_logs/step_0000210.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:566bc93b1691444bf8690aeaa97f2f1a91a64eeedecf66c1e5f38bfc1afd705f +size 384 diff --git a/margin_logs/step_0000211.npy b/margin_logs/step_0000211.npy new file mode 100644 index 0000000..690c6b1 --- /dev/null +++ b/margin_logs/step_0000211.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1df0458135130df58bea354458fe0f9b1f64dc879e4a70ea24d28afa91b3a713 +size 384 diff --git a/margin_logs/step_0000212.npy b/margin_logs/step_0000212.npy new file mode 100644 index 0000000..b3efee2 --- /dev/null +++ b/margin_logs/step_0000212.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:231cc898feac72809e99f62c7683de26c6b84b54be5f7bfb78913e86fe3811d8 +size 384 diff --git a/margin_logs/step_0000213.npy b/margin_logs/step_0000213.npy new file mode 100644 index 0000000..72b3c8e --- /dev/null +++ b/margin_logs/step_0000213.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5ec1e81478e74a4c2d0d7cb1226a0879681e745cc8c96500584b12f5d681f67 +size 384 diff --git a/margin_logs/step_0000214.npy b/margin_logs/step_0000214.npy new file mode 100644 index 0000000..bb39884 --- /dev/null +++ b/margin_logs/step_0000214.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dd06e0c4c090d07666f33b96b44f0a815dac0883476f430fa7379fb99cf7c0c +size 384 diff --git a/margin_logs/step_0000215.npy b/margin_logs/step_0000215.npy new file mode 100644 index 0000000..201ad68 --- /dev/null +++ b/margin_logs/step_0000215.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a5c0bc4bffc98fb55f7ec49ac16686e3b65e2ed782b7079f9af8abf9c1e527e +size 384 diff --git a/margin_logs/step_0000216.npy b/margin_logs/step_0000216.npy new file mode 100644 index 0000000..b9f5eb1 --- /dev/null +++ b/margin_logs/step_0000216.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:323500e7fea2ac200e657ffef4fb5fcd146d0e03be971cfcabbc577f81b7df98 +size 384 diff --git a/margin_logs/step_0000217.npy b/margin_logs/step_0000217.npy new file mode 100644 index 0000000..ccbd2ab --- /dev/null +++ b/margin_logs/step_0000217.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3a8e836c21664cc2eba59a17abb3029a534a8fb2f63ceab15ba146a19c618c2 +size 384 diff --git a/margin_logs/step_0000218.npy b/margin_logs/step_0000218.npy new file mode 100644 index 0000000..284acbe --- /dev/null +++ b/margin_logs/step_0000218.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fedc821fba3d4deb31c3fe4c994eddb5821ed1a1fd8e6f39c301d6ba96d2617 +size 384 diff --git a/margin_logs/step_0000219.npy b/margin_logs/step_0000219.npy new file mode 100644 index 0000000..265fb52 --- /dev/null +++ b/margin_logs/step_0000219.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7af349fd5537db8237db68ca72fb43eec24c5e75949ee658fb39bc4496372627 +size 384 diff --git a/margin_logs/step_0000220.npy b/margin_logs/step_0000220.npy new file mode 100644 index 0000000..3a93b2c --- /dev/null +++ b/margin_logs/step_0000220.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20958b533a91b6320893a98b53b05abf67f971000ff42c55e2c33bd693f1ef54 +size 384 diff --git a/margin_logs/step_0000221.npy b/margin_logs/step_0000221.npy new file mode 100644 index 0000000..bbf04a5 --- /dev/null +++ b/margin_logs/step_0000221.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59228221f73c7d1b070a1da092e51877d29a5bebf96497299bc24b393adbc0b6 +size 384 diff --git a/margin_logs/step_0000222.npy b/margin_logs/step_0000222.npy new file mode 100644 index 0000000..ccc1cb0 --- /dev/null +++ b/margin_logs/step_0000222.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6d7d95e81b8d525c276b76b8089576dd5410fd62a9924fcf94e7f9f7a6a762e +size 384 diff --git a/margin_logs/step_0000223.npy b/margin_logs/step_0000223.npy new file mode 100644 index 0000000..213b651 --- /dev/null +++ b/margin_logs/step_0000223.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bca69ffcd801e44504e7f54dfd7b7d41d4542d1cb987c679d9299ca9b09f1598 +size 384 diff --git a/margin_logs/step_0000224.npy b/margin_logs/step_0000224.npy new file mode 100644 index 0000000..f356322 --- /dev/null +++ b/margin_logs/step_0000224.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29b7bf342a9e02503bc6fb9da471fee2eca4e98f37d6af8643f3c65cb355a4be +size 384 diff --git a/margin_logs/step_0000225.npy b/margin_logs/step_0000225.npy new file mode 100644 index 0000000..4dc996d --- /dev/null +++ b/margin_logs/step_0000225.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898f38b286051ed3120ea1ac0ed8941124c3ed6c3dd85cd653c944e92664500c +size 384 diff --git a/margin_logs/step_0000226.npy b/margin_logs/step_0000226.npy new file mode 100644 index 0000000..f318bcc --- /dev/null +++ b/margin_logs/step_0000226.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b2597833e148d8336e9a4f6097349ca1d8cebf00d9d8492755f06f53991bfe2 +size 384 diff --git a/margin_logs/step_0000227.npy b/margin_logs/step_0000227.npy new file mode 100644 index 0000000..56f16e0 --- /dev/null +++ b/margin_logs/step_0000227.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bbbb740cb57632dbeb11632e9bca3e9e0b2e13f9e1f843f8956ff38f3f5eb77 +size 384 diff --git a/margin_logs/step_0000228.npy b/margin_logs/step_0000228.npy new file mode 100644 index 0000000..e5a47d3 --- /dev/null +++ b/margin_logs/step_0000228.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e01ecace6b967a1501c7c9de837bfbc17aa74a59e9b99b652756af8ee02c0864 +size 384 diff --git a/margin_logs/step_0000229.npy b/margin_logs/step_0000229.npy new file mode 100644 index 0000000..5c3db36 --- /dev/null +++ b/margin_logs/step_0000229.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:842666624faaf890cc6ce2be801331b349642282038dc2179d7cdce0355032b6 +size 384 diff --git a/margin_logs/step_0000230.npy b/margin_logs/step_0000230.npy new file mode 100644 index 0000000..b7181e9 --- /dev/null +++ b/margin_logs/step_0000230.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:810fcc9d7b19b25f24619131a2050c013fa318fc8e1432d8417843819d4a9178 +size 384 diff --git a/margin_logs/step_0000231.npy b/margin_logs/step_0000231.npy new file mode 100644 index 0000000..dc85a44 --- /dev/null +++ b/margin_logs/step_0000231.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:833978dced07f9e8333cb1bd9c3b07f933632acff3f348c132ba93015f5a5b0f +size 384 diff --git a/margin_logs/step_0000232.npy b/margin_logs/step_0000232.npy new file mode 100644 index 0000000..872bdf7 --- /dev/null +++ b/margin_logs/step_0000232.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43dd413394814c5e355ba9b36f7e845e9f85f8b54509ee678cc0e11c8555c839 +size 384 diff --git a/margin_logs/step_0000233.npy b/margin_logs/step_0000233.npy new file mode 100644 index 0000000..7cd9827 --- /dev/null +++ b/margin_logs/step_0000233.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9afee2fcbe4a898e93d70ed6da9be40206da6d494daa9b91bba9783acb84a7c9 +size 384 diff --git a/margin_logs/step_0000234.npy b/margin_logs/step_0000234.npy new file mode 100644 index 0000000..c546bf7 --- /dev/null +++ b/margin_logs/step_0000234.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d2a3468d1586935304ff500733a4a0e88b84429867dd8a5148809105247096b +size 384 diff --git a/margin_logs/step_0000235.npy b/margin_logs/step_0000235.npy new file mode 100644 index 0000000..4db01a3 --- /dev/null +++ b/margin_logs/step_0000235.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95eb4765fb0bad87ca2f2df18d84250db661022be798e8630d21e75274e1ba2 +size 384 diff --git a/margin_logs/step_0000236.npy b/margin_logs/step_0000236.npy new file mode 100644 index 0000000..be4aff8 --- /dev/null +++ b/margin_logs/step_0000236.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a0bc5a48792e8b04de5ba27f25498c32b9b78d594338d6cd2e25ab3f91a57f6 +size 384 diff --git a/margin_logs/step_0000237.npy b/margin_logs/step_0000237.npy new file mode 100644 index 0000000..7eb8aab --- /dev/null +++ b/margin_logs/step_0000237.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fea1e270445b9031702c1362c9362eb263766faf62adc01a6594afd316c6628f +size 384 diff --git a/margin_logs/step_0000238.npy b/margin_logs/step_0000238.npy new file mode 100644 index 0000000..1b4eb05 --- /dev/null +++ b/margin_logs/step_0000238.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21529f9c59166bd57e2ecde402473f30804bb21750e1d1cdc692f17887f65930 +size 384 diff --git a/margin_logs/step_0000239.npy b/margin_logs/step_0000239.npy new file mode 100644 index 0000000..cad6f2c --- /dev/null +++ b/margin_logs/step_0000239.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:079596e361f4284d2ddc55b24e9932273623e52fb8c2a82df52f78228ba27d3d +size 384 diff --git a/margin_logs/step_0000240.npy b/margin_logs/step_0000240.npy new file mode 100644 index 0000000..4e86def --- /dev/null +++ b/margin_logs/step_0000240.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22cc2b1fad19172318b2a4cdccc0d1ff4673ca456e630b813c9af45c511912aa +size 384 diff --git a/margin_logs/step_0000241.npy b/margin_logs/step_0000241.npy new file mode 100644 index 0000000..b797315 --- /dev/null +++ b/margin_logs/step_0000241.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13c824f4fd901e38656986f0601eccf372b39f999ae517ddb51dc327f154f26f +size 384 diff --git a/margin_logs/step_0000242.npy b/margin_logs/step_0000242.npy new file mode 100644 index 0000000..fdf7159 --- /dev/null +++ b/margin_logs/step_0000242.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7969b7adfbf8db0956e60a94b2f77c7dfe9aaf75d42a46de16a125edb57e612e +size 384 diff --git a/margin_logs/step_0000243.npy b/margin_logs/step_0000243.npy new file mode 100644 index 0000000..4f60b50 --- /dev/null +++ b/margin_logs/step_0000243.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73f3a9cf4192767025a921b951cb45c6810d3551b2c69130510270d50d0236f0 +size 384 diff --git a/margin_logs/step_0000244.npy b/margin_logs/step_0000244.npy new file mode 100644 index 0000000..b141ce9 --- /dev/null +++ b/margin_logs/step_0000244.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53db5ffd32ad83d710d41f823c33bb267f8ddc411c288d72b0f16e18530b33ae +size 384 diff --git a/margin_logs/step_0000245.npy b/margin_logs/step_0000245.npy new file mode 100644 index 0000000..eb6c7c6 --- /dev/null +++ b/margin_logs/step_0000245.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e92c2d41ba2dce3021ed77d49a25e181911ce4a42aae64e0728b9ffa91afae +size 384 diff --git a/margin_logs/step_0000246.npy b/margin_logs/step_0000246.npy new file mode 100644 index 0000000..8daeaf5 --- /dev/null +++ b/margin_logs/step_0000246.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd5539714295b3526f91db382fe10359bd40789edddb1693560be598c58dd46f +size 384 diff --git a/margin_logs/step_0000247.npy b/margin_logs/step_0000247.npy new file mode 100644 index 0000000..0c0750f --- /dev/null +++ b/margin_logs/step_0000247.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f04b0302116017606b441926d6791258bd3d4b06835dee8b473cc7846e44c8d +size 384 diff --git a/margin_logs/step_0000248.npy b/margin_logs/step_0000248.npy new file mode 100644 index 0000000..2e9a583 --- /dev/null +++ b/margin_logs/step_0000248.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d6146c61d9986ce9629f517152534c022f6c43979eaf07f589bde5af6880e5 +size 384 diff --git a/margin_logs/step_0000249.npy b/margin_logs/step_0000249.npy new file mode 100644 index 0000000..ece3194 --- /dev/null +++ b/margin_logs/step_0000249.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd1fe2b12c8598372b77dd9f7ffd6ac5f8410fe58e4926eb14bc12502773a55c +size 384 diff --git a/margin_logs/step_0000250.npy b/margin_logs/step_0000250.npy new file mode 100644 index 0000000..99019d6 --- /dev/null +++ b/margin_logs/step_0000250.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a71570292f63483f0d596222fdde47157798a457a2d979a571a1a7521c698de +size 384 diff --git a/margin_logs/step_0000251.npy b/margin_logs/step_0000251.npy new file mode 100644 index 0000000..4e6542e --- /dev/null +++ b/margin_logs/step_0000251.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7575bc2b2f1e344e2d0d40c6eacee87c3b2252858aec7fd8662341909d568c2 +size 384 diff --git a/margin_logs/step_0000252.npy b/margin_logs/step_0000252.npy new file mode 100644 index 0000000..f61c98c --- /dev/null +++ b/margin_logs/step_0000252.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:691dfe045a9ab05a155ab1840e0dee956bad67bf4e05893dae5cabb2d4de3b27 +size 384 diff --git a/margin_logs/step_0000253.npy b/margin_logs/step_0000253.npy new file mode 100644 index 0000000..f14d42c --- /dev/null +++ b/margin_logs/step_0000253.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6054ba4dd83ca945620c24555fedf1a77f565a5272437c033e28e123d2cc194c +size 384 diff --git a/margin_logs/step_0000254.npy b/margin_logs/step_0000254.npy new file mode 100644 index 0000000..2e055b3 --- /dev/null +++ b/margin_logs/step_0000254.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7ae1c7ec2999c20da75207b0310d0ec78d83734bdd63856e658af861513607e +size 384 diff --git a/margin_logs/step_0000255.npy b/margin_logs/step_0000255.npy new file mode 100644 index 0000000..1498a65 --- /dev/null +++ b/margin_logs/step_0000255.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9a2791d7e02e684676acc92d6b4fbfd42ec5fd88d780c5cad05b414faee1c08 +size 384 diff --git a/margin_logs/step_0000256.npy b/margin_logs/step_0000256.npy new file mode 100644 index 0000000..51ec14f --- /dev/null +++ b/margin_logs/step_0000256.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:976c46bde1a3be51e89f757d9d6c1762bf155e868ee3086af6e1ce9d53621060 +size 384 diff --git a/margin_logs/step_0000257.npy b/margin_logs/step_0000257.npy new file mode 100644 index 0000000..308f6f6 --- /dev/null +++ b/margin_logs/step_0000257.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b63353bd142b985c4b160174b494c584ddc36df1958f99d1c4a79526ce905839 +size 384 diff --git a/margin_logs/step_0000258.npy b/margin_logs/step_0000258.npy new file mode 100644 index 0000000..3d9eade --- /dev/null +++ b/margin_logs/step_0000258.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a16113c1e87de27ae8220257e0e6c00b74784ccce65eb041bd98606eee52fab5 +size 384 diff --git a/margin_logs/step_0000259.npy b/margin_logs/step_0000259.npy new file mode 100644 index 0000000..b4ced54 --- /dev/null +++ b/margin_logs/step_0000259.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b1d30daaece117ae5b505f8331c981344c7bb93433e6bb0dee2dce989b117a +size 384 diff --git a/margin_logs/step_0000260.npy b/margin_logs/step_0000260.npy new file mode 100644 index 0000000..986c42f --- /dev/null +++ b/margin_logs/step_0000260.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15f39f164474e7ed906293277ee83f7737f19a92df89d104d1e870e2a6777263 +size 384 diff --git a/margin_logs/step_0000261.npy b/margin_logs/step_0000261.npy new file mode 100644 index 0000000..690742d --- /dev/null +++ b/margin_logs/step_0000261.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55c375193d5201e524f494d690f33daf33996d20fab34aaf934656a28c03de79 +size 384 diff --git a/margin_logs/step_0000262.npy b/margin_logs/step_0000262.npy new file mode 100644 index 0000000..ec7f2fc --- /dev/null +++ b/margin_logs/step_0000262.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57871d06bb5573a5ac3e39bb4d65634faf1de94c145043fb330978f5c7b84d81 +size 384 diff --git a/margin_logs/step_0000263.npy b/margin_logs/step_0000263.npy new file mode 100644 index 0000000..11c48ac --- /dev/null +++ b/margin_logs/step_0000263.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56081c0e670b349035e3f6f5ec5586cf897c70cd855a040f8370851c9023cd7b +size 384 diff --git a/margin_logs/step_0000264.npy b/margin_logs/step_0000264.npy new file mode 100644 index 0000000..683033a --- /dev/null +++ b/margin_logs/step_0000264.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db5ea391760f6c6586ed919537e6d2fcb385a5bfa87001a40058f284c05a284a +size 384 diff --git a/margin_logs/step_0000265.npy b/margin_logs/step_0000265.npy new file mode 100644 index 0000000..6893b41 --- /dev/null +++ b/margin_logs/step_0000265.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:875f34e594e24cf5c37aab9c0fbff3aa3085c07c3d1cf34168c0ab496f8966e0 +size 384 diff --git a/margin_logs/step_0000266.npy b/margin_logs/step_0000266.npy new file mode 100644 index 0000000..16dd7c7 --- /dev/null +++ b/margin_logs/step_0000266.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2026d48d1eafd882adef238de14dd1beac64b4bfc82a5925fa50b84606883d70 +size 384 diff --git a/margin_logs/step_0000267.npy b/margin_logs/step_0000267.npy new file mode 100644 index 0000000..12ff05c --- /dev/null +++ b/margin_logs/step_0000267.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a254208a36faacad14e1649d98cf23f650600389982166e3fabf0cc617651a83 +size 384 diff --git a/margin_logs/step_0000268.npy b/margin_logs/step_0000268.npy new file mode 100644 index 0000000..0d77c1c --- /dev/null +++ b/margin_logs/step_0000268.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d290c3b04565bc56f43969677e5a43cb645d071b21fefdaf586b14a2c4bbec5b +size 384 diff --git a/margin_logs/step_0000269.npy b/margin_logs/step_0000269.npy new file mode 100644 index 0000000..340bbab --- /dev/null +++ b/margin_logs/step_0000269.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be22edbac435fd3915935d2189604c24938ce7718e5eed22c200af694c579b9e +size 384 diff --git a/margin_logs/step_0000270.npy b/margin_logs/step_0000270.npy new file mode 100644 index 0000000..adcc050 --- /dev/null +++ b/margin_logs/step_0000270.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32d5c07ea05479b461864b5acbd09c8714f9617e36b022c764c31c5b29196596 +size 384 diff --git a/margin_logs/step_0000271.npy b/margin_logs/step_0000271.npy new file mode 100644 index 0000000..d70e7cb --- /dev/null +++ b/margin_logs/step_0000271.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7834cdf613c69dab40e851448cb9b310adaef8f8f05484fe5c9a4ddb47adf0c +size 384 diff --git a/margin_logs/step_0000272.npy b/margin_logs/step_0000272.npy new file mode 100644 index 0000000..2dad5d6 --- /dev/null +++ b/margin_logs/step_0000272.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20654af5fd03f9160d53ac8e6d32e90e7df3f5f7ddf3ed57f18ffbcd9b5da418 +size 384 diff --git a/margin_logs/step_0000273.npy b/margin_logs/step_0000273.npy new file mode 100644 index 0000000..44e3ab9 --- /dev/null +++ b/margin_logs/step_0000273.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38761673ed4b5ac4c1374cede76626833b4c4023a776c5866ed7fb5d3c76f0f1 +size 384 diff --git a/margin_logs/step_0000274.npy b/margin_logs/step_0000274.npy new file mode 100644 index 0000000..bf3102f --- /dev/null +++ b/margin_logs/step_0000274.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37dd2848406090baec2b12af13fda89b89b82e94724c39d976ce1394047151d4 +size 384 diff --git a/margin_logs/step_0000275.npy b/margin_logs/step_0000275.npy new file mode 100644 index 0000000..a42c20f --- /dev/null +++ b/margin_logs/step_0000275.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4b1f259f3801a7fec66d053b82fa3ba9917e973fb1122a2d401a8e611d3246b +size 384 diff --git a/margin_logs/step_0000276.npy b/margin_logs/step_0000276.npy new file mode 100644 index 0000000..b62004e --- /dev/null +++ b/margin_logs/step_0000276.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e96f110ce933f4907e94db640b5735c6ebefa6e973a9dd153c65a5ad48212ee4 +size 384 diff --git a/margin_logs/step_0000277.npy b/margin_logs/step_0000277.npy new file mode 100644 index 0000000..fd56652 --- /dev/null +++ b/margin_logs/step_0000277.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d0b90e9ab78e5423b12a532ce52586dc2031f495925c75423ce51cfc1e53c0 +size 384 diff --git a/margin_logs/step_0000278.npy b/margin_logs/step_0000278.npy new file mode 100644 index 0000000..b2109e4 --- /dev/null +++ b/margin_logs/step_0000278.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed64aab49fcc530c7e6fef3b75739823e54bacea7e3c0b4086d5138b6182b55 +size 384 diff --git a/margin_logs/step_0000279.npy b/margin_logs/step_0000279.npy new file mode 100644 index 0000000..50ce490 --- /dev/null +++ b/margin_logs/step_0000279.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8cd03572c60790de1b39e5095cd94a1b4401e62b884b964031018b2d0eb5137 +size 384 diff --git a/margin_logs/step_0000280.npy b/margin_logs/step_0000280.npy new file mode 100644 index 0000000..e85b8af --- /dev/null +++ b/margin_logs/step_0000280.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a63c9f50c74cc37044a51eb7842fa8872881b836a6f536bdefa2b08559cfeb1 +size 384 diff --git a/margin_logs/step_0000281.npy b/margin_logs/step_0000281.npy new file mode 100644 index 0000000..3076d36 --- /dev/null +++ b/margin_logs/step_0000281.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc1465bee35fb471fd8a7108a59e193645f9f58bb87913c465e3a5da0dc6412d +size 384 diff --git a/margin_logs/step_0000282.npy b/margin_logs/step_0000282.npy new file mode 100644 index 0000000..47a1f9c --- /dev/null +++ b/margin_logs/step_0000282.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0d923041a1872d5c23fca2087450842ef8a430331f4168f1212177f79a20a50 +size 384 diff --git a/margin_logs/step_0000283.npy b/margin_logs/step_0000283.npy new file mode 100644 index 0000000..4090f58 --- /dev/null +++ b/margin_logs/step_0000283.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5391e6a7215349f29cac3124933608fcc2df9e604056b456063bead8b3bd1e44 +size 384 diff --git a/margin_logs/step_0000284.npy b/margin_logs/step_0000284.npy new file mode 100644 index 0000000..1794030 --- /dev/null +++ b/margin_logs/step_0000284.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81514e8dd87bf4d435e3e64bef4448f93d19a954319c2784fb2a987cfebf4552 +size 384 diff --git a/margin_logs/step_0000285.npy b/margin_logs/step_0000285.npy new file mode 100644 index 0000000..d0a0acb --- /dev/null +++ b/margin_logs/step_0000285.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1865382c473d3fa8fe0bf91338e56a641f3df0ef940beedb7416a339fbc81a4 +size 384 diff --git a/margin_logs/step_0000286.npy b/margin_logs/step_0000286.npy new file mode 100644 index 0000000..05b7760 --- /dev/null +++ b/margin_logs/step_0000286.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63d021d6cfbcc5b81b1a2e237afcd6efdd7c81d998a1f021c969d695357a0f40 +size 384 diff --git a/margin_logs/step_0000287.npy b/margin_logs/step_0000287.npy new file mode 100644 index 0000000..5191154 --- /dev/null +++ b/margin_logs/step_0000287.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fc0ceb3b86bfaf49650194e137deea2024a981cb80af69805c4a358d38c96fb +size 384 diff --git a/margin_logs/step_0000288.npy b/margin_logs/step_0000288.npy new file mode 100644 index 0000000..65ee502 --- /dev/null +++ b/margin_logs/step_0000288.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a0192c56f014da00457881c908842d7a97517bc12925acaa6e2a31c2f50826a +size 384 diff --git a/margin_logs/step_0000289.npy b/margin_logs/step_0000289.npy new file mode 100644 index 0000000..42b2558 --- /dev/null +++ b/margin_logs/step_0000289.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb71e20f18a220884f7a513b63bbdd3987030683bc62a659883db4ec526ec9e3 +size 384 diff --git a/margin_logs/step_0000290.npy b/margin_logs/step_0000290.npy new file mode 100644 index 0000000..e035546 --- /dev/null +++ b/margin_logs/step_0000290.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f111a1176bfcf690a3a84ea0d079e1bb02c0bc537dfa7541d418f7b8e542e46 +size 384 diff --git a/margin_logs/step_0000291.npy b/margin_logs/step_0000291.npy new file mode 100644 index 0000000..555ae95 --- /dev/null +++ b/margin_logs/step_0000291.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f7d4d669a059240e54bcf643c69c078aa5d5ce79653d07e2ec7fb2d379f149c +size 384 diff --git a/margin_logs/step_0000292.npy b/margin_logs/step_0000292.npy new file mode 100644 index 0000000..b71e08a --- /dev/null +++ b/margin_logs/step_0000292.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c996e2b894c86f0a8b987cba5039432e2bd4ca78cee84854919e18b74d443dbb +size 384 diff --git a/margin_logs/step_0000293.npy b/margin_logs/step_0000293.npy new file mode 100644 index 0000000..d921e10 --- /dev/null +++ b/margin_logs/step_0000293.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a281ab6165d58d89756835ae7963a3a1af5625e6214f61f22e2546a609230c1 +size 384 diff --git a/margin_logs/step_0000294.npy b/margin_logs/step_0000294.npy new file mode 100644 index 0000000..6bc015f --- /dev/null +++ b/margin_logs/step_0000294.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8b188b200a18f9053466183d422f2db028e1d3933538c3bc341b2f0579b4fd9 +size 384 diff --git a/margin_logs/step_0000295.npy b/margin_logs/step_0000295.npy new file mode 100644 index 0000000..53c5d06 --- /dev/null +++ b/margin_logs/step_0000295.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3bcd89f72e2192bdf99d02eb38883c951def7bd8b2e1cafa89e4d4673677f6c +size 384 diff --git a/margin_logs/step_0000296.npy b/margin_logs/step_0000296.npy new file mode 100644 index 0000000..e5c5388 --- /dev/null +++ b/margin_logs/step_0000296.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9e8abd790391fa465a37a7fb68b9f77f2e102589dd18015cf00dacc6070f5a1 +size 384 diff --git a/margin_logs/step_0000297.npy b/margin_logs/step_0000297.npy new file mode 100644 index 0000000..4a70504 --- /dev/null +++ b/margin_logs/step_0000297.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee6fde6681c00f3b97637b3dd9ec7e7f76bb7e605538e4820e93b017e64b465f +size 384 diff --git a/margin_logs/step_0000298.npy b/margin_logs/step_0000298.npy new file mode 100644 index 0000000..37e543a --- /dev/null +++ b/margin_logs/step_0000298.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2ed56f68a2681ac8157478990e8ad37ae6e4b8f81003b8b892dcb649c0fc558 +size 384 diff --git a/margin_logs/step_0000299.npy b/margin_logs/step_0000299.npy new file mode 100644 index 0000000..391988d --- /dev/null +++ b/margin_logs/step_0000299.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a93e3a208bc515c6072eb44915c9bbc81c962e23989a8225fed2acc003a7d83c +size 384 diff --git a/margin_logs/step_0000300.npy b/margin_logs/step_0000300.npy new file mode 100644 index 0000000..d9de566 --- /dev/null +++ b/margin_logs/step_0000300.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:786f91ebebf231a96535f7389853a28a711c26b6094015e7ab7d7ff63dc5e22c +size 384 diff --git a/margin_logs/step_0000301.npy b/margin_logs/step_0000301.npy new file mode 100644 index 0000000..1fedca1 --- /dev/null +++ b/margin_logs/step_0000301.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f55ffa77bd2230aa68281b1b5edbbd7d1d745d7da1c9f64ca9bb4c5defca61a +size 384 diff --git a/margin_logs/step_0000302.npy b/margin_logs/step_0000302.npy new file mode 100644 index 0000000..b778170 --- /dev/null +++ b/margin_logs/step_0000302.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:352a9df61b105620c890f05aafd807565137ed89ec2d1900bafa55c75a40f0e0 +size 384 diff --git a/margin_logs/step_0000303.npy b/margin_logs/step_0000303.npy new file mode 100644 index 0000000..dcc24fb --- /dev/null +++ b/margin_logs/step_0000303.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45f78208f16e2ec6bcb4364979d8cec7184eadc07102ec0bcff59236a383efe8 +size 384 diff --git a/margin_logs/step_0000304.npy b/margin_logs/step_0000304.npy new file mode 100644 index 0000000..637ada5 --- /dev/null +++ b/margin_logs/step_0000304.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d8e76934124f6a5057aa4fdc95ca5f63e2c1b62122ad6bffc64b0afef92af15 +size 384 diff --git a/margin_logs/step_0000305.npy b/margin_logs/step_0000305.npy new file mode 100644 index 0000000..47107bd --- /dev/null +++ b/margin_logs/step_0000305.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6a3f27cf80c0de56510ef316759eb8845e6f4f959ee0584f12bfacec9797809 +size 384 diff --git a/margin_logs/step_0000306.npy b/margin_logs/step_0000306.npy new file mode 100644 index 0000000..bd984f0 --- /dev/null +++ b/margin_logs/step_0000306.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:957352f7370113e3ff2e8884697fb001d14879e58bebb7b2ce73974f621b2141 +size 384 diff --git a/margin_logs/step_0000307.npy b/margin_logs/step_0000307.npy new file mode 100644 index 0000000..8b857a9 --- /dev/null +++ b/margin_logs/step_0000307.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccbbd28958e29088587c55483bce49c1542a9b147181abe4caf494c36f2950c5 +size 384 diff --git a/margin_logs/step_0000308.npy b/margin_logs/step_0000308.npy new file mode 100644 index 0000000..465ec98 --- /dev/null +++ b/margin_logs/step_0000308.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:209cb3c8d8efdaf7bf909b6365f9e822f0569dc545b5c1cc081a90ceec524fcc +size 384 diff --git a/margin_logs/step_0000309.npy b/margin_logs/step_0000309.npy new file mode 100644 index 0000000..3f4a9b7 --- /dev/null +++ b/margin_logs/step_0000309.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65b3a4a53d4781bf0532209432ded1d19c34662fc26049014b3796015b8a6d66 +size 384 diff --git a/margin_logs/step_0000310.npy b/margin_logs/step_0000310.npy new file mode 100644 index 0000000..8f7256b --- /dev/null +++ b/margin_logs/step_0000310.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d21244ea649ac94c33349acccbed721b32c72ff68feeda822aaeed2cf4693b45 +size 384 diff --git a/margin_logs/step_0000311.npy b/margin_logs/step_0000311.npy new file mode 100644 index 0000000..86c0f01 --- /dev/null +++ b/margin_logs/step_0000311.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aae25eaf271e980d9dab828dccb3a7af56b69dc7145f1e6e36ad3f30b432c8a +size 384 diff --git a/margin_logs/step_0000312.npy b/margin_logs/step_0000312.npy new file mode 100644 index 0000000..4f25990 --- /dev/null +++ b/margin_logs/step_0000312.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31e0da9f911261705ac9f5dc5261489e87ba4917dfdc2112462731dc860ad123 +size 384 diff --git a/margin_logs/step_0000313.npy b/margin_logs/step_0000313.npy new file mode 100644 index 0000000..8379013 --- /dev/null +++ b/margin_logs/step_0000313.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0be91481a9511ff4e70fafda1665d65822c0851b39fe37f2d81b378e596440ea +size 384 diff --git a/margin_logs/step_0000314.npy b/margin_logs/step_0000314.npy new file mode 100644 index 0000000..62d6ed8 --- /dev/null +++ b/margin_logs/step_0000314.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e46ca1ed0914bd1f752b9721b8e0747e8c4d887652eeb51e66ed64b0aabf027 +size 384 diff --git a/margin_logs/step_0000315.npy b/margin_logs/step_0000315.npy new file mode 100644 index 0000000..98ccef7 --- /dev/null +++ b/margin_logs/step_0000315.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4452818057f1fa72c5f33178e6d79403022052fb71ea3d879bd7a880716d7fe +size 384 diff --git a/margin_logs/step_0000316.npy b/margin_logs/step_0000316.npy new file mode 100644 index 0000000..1bb358f --- /dev/null +++ b/margin_logs/step_0000316.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4224c5667cba9816d2a7f22ebee2e87a343f7a3665b7605e564212c413eb7da +size 384 diff --git a/margin_logs/step_0000317.npy b/margin_logs/step_0000317.npy new file mode 100644 index 0000000..e26f8a8 --- /dev/null +++ b/margin_logs/step_0000317.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ed14e68cbcb892918974e43ef2a68b79791dd3f31ffb0cff68370b3de000039 +size 384 diff --git a/margin_logs/step_0000318.npy b/margin_logs/step_0000318.npy new file mode 100644 index 0000000..6045368 --- /dev/null +++ b/margin_logs/step_0000318.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e99ddb16200d3076a281d590830e520bc11065b79289e009524ba7e42046e361 +size 384 diff --git a/margin_logs/step_0000319.npy b/margin_logs/step_0000319.npy new file mode 100644 index 0000000..24a5496 --- /dev/null +++ b/margin_logs/step_0000319.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40dcec8d93a9b29e236f25a834a2eb5e46e27678d4b68e7887ebf4a7424fe264 +size 384 diff --git a/margin_logs/step_0000320.npy b/margin_logs/step_0000320.npy new file mode 100644 index 0000000..78306a1 --- /dev/null +++ b/margin_logs/step_0000320.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e6eb0dc84f11bafddc843347795e7bd0742caebed59515c6a86456696e57e90 +size 384 diff --git a/margin_logs/step_0000321.npy b/margin_logs/step_0000321.npy new file mode 100644 index 0000000..96a3949 --- /dev/null +++ b/margin_logs/step_0000321.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97c6cb175e1500e5177a19ce0b66f383b4b5cf457e18cdeffbf012f1e10b3313 +size 384 diff --git a/margin_logs/step_0000322.npy b/margin_logs/step_0000322.npy new file mode 100644 index 0000000..5e17644 --- /dev/null +++ b/margin_logs/step_0000322.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b42694e3c8ce4a051c33fea566ea7f8a8f44a533a90133e6c16281398bb9c245 +size 384 diff --git a/margin_logs/step_0000323.npy b/margin_logs/step_0000323.npy new file mode 100644 index 0000000..d5c79fe --- /dev/null +++ b/margin_logs/step_0000323.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5006908f5ea2531755040892fc8d2f277908c0c161888cd830342f303b64aff +size 384 diff --git a/margin_logs/step_0000324.npy b/margin_logs/step_0000324.npy new file mode 100644 index 0000000..bc4da96 --- /dev/null +++ b/margin_logs/step_0000324.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f819cb328be582e225949ff36b8bbb3f0f386aa294ab4234c2f38afffab5fab +size 384 diff --git a/margin_logs/step_0000325.npy b/margin_logs/step_0000325.npy new file mode 100644 index 0000000..6a7d4d4 --- /dev/null +++ b/margin_logs/step_0000325.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baf75bacaabdc6375da9fab242a0e4745aa990a8b0fed9e314fdff141022960e +size 384 diff --git a/margin_logs/step_0000326.npy b/margin_logs/step_0000326.npy new file mode 100644 index 0000000..1291236 --- /dev/null +++ b/margin_logs/step_0000326.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b86bf2dd68a35e7b12bcbed42e6ee331e55bbe194f0d1d9849163578c61b159 +size 384 diff --git a/margin_logs/step_0000327.npy b/margin_logs/step_0000327.npy new file mode 100644 index 0000000..f8df04f --- /dev/null +++ b/margin_logs/step_0000327.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5ffc273e64e55608c975edcc8dbfcf62f31be9d80aedbb72ed8b63b9db4ad8f +size 384 diff --git a/margin_logs/step_0000328.npy b/margin_logs/step_0000328.npy new file mode 100644 index 0000000..a5b27ca --- /dev/null +++ b/margin_logs/step_0000328.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dce98669b946f41981b25662ec6e34866c5714b1a5b2a96e7a4dc92d8bbaed8e +size 384 diff --git a/margin_logs/step_0000329.npy b/margin_logs/step_0000329.npy new file mode 100644 index 0000000..687a245 --- /dev/null +++ b/margin_logs/step_0000329.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55fe1b212c01fd0bf5b7294706ef18d98bb585982736aafecc8928a30aee5a68 +size 384 diff --git a/margin_logs/step_0000330.npy b/margin_logs/step_0000330.npy new file mode 100644 index 0000000..36228a9 --- /dev/null +++ b/margin_logs/step_0000330.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55c55d0b61e1a5aad406bb4a5963ab1754a18f6356dd57006938f9e71639b945 +size 384 diff --git a/margin_logs/step_0000331.npy b/margin_logs/step_0000331.npy new file mode 100644 index 0000000..bc82838 --- /dev/null +++ b/margin_logs/step_0000331.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf7d8711528a5de5707dfecb9722067b3e585e86bcbbf28b36d18e046b5ded05 +size 384 diff --git a/margin_logs/step_0000332.npy b/margin_logs/step_0000332.npy new file mode 100644 index 0000000..c595b92 --- /dev/null +++ b/margin_logs/step_0000332.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5645999f90b75df94e516d80232753f819bb8c688a6e8cd3b2c532aa771c64 +size 384 diff --git a/margin_logs/step_0000333.npy b/margin_logs/step_0000333.npy new file mode 100644 index 0000000..e7eb962 --- /dev/null +++ b/margin_logs/step_0000333.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca2560cfef87b5ebadd7d93a3cffd184eceec4ca86dc395723276ff170ec6d89 +size 384 diff --git a/margin_logs/step_0000334.npy b/margin_logs/step_0000334.npy new file mode 100644 index 0000000..9a5e185 --- /dev/null +++ b/margin_logs/step_0000334.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6697d013ef62d33e245c8a247ce4b4b1bedc185f84200e066bbb3d75b52f0cee +size 384 diff --git a/margin_logs/step_0000335.npy b/margin_logs/step_0000335.npy new file mode 100644 index 0000000..423f260 --- /dev/null +++ b/margin_logs/step_0000335.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f0c6e9e240166ed1ac9fa2e975d5a906e8e34db2f10b625641d81f4ca86b7b +size 384 diff --git a/margin_logs/step_0000336.npy b/margin_logs/step_0000336.npy new file mode 100644 index 0000000..90a2995 --- /dev/null +++ b/margin_logs/step_0000336.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daa8ffafe361c99bcd22e2e2d887b12934c4761a770361f9fbcad8fcb934c0ac +size 384 diff --git a/margin_logs/step_0000337.npy b/margin_logs/step_0000337.npy new file mode 100644 index 0000000..75896ea --- /dev/null +++ b/margin_logs/step_0000337.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33c0d9356db8aed54e5124c879a9bf2393f35efac7c7b2a98a4722b6c270b8a2 +size 384 diff --git a/margin_logs/step_0000338.npy b/margin_logs/step_0000338.npy new file mode 100644 index 0000000..4e1867d --- /dev/null +++ b/margin_logs/step_0000338.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbecbaca1fc00d4e05557a3efadc7f74f89893eb518348d7088688c322f6cb13 +size 384 diff --git a/margin_logs/step_0000339.npy b/margin_logs/step_0000339.npy new file mode 100644 index 0000000..d54ab70 --- /dev/null +++ b/margin_logs/step_0000339.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d090481bf2d049f44df208c4ab74ccf58c4a2847b8f600f07da1fb1e7c1e6126 +size 384 diff --git a/margin_logs/step_0000340.npy b/margin_logs/step_0000340.npy new file mode 100644 index 0000000..f6ba47b --- /dev/null +++ b/margin_logs/step_0000340.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:188a04e053812373db4013dd792df8d10e72bd7f9e085a3ea21ca25320b574f0 +size 384 diff --git a/margin_logs/step_0000341.npy b/margin_logs/step_0000341.npy new file mode 100644 index 0000000..2ce6ebf --- /dev/null +++ b/margin_logs/step_0000341.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d99d65810d74852935e7bb9f5530729e3db53090d6c4ef3876725a5547a330 +size 384 diff --git a/margin_logs/step_0000342.npy b/margin_logs/step_0000342.npy new file mode 100644 index 0000000..6751c36 --- /dev/null +++ b/margin_logs/step_0000342.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a10c3675b24cce89cb91d6c937140cb1435637c3be1aa05c45b632897def7283 +size 384 diff --git a/margin_logs/step_0000343.npy b/margin_logs/step_0000343.npy new file mode 100644 index 0000000..0b21884 --- /dev/null +++ b/margin_logs/step_0000343.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14e37efaf9e0b5dfe5468f5e553d899242237d4b34ea1d92fde243f0cf1aef92 +size 384 diff --git a/margin_logs/step_0000344.npy b/margin_logs/step_0000344.npy new file mode 100644 index 0000000..c3ec8d5 --- /dev/null +++ b/margin_logs/step_0000344.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c79a3e5297db7f120638cba8d1af48bdc6bea656f6e60691335e5f0988fa335f +size 384 diff --git a/margin_logs/step_0000345.npy b/margin_logs/step_0000345.npy new file mode 100644 index 0000000..62f0e6c --- /dev/null +++ b/margin_logs/step_0000345.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3164196dc05172343b49e31ea24071633c90fe3248afa1ed71b6b36c94e13f3c +size 384 diff --git a/margin_logs/step_0000346.npy b/margin_logs/step_0000346.npy new file mode 100644 index 0000000..4199375 --- /dev/null +++ b/margin_logs/step_0000346.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40ff2ebdce202640916aa199f494bb3c45c5f0f02a591f04d5058fd99d81346 +size 384 diff --git a/margin_logs/step_0000347.npy b/margin_logs/step_0000347.npy new file mode 100644 index 0000000..9609ec2 --- /dev/null +++ b/margin_logs/step_0000347.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7b44085e129ed22deb01fb00fe17765550c60feca4b6424eb9fd2ebe1851a23 +size 384 diff --git a/margin_logs/step_0000348.npy b/margin_logs/step_0000348.npy new file mode 100644 index 0000000..baa124d --- /dev/null +++ b/margin_logs/step_0000348.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e0dcb9ead07ad58c079b332630d69e8a4dea75c6e46216c6f1eb4dc900f324e +size 384 diff --git a/margin_logs/step_0000349.npy b/margin_logs/step_0000349.npy new file mode 100644 index 0000000..1bb0ddb --- /dev/null +++ b/margin_logs/step_0000349.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfd0dae73448979b7c1fe928ceffc15ae348c3d873e9b0bdddd23bf12072dafa +size 384 diff --git a/margin_logs/step_0000350.npy b/margin_logs/step_0000350.npy new file mode 100644 index 0000000..7150041 --- /dev/null +++ b/margin_logs/step_0000350.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a09f3e7ea67f14e62a52a13f551338717abbd307a3942d075a5c9aa64970408c +size 384 diff --git a/margin_logs/step_0000351.npy b/margin_logs/step_0000351.npy new file mode 100644 index 0000000..3d98996 --- /dev/null +++ b/margin_logs/step_0000351.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0feefe26acd3bc8a8dd9c007e5d3b82b494f77f3bc8894df59958a8b3ce68306 +size 384 diff --git a/margin_logs/step_0000352.npy b/margin_logs/step_0000352.npy new file mode 100644 index 0000000..e8f4c89 --- /dev/null +++ b/margin_logs/step_0000352.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ce71d4a9a5bfd3d13b6d35313db3d1a7dc026e942367a0f65c58456b6c08be2 +size 384 diff --git a/margin_logs/step_0000353.npy b/margin_logs/step_0000353.npy new file mode 100644 index 0000000..0ea1c5f --- /dev/null +++ b/margin_logs/step_0000353.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9466c36624e47facc8f147907f0c439a9101065282bc1c51d47db7eecb068c3e +size 384 diff --git a/margin_logs/step_0000354.npy b/margin_logs/step_0000354.npy new file mode 100644 index 0000000..c8dd362 --- /dev/null +++ b/margin_logs/step_0000354.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22cb59e5f54f174a9c4e15e655fcb08d5e9e5d22e5d0b44cf63515824b6de15f +size 384 diff --git a/margin_logs/step_0000355.npy b/margin_logs/step_0000355.npy new file mode 100644 index 0000000..711a9b2 --- /dev/null +++ b/margin_logs/step_0000355.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec6b06e564b53b7eb380393d1824677a5ba275d4766752415e1db148b2168b22 +size 384 diff --git a/margin_logs/step_0000356.npy b/margin_logs/step_0000356.npy new file mode 100644 index 0000000..e689f91 --- /dev/null +++ b/margin_logs/step_0000356.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ad9a6b9affbf71a02e2debec3307874850bdf2e608a00428cd8cdece5005dd9 +size 384 diff --git a/margin_logs/step_0000357.npy b/margin_logs/step_0000357.npy new file mode 100644 index 0000000..c0980e8 --- /dev/null +++ b/margin_logs/step_0000357.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eb67e3e0fa2f8383041b2658f897d4d1ea14cfb69a5b8006fd6fc507025a3f6 +size 384 diff --git a/margin_logs/step_0000358.npy b/margin_logs/step_0000358.npy new file mode 100644 index 0000000..647d835 --- /dev/null +++ b/margin_logs/step_0000358.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:984c4eff6c8a815b43fc227e6962fecc1632d69e22a2446cb058aa0878c0d127 +size 384 diff --git a/margin_logs/step_0000359.npy b/margin_logs/step_0000359.npy new file mode 100644 index 0000000..4555e9b --- /dev/null +++ b/margin_logs/step_0000359.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d48f027a8c0c98bfd79591b16bec208abfccc1184bf36adacd759e4af8fd968e +size 384 diff --git a/margin_logs/step_0000360.npy b/margin_logs/step_0000360.npy new file mode 100644 index 0000000..839b54d --- /dev/null +++ b/margin_logs/step_0000360.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02ba05c4c1e56843df7c8bc88e589f371d8dd501c8c4f4292c42612966d0a6f2 +size 384 diff --git a/margin_logs/step_0000361.npy b/margin_logs/step_0000361.npy new file mode 100644 index 0000000..043fb17 --- /dev/null +++ b/margin_logs/step_0000361.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db42af1871628094aae9174193dd833449482b10918c0945d1a5859264229fb +size 384 diff --git a/margin_logs/step_0000362.npy b/margin_logs/step_0000362.npy new file mode 100644 index 0000000..c5bd6c2 --- /dev/null +++ b/margin_logs/step_0000362.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00577012fce4bb31fa744a8ef9fe13c4fe4550dc6cdaf0bb9710a4fac24a69f2 +size 384 diff --git a/margin_logs/step_0000363.npy b/margin_logs/step_0000363.npy new file mode 100644 index 0000000..2587300 --- /dev/null +++ b/margin_logs/step_0000363.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f70d4546a654afa461115889d59e58f06d9ccac0c3e978ce87b19d73a0f76999 +size 384 diff --git a/margin_logs/step_0000364.npy b/margin_logs/step_0000364.npy new file mode 100644 index 0000000..eade4ba --- /dev/null +++ b/margin_logs/step_0000364.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46ca80de1876b8697629b3ca7a880db5698341da89ca6ff004f3002a6426977b +size 384 diff --git a/margin_logs/step_0000365.npy b/margin_logs/step_0000365.npy new file mode 100644 index 0000000..323cdbf --- /dev/null +++ b/margin_logs/step_0000365.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69a033a45c9597a6881c453dc705fff58ec29db51ac2ae5a768d9a52ebd865d1 +size 384 diff --git a/margin_logs/step_0000366.npy b/margin_logs/step_0000366.npy new file mode 100644 index 0000000..9b2dd1f --- /dev/null +++ b/margin_logs/step_0000366.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e340fb519d9f2b9fbdb76af8da3efa9a78c51ef0b5ecf0dad3497cbf3cd6d733 +size 384 diff --git a/margin_logs/step_0000367.npy b/margin_logs/step_0000367.npy new file mode 100644 index 0000000..c8f06cc --- /dev/null +++ b/margin_logs/step_0000367.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34be2cfbccccd02fbc30602c89837becd7a9a6765f272621e05b240579724da5 +size 384 diff --git a/margin_logs/step_0000368.npy b/margin_logs/step_0000368.npy new file mode 100644 index 0000000..7886054 --- /dev/null +++ b/margin_logs/step_0000368.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c65fd2afdcaa5c1b0cd754f53a0fc8851f58dfcd6f64870863be9863074864ae +size 384 diff --git a/margin_logs/step_0000369.npy b/margin_logs/step_0000369.npy new file mode 100644 index 0000000..a1bfbf3 --- /dev/null +++ b/margin_logs/step_0000369.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2745bce3933e32b60703c1ef279fdb82309abd1538ee882be026839953b28422 +size 384 diff --git a/margin_logs/step_0000370.npy b/margin_logs/step_0000370.npy new file mode 100644 index 0000000..755a424 --- /dev/null +++ b/margin_logs/step_0000370.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52e803f71a2369121381f8882d88cfe852c7c7dbd617b5f7ef40a0de519f33fa +size 384 diff --git a/margin_logs/step_0000371.npy b/margin_logs/step_0000371.npy new file mode 100644 index 0000000..5c60291 --- /dev/null +++ b/margin_logs/step_0000371.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce031cb2a5184a0d90aa2a0cb2afffbadd4a5e827379f2422e5e6cfb0aa32a67 +size 384 diff --git a/margin_logs/step_0000372.npy b/margin_logs/step_0000372.npy new file mode 100644 index 0000000..5e6386e --- /dev/null +++ b/margin_logs/step_0000372.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59622e2039af713a69c1f3572da532fcd72185eb70b77bbb1f9df20ba929b090 +size 384 diff --git a/margin_logs/step_0000373.npy b/margin_logs/step_0000373.npy new file mode 100644 index 0000000..762d1b6 --- /dev/null +++ b/margin_logs/step_0000373.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f260224af9902f7abb63daf09e4c03348beb6573d839f0dbe5623634962ea892 +size 384 diff --git a/margin_logs/step_0000374.npy b/margin_logs/step_0000374.npy new file mode 100644 index 0000000..242b94c --- /dev/null +++ b/margin_logs/step_0000374.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c442dc9d58b89a902c8d3fe45943cda7b173565f866595f8636fa80454b37a3c +size 384 diff --git a/margin_logs/step_0000375.npy b/margin_logs/step_0000375.npy new file mode 100644 index 0000000..c4531bb --- /dev/null +++ b/margin_logs/step_0000375.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e32eb912b5c21b62f24443baca2bde56a03b51d32bfd0ab2046e4f260f038471 +size 384 diff --git a/margin_logs/step_0000376.npy b/margin_logs/step_0000376.npy new file mode 100644 index 0000000..dcbceee --- /dev/null +++ b/margin_logs/step_0000376.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c301cfc358579500245e0cad14f6f6211a02f381038df59765de6c5c194fd244 +size 384 diff --git a/margin_logs/step_0000377.npy b/margin_logs/step_0000377.npy new file mode 100644 index 0000000..6f006ff --- /dev/null +++ b/margin_logs/step_0000377.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3916ee2f84e8a2c24cced1692e582659e0a1fc6177f300ccbf2a5e3bc13e978f +size 384 diff --git a/margin_logs/step_0000378.npy b/margin_logs/step_0000378.npy new file mode 100644 index 0000000..05b3094 --- /dev/null +++ b/margin_logs/step_0000378.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a76e621b4d8e710f7615d36cee233cdcce47742d8f6f35fda5e343d9080225a +size 384 diff --git a/margin_logs/step_0000379.npy b/margin_logs/step_0000379.npy new file mode 100644 index 0000000..e3c57fd --- /dev/null +++ b/margin_logs/step_0000379.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f18acbd0441ae703476ddb76fdbdc47f30d755c1340a4f94f1fa85864015a1c9 +size 384 diff --git a/margin_logs/step_0000380.npy b/margin_logs/step_0000380.npy new file mode 100644 index 0000000..1f74f2a --- /dev/null +++ b/margin_logs/step_0000380.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f93ccf1df542ec08378f0b0b37fa7da36aa8e2f2014e6c469d667b43752b6c9 +size 384 diff --git a/margin_logs/step_0000381.npy b/margin_logs/step_0000381.npy new file mode 100644 index 0000000..58a4576 --- /dev/null +++ b/margin_logs/step_0000381.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a551c52bf1a6b28349e15734e38b21f48d3874482d158bd1c93cd2391b1be32 +size 384 diff --git a/margin_logs/step_0000382.npy b/margin_logs/step_0000382.npy new file mode 100644 index 0000000..db1c718 --- /dev/null +++ b/margin_logs/step_0000382.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:659430e47b324dccdeabdf85747eb93dd945141b33d523cc9dbcbe822118fdd8 +size 384 diff --git a/margin_logs/step_0000383.npy b/margin_logs/step_0000383.npy new file mode 100644 index 0000000..64b5117 --- /dev/null +++ b/margin_logs/step_0000383.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:735765d0a60609b19b539c462487e9b91703eb2ca28d8fccccbda076b6237ede +size 384 diff --git a/margin_logs/step_0000384.npy b/margin_logs/step_0000384.npy new file mode 100644 index 0000000..d29349c --- /dev/null +++ b/margin_logs/step_0000384.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:981e437972f3ef2fed4dcf4f32d39da22be7c08ce0a1f800c28981b0c6aa6124 +size 384 diff --git a/margin_logs/step_0000385.npy b/margin_logs/step_0000385.npy new file mode 100644 index 0000000..9f8d242 --- /dev/null +++ b/margin_logs/step_0000385.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff4171365645d7d9e81c80170cb332e28cc315cddd1b3a7ce3d493e06562205c +size 384 diff --git a/margin_logs/step_0000386.npy b/margin_logs/step_0000386.npy new file mode 100644 index 0000000..83c1994 --- /dev/null +++ b/margin_logs/step_0000386.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a19993f918d4fafc4824508c416a11d8941fc5b4a7e7e53fea231c02034405bd +size 384 diff --git a/margin_logs/step_0000387.npy b/margin_logs/step_0000387.npy new file mode 100644 index 0000000..43efd7c --- /dev/null +++ b/margin_logs/step_0000387.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f46851f05c39e39835234eb666aea66c52c1e75d47b266a4b3464364d543a991 +size 384 diff --git a/margin_logs/step_0000388.npy b/margin_logs/step_0000388.npy new file mode 100644 index 0000000..5a1e848 --- /dev/null +++ b/margin_logs/step_0000388.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69c5c69e4108f4b92254c67f55111f2f7abd095cc7c6bfdf9506b160058969e8 +size 384 diff --git a/margin_logs/step_0000389.npy b/margin_logs/step_0000389.npy new file mode 100644 index 0000000..a496201 --- /dev/null +++ b/margin_logs/step_0000389.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09764f8010eff90bb517cd36cadf78f2840829427aa8e27af942281ebdf240a9 +size 384 diff --git a/margin_logs/step_0000390.npy b/margin_logs/step_0000390.npy new file mode 100644 index 0000000..afde019 --- /dev/null +++ b/margin_logs/step_0000390.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afdec63701ddbafd5981102a2e09e2b706630132b8516e8ee812eefbda9b3d2c +size 384 diff --git a/margin_logs/step_0000391.npy b/margin_logs/step_0000391.npy new file mode 100644 index 0000000..1d84a26 --- /dev/null +++ b/margin_logs/step_0000391.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a992e304ff68b3d805ea78eb92cd8b35bc853f49f2b9653bd2c45e71df3c8ab +size 384 diff --git a/margin_logs/step_0000392.npy b/margin_logs/step_0000392.npy new file mode 100644 index 0000000..e020dc9 --- /dev/null +++ b/margin_logs/step_0000392.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6db6c662797ff55f294cd01293fdd6a74e40bff6d008f415ccd1a1068626f2e3 +size 384 diff --git a/margin_logs/step_0000393.npy b/margin_logs/step_0000393.npy new file mode 100644 index 0000000..f930aa2 --- /dev/null +++ b/margin_logs/step_0000393.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06875719c566d71b468e7415b247b63a757c0b34846a5dbc83fcfd20ab5132f9 +size 384 diff --git a/margin_logs/step_0000394.npy b/margin_logs/step_0000394.npy new file mode 100644 index 0000000..6bcf144 --- /dev/null +++ b/margin_logs/step_0000394.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb463f78435f37f9e7dc539f5edbdc9364a609e5e33519fc8cbc409540a6ccc4 +size 384 diff --git a/margin_logs/step_0000395.npy b/margin_logs/step_0000395.npy new file mode 100644 index 0000000..860ab87 --- /dev/null +++ b/margin_logs/step_0000395.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05a46708ee3e7060b135141925cc61349a854076a2ce92dea4dab5b10d334e4f +size 384 diff --git a/margin_logs/step_0000396.npy b/margin_logs/step_0000396.npy new file mode 100644 index 0000000..edcdaf1 --- /dev/null +++ b/margin_logs/step_0000396.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aaa0495bdc3df1a221f1e2fd8425c962107c486da438999a03a5b0e0be52a7b +size 384 diff --git a/margin_logs/step_0000397.npy b/margin_logs/step_0000397.npy new file mode 100644 index 0000000..de255a7 --- /dev/null +++ b/margin_logs/step_0000397.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31cb704af2af76eb0ca88eb7c948a2630d98c34127252ebda5a88c1ee3f32c47 +size 384 diff --git a/margin_logs/step_0000398.npy b/margin_logs/step_0000398.npy new file mode 100644 index 0000000..896bf0c --- /dev/null +++ b/margin_logs/step_0000398.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebb83fc50e53396871bcc089eb4d30613172724dc1b0815df5831f35e6848a2e +size 384 diff --git a/margin_logs/step_0000399.npy b/margin_logs/step_0000399.npy new file mode 100644 index 0000000..ed22079 --- /dev/null +++ b/margin_logs/step_0000399.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386a54f8ff7311b8abd6fbd907c828af258c5380ff7a1653a66d673c11717e27 +size 384 diff --git a/margin_logs/step_0000400.npy b/margin_logs/step_0000400.npy new file mode 100644 index 0000000..3d1564e --- /dev/null +++ b/margin_logs/step_0000400.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc22506d27a5baf133a8cc21dbbfaccdd67230a07cf4ca0397ef58eb8357f493 +size 384 diff --git a/margin_logs/step_0000401.npy b/margin_logs/step_0000401.npy new file mode 100644 index 0000000..fe0cd19 --- /dev/null +++ b/margin_logs/step_0000401.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25335de77fe2573ae1b3bb719a443f63c495887342df74cbb1aa89b78bbb8865 +size 384 diff --git a/margin_logs/step_0000402.npy b/margin_logs/step_0000402.npy new file mode 100644 index 0000000..db27b64 --- /dev/null +++ b/margin_logs/step_0000402.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:129c5cf667972d3865800ab8fa8f466511170097db7d8dbd39213a40bb91a2af +size 384 diff --git a/margin_logs/step_0000403.npy b/margin_logs/step_0000403.npy new file mode 100644 index 0000000..dc002be --- /dev/null +++ b/margin_logs/step_0000403.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f7f07b033ad017958f0114a7de342be0afe048c1603ac6ec836f67d44377da8 +size 384 diff --git a/margin_logs/step_0000404.npy b/margin_logs/step_0000404.npy new file mode 100644 index 0000000..bbda8b7 --- /dev/null +++ b/margin_logs/step_0000404.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7e1b705b16c02783dd319d246e2b7e1a6908324868dd68cc60e4b133a4ce6d3 +size 384 diff --git a/margin_logs/step_0000405.npy b/margin_logs/step_0000405.npy new file mode 100644 index 0000000..9dc79d3 --- /dev/null +++ b/margin_logs/step_0000405.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da050a5e2a096ee6949dea2b869d6113b5ddb1032c66660e0e36454ec6c14a0d +size 384 diff --git a/margin_logs/step_0000406.npy b/margin_logs/step_0000406.npy new file mode 100644 index 0000000..40dde75 --- /dev/null +++ b/margin_logs/step_0000406.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2417ed49c8a40f063ac9c5ce41a8ef15bec6d25c800cb8326ad98eb90ce61e0e +size 384 diff --git a/margin_logs/step_0000407.npy b/margin_logs/step_0000407.npy new file mode 100644 index 0000000..51fc59d --- /dev/null +++ b/margin_logs/step_0000407.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e3dc10a20c798c442041df385277b88a89910728d7a7947c8a560dca8386dbd +size 384 diff --git a/margin_logs/step_0000408.npy b/margin_logs/step_0000408.npy new file mode 100644 index 0000000..6fbed9d --- /dev/null +++ b/margin_logs/step_0000408.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:821a8570ceeb9c8f12b97d69d9f075cee399b288b5912dc8ea7e576782171956 +size 384 diff --git a/margin_logs/step_0000409.npy b/margin_logs/step_0000409.npy new file mode 100644 index 0000000..c7e4945 --- /dev/null +++ b/margin_logs/step_0000409.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bba302885becbc309b06dc003874069166c13963cd6d083325e1e5b0af046215 +size 384 diff --git a/margin_logs/step_0000410.npy b/margin_logs/step_0000410.npy new file mode 100644 index 0000000..ca0846e --- /dev/null +++ b/margin_logs/step_0000410.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00e1546c3c77e9355cbbaed052d3c922646efb8e3f335f55d0dd27d709550731 +size 384 diff --git a/margin_logs/step_0000411.npy b/margin_logs/step_0000411.npy new file mode 100644 index 0000000..68241d6 --- /dev/null +++ b/margin_logs/step_0000411.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7adad41eb9eeb5887f87259420cc0664dc7af30f36ce12bf167a0d1f3237c78 +size 384 diff --git a/margin_logs/step_0000412.npy b/margin_logs/step_0000412.npy new file mode 100644 index 0000000..293a6f0 --- /dev/null +++ b/margin_logs/step_0000412.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08b13019f8b1d65da2fb06bdf2c0862c6ae569d68e728e0665c80d85f486f1f4 +size 384 diff --git a/margin_logs/step_0000413.npy b/margin_logs/step_0000413.npy new file mode 100644 index 0000000..b1ba157 --- /dev/null +++ b/margin_logs/step_0000413.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91351ee9c31f3e84e89922cab83e189a7e2b83e74d95098638c71bb0b405e2d3 +size 384 diff --git a/margin_logs/step_0000414.npy b/margin_logs/step_0000414.npy new file mode 100644 index 0000000..00e0689 --- /dev/null +++ b/margin_logs/step_0000414.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2c22a49c197e8dfcca8376292b24163cda30f3373b8f322fafdd7944b933b8f +size 384 diff --git a/margin_logs/step_0000415.npy b/margin_logs/step_0000415.npy new file mode 100644 index 0000000..3578f78 --- /dev/null +++ b/margin_logs/step_0000415.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84708d87c07d977261eed64a4a58ff6987f2c9028025db3c91dbeceaa2d40184 +size 384 diff --git a/margin_logs/step_0000416.npy b/margin_logs/step_0000416.npy new file mode 100644 index 0000000..4629354 --- /dev/null +++ b/margin_logs/step_0000416.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aedd9ed8b5710cf1aff5b69da0b1fe40af3051ab27781b7a6c916e01999ce6ad +size 384 diff --git a/margin_logs/step_0000417.npy b/margin_logs/step_0000417.npy new file mode 100644 index 0000000..efe7574 --- /dev/null +++ b/margin_logs/step_0000417.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af8f4c9cb5838d018831df5f88c19a2ba4649e8c5f9a1c87f7952a6d4a354e57 +size 384 diff --git a/margin_logs/step_0000418.npy b/margin_logs/step_0000418.npy new file mode 100644 index 0000000..5c51acd --- /dev/null +++ b/margin_logs/step_0000418.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b86d761050d19ffd63d2ae1d203dfd9f0857fe673f9d3bc3cfa6b9f31f4d6f68 +size 384 diff --git a/margin_logs/step_0000419.npy b/margin_logs/step_0000419.npy new file mode 100644 index 0000000..c810b63 --- /dev/null +++ b/margin_logs/step_0000419.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5570266c907b07ecd704e0a0d4cf62c187693e37a12b5f2bd3740874f7f48f6c +size 384 diff --git a/margin_logs/step_0000420.npy b/margin_logs/step_0000420.npy new file mode 100644 index 0000000..aa3f1ac --- /dev/null +++ b/margin_logs/step_0000420.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f7fe6603edf7bcba7c1173c237daa19dbf3e3c20570bac07938dd37c15aef2e +size 384 diff --git a/margin_logs/step_0000421.npy b/margin_logs/step_0000421.npy new file mode 100644 index 0000000..b756ebe --- /dev/null +++ b/margin_logs/step_0000421.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7dd38544d6b1d1be9b4241ece00647c7bf380743308c4999c009218aeb57356 +size 384 diff --git a/margin_logs/step_0000422.npy b/margin_logs/step_0000422.npy new file mode 100644 index 0000000..b81738d --- /dev/null +++ b/margin_logs/step_0000422.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4d29a23eb74c6a8a88ec0e10f692f269ba347237960782504ca0a24b76af7a7 +size 384 diff --git a/margin_logs/step_0000423.npy b/margin_logs/step_0000423.npy new file mode 100644 index 0000000..ff6cf8c --- /dev/null +++ b/margin_logs/step_0000423.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f1ef18bb02786d513458cf05f8d4fe1a6a783be8fb91d81f8b132215046ef7e +size 384 diff --git a/margin_logs/step_0000424.npy b/margin_logs/step_0000424.npy new file mode 100644 index 0000000..bd5bb8c --- /dev/null +++ b/margin_logs/step_0000424.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc250dfa0fd82a8a101975b887cfe631375028a23708fc5e746be214a19e0252 +size 384 diff --git a/margin_logs/step_0000425.npy b/margin_logs/step_0000425.npy new file mode 100644 index 0000000..39c6dae --- /dev/null +++ b/margin_logs/step_0000425.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:366f9e0f09b17c69c904c7f51ae06ca7e153eea235817f29d93221a72b3f855b +size 384 diff --git a/margin_logs/step_0000426.npy b/margin_logs/step_0000426.npy new file mode 100644 index 0000000..72ac345 --- /dev/null +++ b/margin_logs/step_0000426.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7023b6602c45cd3f53159b13718910bd8d30b90fba3945fdd38d00cfbf13d0f +size 384 diff --git a/margin_logs/step_0000427.npy b/margin_logs/step_0000427.npy new file mode 100644 index 0000000..fdf0f4a --- /dev/null +++ b/margin_logs/step_0000427.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68cd68397315bfbf63156e3d233453c101626cb02ad080328c1de039f894f0a3 +size 384 diff --git a/margin_logs/step_0000428.npy b/margin_logs/step_0000428.npy new file mode 100644 index 0000000..637a04d --- /dev/null +++ b/margin_logs/step_0000428.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87219ea9c8ca79b97227e1ab997636f4c10f0c2628215b026fa88f3dc6d80045 +size 384 diff --git a/margin_logs/step_0000429.npy b/margin_logs/step_0000429.npy new file mode 100644 index 0000000..b8873dd --- /dev/null +++ b/margin_logs/step_0000429.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fc532b5a1f65107275cb92efa974e62adc4427c8020c6e8ba98abfb07a484fe +size 384 diff --git a/margin_logs/step_0000430.npy b/margin_logs/step_0000430.npy new file mode 100644 index 0000000..aedd4f7 --- /dev/null +++ b/margin_logs/step_0000430.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b1a69b72ba5a4d20ac4c61ddd5c17b5fc7b4aaeb9a75ddc90705c2806ab0625 +size 384 diff --git a/margin_logs/step_0000431.npy b/margin_logs/step_0000431.npy new file mode 100644 index 0000000..4d2d749 --- /dev/null +++ b/margin_logs/step_0000431.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab94d9d2714fd997436ed25d8c91a84bc57e00f564023bde6823fc1d51e1d95d +size 384 diff --git a/margin_logs/step_0000432.npy b/margin_logs/step_0000432.npy new file mode 100644 index 0000000..d652477 --- /dev/null +++ b/margin_logs/step_0000432.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a60dac881e8947b458d6392580469f047b102f02d9a807436d8cbcd3b04cb5f9 +size 384 diff --git a/margin_logs/step_0000433.npy b/margin_logs/step_0000433.npy new file mode 100644 index 0000000..9e38d8c --- /dev/null +++ b/margin_logs/step_0000433.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f6bdf468ef433296d560f70d5e08ddb2e67d6e9bb4a7e5b0f861dca7e39c984 +size 384 diff --git a/margin_logs/step_0000434.npy b/margin_logs/step_0000434.npy new file mode 100644 index 0000000..f6e69bf --- /dev/null +++ b/margin_logs/step_0000434.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af041c5b4c42d12a29cf25f624f42fd4f34c4712f105a986176a90296f8b8b55 +size 384 diff --git a/margin_logs/step_0000435.npy b/margin_logs/step_0000435.npy new file mode 100644 index 0000000..463c460 --- /dev/null +++ b/margin_logs/step_0000435.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35eb960110e4dde98a2608b21a022b0d5e79a2d61cdd547e2cf3becbd18e94dd +size 384 diff --git a/margin_logs/step_0000436.npy b/margin_logs/step_0000436.npy new file mode 100644 index 0000000..1a41eed --- /dev/null +++ b/margin_logs/step_0000436.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c855a7c16d7a8f5c543ee5081c5c898c6455ddafbdfd150c802b6d893a1b6893 +size 384 diff --git a/margin_logs/step_0000437.npy b/margin_logs/step_0000437.npy new file mode 100644 index 0000000..04f537a --- /dev/null +++ b/margin_logs/step_0000437.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bdf442c12ae0002bab5b314356a4172ca6ab59a80d1569884ce99006db4cc80 +size 384 diff --git a/margin_logs/step_0000438.npy b/margin_logs/step_0000438.npy new file mode 100644 index 0000000..2e0e0d4 --- /dev/null +++ b/margin_logs/step_0000438.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ddaae52f163860130a259ec37a0ba532fd58a30e912c9dbaeb1164b0ec4eed5 +size 384 diff --git a/margin_logs/step_0000439.npy b/margin_logs/step_0000439.npy new file mode 100644 index 0000000..cea10dc --- /dev/null +++ b/margin_logs/step_0000439.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3caf15ad4037eb4bb114b319908966adcc9d5e2df73f93bc5cca1286043f002 +size 384 diff --git a/margin_logs/step_0000440.npy b/margin_logs/step_0000440.npy new file mode 100644 index 0000000..4343e54 --- /dev/null +++ b/margin_logs/step_0000440.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae78cefcc595afce8bf17175ad4aec1305e85e19c2ad393f9a72d0c996ecc5a3 +size 384 diff --git a/margin_logs/step_0000441.npy b/margin_logs/step_0000441.npy new file mode 100644 index 0000000..fea833d --- /dev/null +++ b/margin_logs/step_0000441.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe1a486237f7c245592b0e1a8f91a4886497629400d0d15eee15ee7f5d4d2d1f +size 384 diff --git a/margin_logs/step_0000442.npy b/margin_logs/step_0000442.npy new file mode 100644 index 0000000..5a7193e --- /dev/null +++ b/margin_logs/step_0000442.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20bd880a4e4bb75f39d83cfd460d2a6cd224663b789d4044c2d930909a3f7cde +size 384 diff --git a/margin_logs/step_0000443.npy b/margin_logs/step_0000443.npy new file mode 100644 index 0000000..57cd9b4 --- /dev/null +++ b/margin_logs/step_0000443.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e1bf35f7bfcb067dcf4cf61b22e48b7312c87ccc8252730b136c36407734960 +size 384 diff --git a/margin_logs/step_0000444.npy b/margin_logs/step_0000444.npy new file mode 100644 index 0000000..9c463ac --- /dev/null +++ b/margin_logs/step_0000444.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ffb0827c212cc1d9047035edf5e75dbb2624b3459c43b8c84152d9d6c8fc306 +size 384 diff --git a/margin_logs/step_0000445.npy b/margin_logs/step_0000445.npy new file mode 100644 index 0000000..a6db066 --- /dev/null +++ b/margin_logs/step_0000445.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c846edf2cae39c4bd01ab74fe6f3fd1a2fc55a7110158021b106350f57ee7e6a +size 384 diff --git a/margin_logs/step_0000446.npy b/margin_logs/step_0000446.npy new file mode 100644 index 0000000..85250a3 --- /dev/null +++ b/margin_logs/step_0000446.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:074edfdf98137c3a133b675ec485b711e9076c2487e07aed3aa77def296212e7 +size 384 diff --git a/margin_logs/step_0000447.npy b/margin_logs/step_0000447.npy new file mode 100644 index 0000000..c8e866b --- /dev/null +++ b/margin_logs/step_0000447.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d6d1603ee83599b3d5b46a4ce02a298515293091d654ddee4b820179f4dbe5a +size 384 diff --git a/margin_logs/step_0000448.npy b/margin_logs/step_0000448.npy new file mode 100644 index 0000000..f525184 --- /dev/null +++ b/margin_logs/step_0000448.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:981eb6024d798ab282ddbdac89c0ddd4f2b99af01d7b141b192e33464d1a9a27 +size 384 diff --git a/margin_logs/step_0000449.npy b/margin_logs/step_0000449.npy new file mode 100644 index 0000000..d43b37d --- /dev/null +++ b/margin_logs/step_0000449.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8951bb908ba77576378a658634f5ae58ca3fef8f45cc5d5a55aa88854352e5c5 +size 384 diff --git a/margin_logs/step_0000450.npy b/margin_logs/step_0000450.npy new file mode 100644 index 0000000..92b8d74 --- /dev/null +++ b/margin_logs/step_0000450.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64f50b432c78aef0fefdaf5b9f050ed9022554fdce1b4f16f7ef5d3206b1b014 +size 384 diff --git a/margin_logs/step_0000451.npy b/margin_logs/step_0000451.npy new file mode 100644 index 0000000..7e440b6 --- /dev/null +++ b/margin_logs/step_0000451.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2607f00007fff36dc1b0e2377dc716a8172eb549c6d94c42c68948bd9e1d146 +size 384 diff --git a/margin_logs/step_0000452.npy b/margin_logs/step_0000452.npy new file mode 100644 index 0000000..289d3fd --- /dev/null +++ b/margin_logs/step_0000452.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dcf6c88a0265e9e7bf8f7e1355078fcf691c8f85828db7e26f2567f77a69483 +size 384 diff --git a/margin_logs/step_0000453.npy b/margin_logs/step_0000453.npy new file mode 100644 index 0000000..ed0ed7d --- /dev/null +++ b/margin_logs/step_0000453.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7171ae966e5df0bb1764149a51cde6440e9717159f28a2204a67565b97782ec +size 384 diff --git a/margin_logs/step_0000454.npy b/margin_logs/step_0000454.npy new file mode 100644 index 0000000..0ece7d4 --- /dev/null +++ b/margin_logs/step_0000454.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:136701d3e799bfb92127ce1d8250ec25a5cd659ce2e068c054318e6cbf540b7a +size 384 diff --git a/margin_logs/step_0000455.npy b/margin_logs/step_0000455.npy new file mode 100644 index 0000000..b48876a --- /dev/null +++ b/margin_logs/step_0000455.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0afd178c886552dd3239b3bc29ba0140e83ea470839b5d3e9fa60f5793687e78 +size 384 diff --git a/margin_logs/step_0000456.npy b/margin_logs/step_0000456.npy new file mode 100644 index 0000000..7d96f8a --- /dev/null +++ b/margin_logs/step_0000456.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5787bd1921b2a3a0adfc958d9a72af43a67636fa468e9091007abb6a3d0e0434 +size 384 diff --git a/margin_logs/step_0000457.npy b/margin_logs/step_0000457.npy new file mode 100644 index 0000000..314a537 --- /dev/null +++ b/margin_logs/step_0000457.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb74c7cc2450c982837e248b5b715c666d050d2b7e594851d7c30ed869cf3444 +size 384 diff --git a/margin_logs/step_0000458.npy b/margin_logs/step_0000458.npy new file mode 100644 index 0000000..555df4e --- /dev/null +++ b/margin_logs/step_0000458.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0a3c006fad5186158430dd88323cf69617ea8e21e8718e73949e3e8e48d5fca +size 384 diff --git a/margin_logs/step_0000459.npy b/margin_logs/step_0000459.npy new file mode 100644 index 0000000..ea54827 --- /dev/null +++ b/margin_logs/step_0000459.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97fcb0b0a9c171a1f89a176775b2303b4a5399e412f24b790ed64ac71f060aa8 +size 384 diff --git a/margin_logs/step_0000460.npy b/margin_logs/step_0000460.npy new file mode 100644 index 0000000..b019a28 --- /dev/null +++ b/margin_logs/step_0000460.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46b2d5223a229180f3ce964a20632e4ffff04ddcfe70ecf9375815e124ac4dfd +size 384 diff --git a/margin_logs/step_0000461.npy b/margin_logs/step_0000461.npy new file mode 100644 index 0000000..f227ef1 --- /dev/null +++ b/margin_logs/step_0000461.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b448c0f54948ff88c149e16ebe61d2a9a1eec11098c9d679a7c174330412710a +size 384 diff --git a/margin_logs/step_0000462.npy b/margin_logs/step_0000462.npy new file mode 100644 index 0000000..9988ece --- /dev/null +++ b/margin_logs/step_0000462.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08995e8c21df0490feb4568dc5464bef66720a3b677cf1227b8c71f147398dfb +size 384 diff --git a/margin_logs/step_0000463.npy b/margin_logs/step_0000463.npy new file mode 100644 index 0000000..cb2378a --- /dev/null +++ b/margin_logs/step_0000463.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2028caef9463ae1c60712e18487c89d5025c14a0524d9fb7afcbc47e57bf8f4d +size 384 diff --git a/margin_logs/step_0000464.npy b/margin_logs/step_0000464.npy new file mode 100644 index 0000000..7e4b338 --- /dev/null +++ b/margin_logs/step_0000464.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b05fc176915897fba1a49d3f4ad5c7374123bcff0a3e2e22adf3daeb8e118d12 +size 384 diff --git a/margin_logs/step_0000465.npy b/margin_logs/step_0000465.npy new file mode 100644 index 0000000..813ce3c --- /dev/null +++ b/margin_logs/step_0000465.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8a106481b6fd72bc3919f65e67767a41dd61b56caa6b5d2300e71818f966d6a +size 384 diff --git a/margin_logs/step_0000466.npy b/margin_logs/step_0000466.npy new file mode 100644 index 0000000..2b29842 --- /dev/null +++ b/margin_logs/step_0000466.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17ecad59726fec5ac722be4aa3b6172cfbaf6d96c9b22006ecb8a448e5f1a5c9 +size 384 diff --git a/margin_logs/step_0000467.npy b/margin_logs/step_0000467.npy new file mode 100644 index 0000000..3c511e0 --- /dev/null +++ b/margin_logs/step_0000467.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6679e886e90f5a8024ab54ec09fa7885778361ecff217cf05570bbfc0f07e6db +size 384 diff --git a/margin_logs/step_0000468.npy b/margin_logs/step_0000468.npy new file mode 100644 index 0000000..a0d6e0a --- /dev/null +++ b/margin_logs/step_0000468.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e898e2254bc67ef8db84cd29416b2db3d3dfb5a4b67dd2ec44ff0d5baf47b125 +size 384 diff --git a/margin_logs/step_0000469.npy b/margin_logs/step_0000469.npy new file mode 100644 index 0000000..d9db724 --- /dev/null +++ b/margin_logs/step_0000469.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83a74f283f4d3f0dccbf976127d1dd39e566906def762fe26639ddad881a4801 +size 384 diff --git a/margin_logs/step_0000470.npy b/margin_logs/step_0000470.npy new file mode 100644 index 0000000..ba928a2 --- /dev/null +++ b/margin_logs/step_0000470.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dfceb8885d9cccc841e79d5294eb4bf7e0cc2ca40a7735155f89400e189e719 +size 384 diff --git a/margin_logs/step_0000471.npy b/margin_logs/step_0000471.npy new file mode 100644 index 0000000..737f2ca --- /dev/null +++ b/margin_logs/step_0000471.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33dfb570c6214b3ac6b659fac3bf81c1116481f90d134e5fbf12c9c09b2077bb +size 384 diff --git a/margin_logs/step_0000472.npy b/margin_logs/step_0000472.npy new file mode 100644 index 0000000..137aab5 --- /dev/null +++ b/margin_logs/step_0000472.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41e994bef7b4f7adae31ecd2bfbc7fc3ad8d24d709cb8930b15e868fb6f93823 +size 384 diff --git a/margin_logs/step_0000473.npy b/margin_logs/step_0000473.npy new file mode 100644 index 0000000..b915837 --- /dev/null +++ b/margin_logs/step_0000473.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b62ac7b6c017711d783ac2f7d65bb078c60c8cc2b5c38e2909a66d4b3d117fee +size 384 diff --git a/margin_logs/step_0000474.npy b/margin_logs/step_0000474.npy new file mode 100644 index 0000000..ec804ff --- /dev/null +++ b/margin_logs/step_0000474.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b436de0f56e49020b56b79deec19c5d0528251c7c2a7e60754ee304267e08a41 +size 384 diff --git a/margin_logs/step_0000475.npy b/margin_logs/step_0000475.npy new file mode 100644 index 0000000..4773f77 --- /dev/null +++ b/margin_logs/step_0000475.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0996b6ba7134ab553a9860ac09811c7ad8361f2269ee61174e854cd8a816b69f +size 384 diff --git a/margin_logs/step_0000476.npy b/margin_logs/step_0000476.npy new file mode 100644 index 0000000..69df92e --- /dev/null +++ b/margin_logs/step_0000476.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c371880d21a0f17e978db2675e94aa4539209ec0f4f019425129e2df43d98db1 +size 384 diff --git a/margin_logs/step_0000477.npy b/margin_logs/step_0000477.npy new file mode 100644 index 0000000..43860a8 --- /dev/null +++ b/margin_logs/step_0000477.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71899ff5dd44b14666340cc3940dc682d14d9b32b8533ed0e209cdda2855aa31 +size 384 diff --git a/margin_logs/step_0000478.npy b/margin_logs/step_0000478.npy new file mode 100644 index 0000000..a42083f --- /dev/null +++ b/margin_logs/step_0000478.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:117163d1426b10ff4820709a62f642aa526bcd00a0b5554bb2ffd6906ca975c5 +size 384 diff --git a/margin_logs/step_0000479.npy b/margin_logs/step_0000479.npy new file mode 100644 index 0000000..23da24a --- /dev/null +++ b/margin_logs/step_0000479.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:988133849a11d06195aae93883bc2a5ec4c2b16b604fd07f0eedb19ffc6ec17c +size 384 diff --git a/margin_logs/step_0000480.npy b/margin_logs/step_0000480.npy new file mode 100644 index 0000000..0a6057a --- /dev/null +++ b/margin_logs/step_0000480.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:592a115fe3ccbd39a434e0032cceabff1e0a3d71a55f402cc913415ac12e0783 +size 384 diff --git a/margin_logs/step_0000481.npy b/margin_logs/step_0000481.npy new file mode 100644 index 0000000..1672349 --- /dev/null +++ b/margin_logs/step_0000481.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:360e44ab4f4581715ae664a49ffc2fb409f4e4228bfccbf747ef4e91fe60c719 +size 384 diff --git a/margin_logs/step_0000482.npy b/margin_logs/step_0000482.npy new file mode 100644 index 0000000..ef1846f --- /dev/null +++ b/margin_logs/step_0000482.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c07e398d5943ed53593cbbda33854e155de34ae8c386bb872e6dd19775312c0 +size 384 diff --git a/margin_logs/step_0000483.npy b/margin_logs/step_0000483.npy new file mode 100644 index 0000000..85f73ac --- /dev/null +++ b/margin_logs/step_0000483.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40048955ede6bbf7f9d28dadfae44c331d52a61ea098c7df3a13327031466ebf +size 384 diff --git a/margin_logs/step_0000484.npy b/margin_logs/step_0000484.npy new file mode 100644 index 0000000..3b1768b --- /dev/null +++ b/margin_logs/step_0000484.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae847595ace670f42f82ff9e8854bfecb84b590121cc1d4beb19fe852b5dcc58 +size 384 diff --git a/margin_logs/step_0000485.npy b/margin_logs/step_0000485.npy new file mode 100644 index 0000000..88592cd --- /dev/null +++ b/margin_logs/step_0000485.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77df19e377828d45140b19a48bd8d23e42fa68cfe2495909e3ed0b83f2041f81 +size 384 diff --git a/margin_logs/step_0000486.npy b/margin_logs/step_0000486.npy new file mode 100644 index 0000000..d830770 --- /dev/null +++ b/margin_logs/step_0000486.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:208b4e1c3c51f4d80f07b549ab228ea17926d247ad1db7d91f50d2d462bc391b +size 384 diff --git a/margin_logs/step_0000487.npy b/margin_logs/step_0000487.npy new file mode 100644 index 0000000..79401fd --- /dev/null +++ b/margin_logs/step_0000487.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:676ea6e7485ff17fb767226db765cd7c8e00b40799c0f67603c85eed7c7d4e2a +size 384 diff --git a/margin_logs/step_0000488.npy b/margin_logs/step_0000488.npy new file mode 100644 index 0000000..18259b9 --- /dev/null +++ b/margin_logs/step_0000488.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa21fb5fa7d32f9b0e387a889bd371f2d0446175fe12c5d4e95f43dc6e58ea5a +size 384 diff --git a/margin_logs/step_0000489.npy b/margin_logs/step_0000489.npy new file mode 100644 index 0000000..9f143b5 --- /dev/null +++ b/margin_logs/step_0000489.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60eb1f5e8e73c5b6cb51a012ded0cec2bd20f2c0455ae131892e8f976d15822d +size 384 diff --git a/margin_logs/step_0000490.npy b/margin_logs/step_0000490.npy new file mode 100644 index 0000000..7a2aad7 --- /dev/null +++ b/margin_logs/step_0000490.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca7ed64797c37fb71456b897f6a071604f673551b9c2045d712f388fefa5cc51 +size 384 diff --git a/margin_logs/step_0000491.npy b/margin_logs/step_0000491.npy new file mode 100644 index 0000000..d03c4b1 --- /dev/null +++ b/margin_logs/step_0000491.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea99699a30f88090f8f1f3b8e0ebf99fcb44d24dc02608e92ce43bfd0d5f606f +size 384 diff --git a/margin_logs/step_0000492.npy b/margin_logs/step_0000492.npy new file mode 100644 index 0000000..dfee2d8 --- /dev/null +++ b/margin_logs/step_0000492.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8a9d854871f6b05c839ae7b2b9cbfb90f828a71ccd65154b24c3491a57de35c +size 384 diff --git a/margin_logs/step_0000493.npy b/margin_logs/step_0000493.npy new file mode 100644 index 0000000..d126cad --- /dev/null +++ b/margin_logs/step_0000493.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:600f67efec8dd61467de193ff8c61910ee040ae59eab596454faf644b643ce93 +size 384 diff --git a/margin_logs/step_0000494.npy b/margin_logs/step_0000494.npy new file mode 100644 index 0000000..b8b2995 --- /dev/null +++ b/margin_logs/step_0000494.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38c3970e137dd7b74c87719b6dd3d651aee496f01b006625e2e624b5486c2151 +size 384 diff --git a/margin_logs/step_0000495.npy b/margin_logs/step_0000495.npy new file mode 100644 index 0000000..96b94c1 --- /dev/null +++ b/margin_logs/step_0000495.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18fe9b6c8a6e203319aef314b8bd9237b956eb1003ee1d248a2f39b3ca46a821 +size 384 diff --git a/margin_logs/step_0000496.npy b/margin_logs/step_0000496.npy new file mode 100644 index 0000000..6e94417 --- /dev/null +++ b/margin_logs/step_0000496.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4528668afbff47173a262c138975b346dab9ce1f677555e5a13574b94b664d08 +size 384 diff --git a/margin_logs/step_0000497.npy b/margin_logs/step_0000497.npy new file mode 100644 index 0000000..8f0ad16 --- /dev/null +++ b/margin_logs/step_0000497.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5545a08a6d5d4ff6967b9da1483f24a8b64bd47dbd41a0a5be7e562d53938eab +size 384 diff --git a/margin_logs/step_0000498.npy b/margin_logs/step_0000498.npy new file mode 100644 index 0000000..4236123 --- /dev/null +++ b/margin_logs/step_0000498.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebffa13dae7e701c7f9e6d61acfc5bc23ada2ef6a207a8100931fdf7a212489a +size 384 diff --git a/margin_logs/step_0000499.npy b/margin_logs/step_0000499.npy new file mode 100644 index 0000000..a5458f8 --- /dev/null +++ b/margin_logs/step_0000499.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85aa39596b776863e0e2d0c40785fd22a71343bab24b9b6418d491c440e62b5c +size 384 diff --git a/margin_logs/step_0000500.npy b/margin_logs/step_0000500.npy new file mode 100644 index 0000000..ea7220c --- /dev/null +++ b/margin_logs/step_0000500.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0f8ee4c6a735287e7f658b2544bea5a2022057d69034c217103217a6d862325 +size 384 diff --git a/margin_logs/step_0000501.npy b/margin_logs/step_0000501.npy new file mode 100644 index 0000000..ca33ab0 --- /dev/null +++ b/margin_logs/step_0000501.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:879dd1189815e6f2372333b6f1740f27c9608412490d7d62af8cc118080079d3 +size 384 diff --git a/margin_logs/step_0000502.npy b/margin_logs/step_0000502.npy new file mode 100644 index 0000000..69a9e66 --- /dev/null +++ b/margin_logs/step_0000502.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46e83438e84a60b5a94d48ce0bcdc45635ec98b273c7c83f40e698617fad5f72 +size 384 diff --git a/margin_logs/step_0000503.npy b/margin_logs/step_0000503.npy new file mode 100644 index 0000000..1b97bce --- /dev/null +++ b/margin_logs/step_0000503.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37de7916e205176b8872120c4940be3d56601db4f52b19ca826635ae2a1da0fa +size 384 diff --git a/margin_logs/step_0000504.npy b/margin_logs/step_0000504.npy new file mode 100644 index 0000000..ecb1db4 --- /dev/null +++ b/margin_logs/step_0000504.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:927b4fc305b0de8ef41815e74bc33bacb17dce61f9b86085aff0a547b25cf560 +size 384 diff --git a/margin_logs/step_0000505.npy b/margin_logs/step_0000505.npy new file mode 100644 index 0000000..443b54e --- /dev/null +++ b/margin_logs/step_0000505.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff9ed05455a3e19a2c48b00fd73b22a3e28ab2649f389f8646224f085e52658e +size 384 diff --git a/margin_logs/step_0000506.npy b/margin_logs/step_0000506.npy new file mode 100644 index 0000000..5654b59 --- /dev/null +++ b/margin_logs/step_0000506.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dac3188cefbcd4807c2fc3ed0330f91377ff59d411d84c2226b4f4b531ff757 +size 384 diff --git a/margin_logs/step_0000507.npy b/margin_logs/step_0000507.npy new file mode 100644 index 0000000..f9c8b90 --- /dev/null +++ b/margin_logs/step_0000507.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0b098834f1a45307843352b8b6788e9eccbe5e0e5af71daf20b6f819515b9d3 +size 384 diff --git a/margin_logs/step_0000508.npy b/margin_logs/step_0000508.npy new file mode 100644 index 0000000..fc02a94 --- /dev/null +++ b/margin_logs/step_0000508.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:849bdfebbc142756b0c04e3a3434661fccc88a932e3a66191eaae232617f2d3f +size 384 diff --git a/margin_logs/step_0000509.npy b/margin_logs/step_0000509.npy new file mode 100644 index 0000000..5aa7bd5 --- /dev/null +++ b/margin_logs/step_0000509.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e44d8be40cc40f1f10ba2317b9b83cd85907d4025ae5e7b57df27a0ff128bfd +size 384 diff --git a/margin_logs/step_0000510.npy b/margin_logs/step_0000510.npy new file mode 100644 index 0000000..957829d --- /dev/null +++ b/margin_logs/step_0000510.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:918914db89ace83904b4ad0863af57791d9730a54fc0fdf7d0a7e07758a26086 +size 384 diff --git a/margin_logs/step_0000511.npy b/margin_logs/step_0000511.npy new file mode 100644 index 0000000..ba831e9 --- /dev/null +++ b/margin_logs/step_0000511.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:140088241480175c2cffc23a0539b005b7b42d673465a554a5b18718c6eac85a +size 384 diff --git a/margin_logs/step_0000512.npy b/margin_logs/step_0000512.npy new file mode 100644 index 0000000..42724fc --- /dev/null +++ b/margin_logs/step_0000512.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:237e83319b83df28f9a081a4d5bdaf0bfc18c2eb2726cfdf00c9b5ac240c75c3 +size 384 diff --git a/margin_logs/step_0000513.npy b/margin_logs/step_0000513.npy new file mode 100644 index 0000000..8c9ee63 --- /dev/null +++ b/margin_logs/step_0000513.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:601fc43870f67877733750c539434c7fcead83f1b277cf6be607a6998ab0a500 +size 384 diff --git a/margin_logs/step_0000514.npy b/margin_logs/step_0000514.npy new file mode 100644 index 0000000..2b3d803 --- /dev/null +++ b/margin_logs/step_0000514.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dafb7f17b079648054047143daa7a2918388f968284ed7da646d00fb233e5611 +size 384 diff --git a/margin_logs/step_0000515.npy b/margin_logs/step_0000515.npy new file mode 100644 index 0000000..d710f0f --- /dev/null +++ b/margin_logs/step_0000515.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49b3134fba56695811666cb58e43414ed9f11719985843dc8db7bed7b10a7001 +size 384 diff --git a/margin_logs/step_0000516.npy b/margin_logs/step_0000516.npy new file mode 100644 index 0000000..19e85de --- /dev/null +++ b/margin_logs/step_0000516.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4d0c0ba130bbfefc69212f0a646b1cd202b336b5f7df52d02d8c88e4b64e007 +size 384 diff --git a/margin_logs/step_0000517.npy b/margin_logs/step_0000517.npy new file mode 100644 index 0000000..ce351d7 --- /dev/null +++ b/margin_logs/step_0000517.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:237f3c22c2a91fb7ba60e82e99186eb8b6354e680206efb51d484592ee7c923c +size 384 diff --git a/margin_logs/step_0000518.npy b/margin_logs/step_0000518.npy new file mode 100644 index 0000000..17b1cfc --- /dev/null +++ b/margin_logs/step_0000518.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ddf187e3816af76f170a17720a96598033a17785d3cd9c5fb6dc4e9567f968f +size 384 diff --git a/margin_logs/step_0000519.npy b/margin_logs/step_0000519.npy new file mode 100644 index 0000000..663d9d2 --- /dev/null +++ b/margin_logs/step_0000519.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:902755e1cb9b313955a40c5dd3cdc55bad540b1ce0d0d1e825f0b445b0196673 +size 384 diff --git a/margin_logs/step_0000520.npy b/margin_logs/step_0000520.npy new file mode 100644 index 0000000..1111352 --- /dev/null +++ b/margin_logs/step_0000520.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e02a616dd6b92319e5621caa52c60739560f637db0a4bf4bea8e6bc6ecc8d2d +size 384 diff --git a/margin_logs/step_0000521.npy b/margin_logs/step_0000521.npy new file mode 100644 index 0000000..54696b4 --- /dev/null +++ b/margin_logs/step_0000521.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29c14010314fd40217fa58dae9ccc0657a4c474b9d882c5533ed72722b91f07e +size 384 diff --git a/margin_logs/step_0000522.npy b/margin_logs/step_0000522.npy new file mode 100644 index 0000000..4fbee65 --- /dev/null +++ b/margin_logs/step_0000522.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31fe344264d16a7686ecd47939a110a4fbb43ac0b80678c72f153dedd4fffca8 +size 384 diff --git a/margin_logs/step_0000523.npy b/margin_logs/step_0000523.npy new file mode 100644 index 0000000..74df1bd --- /dev/null +++ b/margin_logs/step_0000523.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1622de1432131bd884e40848cadc59e4802d7c114165a0814886f7d7fb0ec4c7 +size 384 diff --git a/margin_logs/step_0000524.npy b/margin_logs/step_0000524.npy new file mode 100644 index 0000000..a80908c --- /dev/null +++ b/margin_logs/step_0000524.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128853ce567dde91c5e58855494d52e7205b8c8dd261fb302277cd402d6f7867 +size 384 diff --git a/margin_logs/step_0000525.npy b/margin_logs/step_0000525.npy new file mode 100644 index 0000000..c2bf747 --- /dev/null +++ b/margin_logs/step_0000525.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:569ed47330929d9ec1a110b6358bbeccaf65e1c14fb53c9bf3a1413df74efe98 +size 384 diff --git a/margin_logs/step_0000526.npy b/margin_logs/step_0000526.npy new file mode 100644 index 0000000..451a7bd --- /dev/null +++ b/margin_logs/step_0000526.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a05e170f2de2d1fd62866cf54dbb957ea6f4509766794d226e5475903b84b5f +size 384 diff --git a/margin_logs/step_0000527.npy b/margin_logs/step_0000527.npy new file mode 100644 index 0000000..63d87fb --- /dev/null +++ b/margin_logs/step_0000527.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70c25ad2e69b09113869c171d1898dc461616df0fbd4472b219bb43e06a725f2 +size 384 diff --git a/margin_logs/step_0000528.npy b/margin_logs/step_0000528.npy new file mode 100644 index 0000000..3e44d8f --- /dev/null +++ b/margin_logs/step_0000528.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1a73fe99d422550c03dd4d04e17b8bef66e25bb5c35feef90843194f43dc3f1 +size 384 diff --git a/margin_logs/step_0000529.npy b/margin_logs/step_0000529.npy new file mode 100644 index 0000000..99fc663 --- /dev/null +++ b/margin_logs/step_0000529.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76d5eb0400ae98f2ef74bd710b66200e5d23ca98c5146a9112fea8f499ff4fe6 +size 384 diff --git a/margin_logs/step_0000530.npy b/margin_logs/step_0000530.npy new file mode 100644 index 0000000..bb32172 --- /dev/null +++ b/margin_logs/step_0000530.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e9d6f63a2e0895df08cbf9fe26463c632f09edfc413e091ccb3c8e48aea8010 +size 384 diff --git a/margin_logs/step_0000531.npy b/margin_logs/step_0000531.npy new file mode 100644 index 0000000..b77ff72 --- /dev/null +++ b/margin_logs/step_0000531.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a7bcab2ee7cb6b2ee7a15938f67600bac344cbd433c06529865b4dc652116c7 +size 384 diff --git a/margin_logs/step_0000532.npy b/margin_logs/step_0000532.npy new file mode 100644 index 0000000..db16ce6 --- /dev/null +++ b/margin_logs/step_0000532.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b85cffbc3e7a922ef934a2f8ea2a6d1762c959272fe437f7746740cb8e6c0be3 +size 384 diff --git a/margin_logs/step_0000533.npy b/margin_logs/step_0000533.npy new file mode 100644 index 0000000..5342d0f --- /dev/null +++ b/margin_logs/step_0000533.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8921a95a2d3fd8f408733f9a3279ad205a32c9a9d50d65a1d338e634c85ee74c +size 384 diff --git a/margin_logs/step_0000534.npy b/margin_logs/step_0000534.npy new file mode 100644 index 0000000..b9eb8ae --- /dev/null +++ b/margin_logs/step_0000534.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fa154a9f8e70414e0cff54ff0bf8f3e22e71f78cb562eb29dc3163abc1230a3 +size 384 diff --git a/margin_logs/step_0000535.npy b/margin_logs/step_0000535.npy new file mode 100644 index 0000000..1d7c3e7 --- /dev/null +++ b/margin_logs/step_0000535.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e28cda83b3ccc430b71a59959f29fedcf665534fdda7af349d8e1dc3ae6535e8 +size 384 diff --git a/margin_logs/step_0000536.npy b/margin_logs/step_0000536.npy new file mode 100644 index 0000000..49c8e3c --- /dev/null +++ b/margin_logs/step_0000536.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e269ce134e4a2e24bce999e319ea72725e44d02f36163e35743f1bbf627bae8 +size 384 diff --git a/margin_logs/step_0000537.npy b/margin_logs/step_0000537.npy new file mode 100644 index 0000000..e101e71 --- /dev/null +++ b/margin_logs/step_0000537.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab88dcab7641b4b41e7413b5512a6e7d5c60ac1ebb270018cc31e7292fd0a3be +size 384 diff --git a/margin_logs/step_0000538.npy b/margin_logs/step_0000538.npy new file mode 100644 index 0000000..2c68a79 --- /dev/null +++ b/margin_logs/step_0000538.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6a1af3798a4cc11bbf4ebba9113e1458ce5663e40737432ded38bd080f90202 +size 384 diff --git a/margin_logs/step_0000539.npy b/margin_logs/step_0000539.npy new file mode 100644 index 0000000..362eb13 --- /dev/null +++ b/margin_logs/step_0000539.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12eb69eb14f801ef19e2ce897201097a4c3bb99a12d008412ed2c1cd5fddf346 +size 384 diff --git a/margin_logs/step_0000540.npy b/margin_logs/step_0000540.npy new file mode 100644 index 0000000..cedf09c --- /dev/null +++ b/margin_logs/step_0000540.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67df7a2361a2a7180d49a65a206c5ba759ffaa0e58680904383a8c868f5d60f8 +size 384 diff --git a/margin_logs/step_0000541.npy b/margin_logs/step_0000541.npy new file mode 100644 index 0000000..293b122 --- /dev/null +++ b/margin_logs/step_0000541.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59a37bd447c9ac228996f0906b5d6f81205982161d00f0ff150f47560a2134e6 +size 384 diff --git a/margin_logs/step_0000542.npy b/margin_logs/step_0000542.npy new file mode 100644 index 0000000..db2caa3 --- /dev/null +++ b/margin_logs/step_0000542.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e24862d44b6b43f2ea70ae92400be60c59adfedac4f57d98bf362d244bb0614 +size 384 diff --git a/margin_logs/step_0000543.npy b/margin_logs/step_0000543.npy new file mode 100644 index 0000000..e3ef965 --- /dev/null +++ b/margin_logs/step_0000543.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:834d2658d5eebf8855bebd6bd7ca2afcb58f1fdf292a47ae5cece6088268f136 +size 384 diff --git a/margin_logs/step_0000544.npy b/margin_logs/step_0000544.npy new file mode 100644 index 0000000..d46241e --- /dev/null +++ b/margin_logs/step_0000544.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee66ac705aa9c43f458e025b791463b03003f47d8adb0e4c8769da74795f639 +size 384 diff --git a/margin_logs/step_0000545.npy b/margin_logs/step_0000545.npy new file mode 100644 index 0000000..02e7187 --- /dev/null +++ b/margin_logs/step_0000545.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e4ebebe9376056d48d31d13373fb81ed0315ba998e499bdb1517c61bb5cc994 +size 384 diff --git a/margin_logs/step_0000546.npy b/margin_logs/step_0000546.npy new file mode 100644 index 0000000..248c408 --- /dev/null +++ b/margin_logs/step_0000546.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36abdb06af100cb5628ae21a18bad5f7ad860c307bba611c5912b3d967b96a8 +size 384 diff --git a/margin_logs/step_0000547.npy b/margin_logs/step_0000547.npy new file mode 100644 index 0000000..f432921 --- /dev/null +++ b/margin_logs/step_0000547.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d931d76cdd0a6e83cdd3d9b128f0366513d8e30d39a09539554d472e81fb1010 +size 384 diff --git a/margin_logs/step_0000548.npy b/margin_logs/step_0000548.npy new file mode 100644 index 0000000..88e51ef --- /dev/null +++ b/margin_logs/step_0000548.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38df7b340ebb2f78da4ff6e898e38e3f1fe842a5128184eee177174d1d7732f6 +size 384 diff --git a/margin_logs/step_0000549.npy b/margin_logs/step_0000549.npy new file mode 100644 index 0000000..5d32ba8 --- /dev/null +++ b/margin_logs/step_0000549.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab877d8f7afac0078c9dbee742e9dc16c13461870cb56f52fc7ba3afa1665181 +size 384 diff --git a/margin_logs/step_0000550.npy b/margin_logs/step_0000550.npy new file mode 100644 index 0000000..72af3df --- /dev/null +++ b/margin_logs/step_0000550.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00c1809447ebf0eb31b4c498fd8a9f7cff6c1e8f63f11688e2296324fe28f17d +size 384 diff --git a/margin_logs/step_0000551.npy b/margin_logs/step_0000551.npy new file mode 100644 index 0000000..d36d9ce --- /dev/null +++ b/margin_logs/step_0000551.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d222f36a894f780122cd2477e2178742b161b45303938e56ec5a7e33e8a644bf +size 384 diff --git a/margin_logs/step_0000552.npy b/margin_logs/step_0000552.npy new file mode 100644 index 0000000..360bcab --- /dev/null +++ b/margin_logs/step_0000552.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0aa6a4cbd032f9c2de69afff26d362eb4d9ccb6cfc1d07bdfddcc1b6c4d2384 +size 384 diff --git a/margin_logs/step_0000553.npy b/margin_logs/step_0000553.npy new file mode 100644 index 0000000..e2eff24 --- /dev/null +++ b/margin_logs/step_0000553.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:777d645bba198e9d881c422d40c316ec9730636c86a35e4d78463d9569b1d762 +size 384 diff --git a/margin_logs/step_0000554.npy b/margin_logs/step_0000554.npy new file mode 100644 index 0000000..cde4390 --- /dev/null +++ b/margin_logs/step_0000554.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e74984d03e26afb19385679645a9ec62ac755e1a5cb1befd17bf33ee8628ce8e +size 384 diff --git a/margin_logs/step_0000555.npy b/margin_logs/step_0000555.npy new file mode 100644 index 0000000..9f7c4d8 --- /dev/null +++ b/margin_logs/step_0000555.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c3e79379c3bdc6ee3f3b43e66cdbc47369b5247bc9477ba2d18662bbb6531fb +size 384 diff --git a/margin_logs/step_0000556.npy b/margin_logs/step_0000556.npy new file mode 100644 index 0000000..75ab45c --- /dev/null +++ b/margin_logs/step_0000556.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:188dd2b2d8cfcef5f5af6601b6c6f20fb09f45e9bf38ff1467d3ed167065e6e6 +size 384 diff --git a/margin_logs/step_0000557.npy b/margin_logs/step_0000557.npy new file mode 100644 index 0000000..ef9a7eb --- /dev/null +++ b/margin_logs/step_0000557.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:896dc341c45c9f1478c586ca8bc73fa30649eed3f1da0b6cf53648e5da9cc699 +size 384 diff --git a/margin_logs/step_0000558.npy b/margin_logs/step_0000558.npy new file mode 100644 index 0000000..68d443a --- /dev/null +++ b/margin_logs/step_0000558.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff1040649c03645b3f6404c8431b217d662b75809eef203c53302d088ff8160d +size 384 diff --git a/margin_logs/step_0000559.npy b/margin_logs/step_0000559.npy new file mode 100644 index 0000000..a4a6e95 --- /dev/null +++ b/margin_logs/step_0000559.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc734dbe58b84efd0d7d859ca038bd07c0d567607642c7f7e5ddf200bd949373 +size 384 diff --git a/margin_logs/step_0000560.npy b/margin_logs/step_0000560.npy new file mode 100644 index 0000000..26b49d2 --- /dev/null +++ b/margin_logs/step_0000560.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5233161ffe4feea9e652a555acdb6f0dd9939a2806e2d3964046b8094c0e8d9 +size 384 diff --git a/margin_logs/step_0000561.npy b/margin_logs/step_0000561.npy new file mode 100644 index 0000000..69ae102 --- /dev/null +++ b/margin_logs/step_0000561.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c51ffd56a5e1da79e5335059d62bd7cafc408c8151309718c37181e43c429edb +size 384 diff --git a/margin_logs/step_0000562.npy b/margin_logs/step_0000562.npy new file mode 100644 index 0000000..a228d17 --- /dev/null +++ b/margin_logs/step_0000562.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79e922c7d05d0707701d3a90e5c1de20d86861861ba16b2ca6524549bcef3967 +size 384 diff --git a/margin_logs/step_0000563.npy b/margin_logs/step_0000563.npy new file mode 100644 index 0000000..aefbaee --- /dev/null +++ b/margin_logs/step_0000563.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7795113812593832aa72962f3c938f7f4d2dbb22da5b72372199da3f66b963e +size 384 diff --git a/margin_logs/step_0000564.npy b/margin_logs/step_0000564.npy new file mode 100644 index 0000000..caa2f02 --- /dev/null +++ b/margin_logs/step_0000564.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f1cc609006477f4e470e6a67a01b948f165554c0a4bb2f22f083be30e6cf73f +size 384 diff --git a/margin_logs/step_0000565.npy b/margin_logs/step_0000565.npy new file mode 100644 index 0000000..86fe642 --- /dev/null +++ b/margin_logs/step_0000565.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb66541cff18d45bab5aee2d0edc4740e44aa7bc65fb335439ff88628ef2c56b +size 384 diff --git a/margin_logs/step_0000566.npy b/margin_logs/step_0000566.npy new file mode 100644 index 0000000..40bcbec --- /dev/null +++ b/margin_logs/step_0000566.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d16de1513aa87f75dccb128e6c67c9e080d40e49a621712a527f4b5b3fdf950c +size 384 diff --git a/margin_logs/step_0000567.npy b/margin_logs/step_0000567.npy new file mode 100644 index 0000000..c78bfa9 --- /dev/null +++ b/margin_logs/step_0000567.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfbfcbd455fd6991217d8103881b4f11615c3b357a9302abb374630eea5cd938 +size 384 diff --git a/margin_logs/step_0000568.npy b/margin_logs/step_0000568.npy new file mode 100644 index 0000000..47d8715 --- /dev/null +++ b/margin_logs/step_0000568.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8793d3b41566d554c138972d2e15e1c5d233672f48ae6573fc4508edaf6db22 +size 384 diff --git a/margin_logs/step_0000569.npy b/margin_logs/step_0000569.npy new file mode 100644 index 0000000..e4049cd --- /dev/null +++ b/margin_logs/step_0000569.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd362f6b3000fb015b92b164555199f7af8014c320bbffe6712d9f3941d8243f +size 384 diff --git a/margin_logs/step_0000570.npy b/margin_logs/step_0000570.npy new file mode 100644 index 0000000..b54bd18 --- /dev/null +++ b/margin_logs/step_0000570.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc1e3954033195195747a11cdf179978f9a0ad6392e822cc32cc41183d0adc9 +size 384 diff --git a/margin_logs/step_0000571.npy b/margin_logs/step_0000571.npy new file mode 100644 index 0000000..422c620 --- /dev/null +++ b/margin_logs/step_0000571.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db896eac74d4e12b8c51391e15fd60c3bb0bf30d83010c39d28413d7be120c74 +size 384 diff --git a/margin_logs/step_0000572.npy b/margin_logs/step_0000572.npy new file mode 100644 index 0000000..0f5c253 --- /dev/null +++ b/margin_logs/step_0000572.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e81939f16ff04992abce19c0f54cf75cae4e101981c746fbf372f3e29fd03dd +size 384 diff --git a/margin_logs/step_0000573.npy b/margin_logs/step_0000573.npy new file mode 100644 index 0000000..b54b04b --- /dev/null +++ b/margin_logs/step_0000573.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37301829aa3f89d09a3a5324d40aa477fea2b6c937b74fc744e0cd602ad65de +size 384 diff --git a/margin_logs/step_0000574.npy b/margin_logs/step_0000574.npy new file mode 100644 index 0000000..25e7e3a --- /dev/null +++ b/margin_logs/step_0000574.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:812cbc946bc2abf2548252c95b8d4c3729d7a7c61cbcc477b13cbc7bf3e21095 +size 384 diff --git a/margin_logs/step_0000575.npy b/margin_logs/step_0000575.npy new file mode 100644 index 0000000..cf99dd9 --- /dev/null +++ b/margin_logs/step_0000575.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fab3cfe097c6ed97f1298cf7723cf3676911bac1f64add753f20a17aff17f89 +size 384 diff --git a/margin_logs/step_0000576.npy b/margin_logs/step_0000576.npy new file mode 100644 index 0000000..292de0d --- /dev/null +++ b/margin_logs/step_0000576.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea6a765ab82d1ee594b05e885cdb7bcf19a85737788497efac93c9b0a0919b63 +size 384 diff --git a/margin_logs/step_0000577.npy b/margin_logs/step_0000577.npy new file mode 100644 index 0000000..e324fb5 --- /dev/null +++ b/margin_logs/step_0000577.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53a3e7d83dccd0756fb0fecf3509c2d442517b788b3c4919a3fac8c58719ff74 +size 384 diff --git a/margin_logs/step_0000578.npy b/margin_logs/step_0000578.npy new file mode 100644 index 0000000..ab7abe5 --- /dev/null +++ b/margin_logs/step_0000578.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d920d0bf7070a1d8b688c1fe78bfa4f583da8db62d6929888ae3bd89f2e9a4de +size 384 diff --git a/margin_logs/step_0000579.npy b/margin_logs/step_0000579.npy new file mode 100644 index 0000000..ccb5e92 --- /dev/null +++ b/margin_logs/step_0000579.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d0bea3f768945944aea8fa484bb0f9235858881f24e723be0d16a4bae7635bd +size 384 diff --git a/margin_logs/step_0000580.npy b/margin_logs/step_0000580.npy new file mode 100644 index 0000000..0d8af2f --- /dev/null +++ b/margin_logs/step_0000580.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c15edd35fd8da5fc1060bd267f038559ce99236c485ac5d97c870385ca512a2 +size 384 diff --git a/margin_logs/step_0000581.npy b/margin_logs/step_0000581.npy new file mode 100644 index 0000000..c158885 --- /dev/null +++ b/margin_logs/step_0000581.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15710644a844abf10afd4cac2e19498fb5effd2b518a7b3bf14837465c968de6 +size 384 diff --git a/margin_logs/step_0000582.npy b/margin_logs/step_0000582.npy new file mode 100644 index 0000000..3dc3251 --- /dev/null +++ b/margin_logs/step_0000582.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01589b5646c98c7b9269e5157c719999ca14ad322380546d7d0c3f7f5d44e18b +size 384 diff --git a/margin_logs/step_0000583.npy b/margin_logs/step_0000583.npy new file mode 100644 index 0000000..6d97774 --- /dev/null +++ b/margin_logs/step_0000583.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e554a43961d93f12de271fcf93a577462b72f9203d3d0616e14b16b544bbae2a +size 384 diff --git a/margin_logs/step_0000584.npy b/margin_logs/step_0000584.npy new file mode 100644 index 0000000..5a97d68 --- /dev/null +++ b/margin_logs/step_0000584.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c0464616740f9ba378a5b2c85cb5782917d9b3c93350e08bca5ec1e213591e0 +size 384 diff --git a/margin_logs/step_0000585.npy b/margin_logs/step_0000585.npy new file mode 100644 index 0000000..23e6872 --- /dev/null +++ b/margin_logs/step_0000585.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e59d1f64db328643e9044376bfb33919279e6491f6e6fe7ff2eceda7eb20e9a2 +size 384 diff --git a/margin_logs/step_0000586.npy b/margin_logs/step_0000586.npy new file mode 100644 index 0000000..6290106 --- /dev/null +++ b/margin_logs/step_0000586.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f25be7ff044dc73b1312668b20fb494435278ff8e6f8081b43647d66c3275e1 +size 384 diff --git a/margin_logs/step_0000587.npy b/margin_logs/step_0000587.npy new file mode 100644 index 0000000..6f73fca --- /dev/null +++ b/margin_logs/step_0000587.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5bc03594a3e75c0363285379df2c5fd86b7b50dee24b103d85cb0fe2faf879 +size 384 diff --git a/margin_logs/step_0000588.npy b/margin_logs/step_0000588.npy new file mode 100644 index 0000000..63b4b38 --- /dev/null +++ b/margin_logs/step_0000588.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c70ae715832511044eea0d6a0ce2b0f96baa6efeabab44495bbb44d609b2b9f6 +size 384 diff --git a/margin_logs/step_0000589.npy b/margin_logs/step_0000589.npy new file mode 100644 index 0000000..a44e06b --- /dev/null +++ b/margin_logs/step_0000589.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80a6978474b7971a05739a1febc0dedd1b3fa92378f6a9200b55b200977f4b52 +size 384 diff --git a/margin_logs/step_0000590.npy b/margin_logs/step_0000590.npy new file mode 100644 index 0000000..afbdd9e --- /dev/null +++ b/margin_logs/step_0000590.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f53abce329b49d71a5460ffdd57b7cff2b0863722b10032094d2d1c7ecedcc58 +size 384 diff --git a/margin_logs/step_0000591.npy b/margin_logs/step_0000591.npy new file mode 100644 index 0000000..db30155 --- /dev/null +++ b/margin_logs/step_0000591.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cafdbdd5e39273da1d361575a7d76f7eb4017920fdf811577212df36888df2b +size 384 diff --git a/margin_logs/step_0000592.npy b/margin_logs/step_0000592.npy new file mode 100644 index 0000000..3e6afc4 --- /dev/null +++ b/margin_logs/step_0000592.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cb450e635983a53a14a1da12cac12dd3958a6542cd05aa06e512c6b929bd123 +size 384 diff --git a/margin_logs/step_0000593.npy b/margin_logs/step_0000593.npy new file mode 100644 index 0000000..5fc8ea5 --- /dev/null +++ b/margin_logs/step_0000593.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:619de6d69fe66f61d7fbded339de6bbfeb63ae60c5a5caaade2389f57bfb77f0 +size 384 diff --git a/margin_logs/step_0000594.npy b/margin_logs/step_0000594.npy new file mode 100644 index 0000000..9155262 --- /dev/null +++ b/margin_logs/step_0000594.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1441e8eb885a7787df151a81e1e569379e42a14b64a36358004d9aea935d2b23 +size 384 diff --git a/margin_logs/step_0000595.npy b/margin_logs/step_0000595.npy new file mode 100644 index 0000000..b8a4b38 --- /dev/null +++ b/margin_logs/step_0000595.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1668a14fa98d872b907d169b90ffd9300cc77baef5cca0247545a04d15415790 +size 384 diff --git a/margin_logs/step_0000596.npy b/margin_logs/step_0000596.npy new file mode 100644 index 0000000..ee7f05f --- /dev/null +++ b/margin_logs/step_0000596.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c003cbe48df9e641b1e0f08379bb2cdc04990da61f32c6ab9a8b8c6d495ca601 +size 384 diff --git a/margin_logs/step_0000597.npy b/margin_logs/step_0000597.npy new file mode 100644 index 0000000..4cdfc08 --- /dev/null +++ b/margin_logs/step_0000597.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd0906a0e4c5c75a3ad3168a475310f15841cdd47da385fecaa3784ee3b67a6e +size 384 diff --git a/margin_logs/step_0000598.npy b/margin_logs/step_0000598.npy new file mode 100644 index 0000000..1d3f9fb --- /dev/null +++ b/margin_logs/step_0000598.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edb858bc8f98c78c256055a1b6439a7aeb48cbfe82c5ff004e699616515b9e21 +size 384 diff --git a/margin_logs/step_0000599.npy b/margin_logs/step_0000599.npy new file mode 100644 index 0000000..5f7bea7 --- /dev/null +++ b/margin_logs/step_0000599.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f9919142f9782921e43fd90a880a25f50b85d34a0140a99a327735ed0e3365e +size 384 diff --git a/margin_logs/step_0000600.npy b/margin_logs/step_0000600.npy new file mode 100644 index 0000000..571c60d --- /dev/null +++ b/margin_logs/step_0000600.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ee4e539a65c060cc53d1c076cc4eda88b9d2594aa4cc747c4c0d581b900d6af +size 384 diff --git a/margin_logs/step_0000601.npy b/margin_logs/step_0000601.npy new file mode 100644 index 0000000..5b74212 --- /dev/null +++ b/margin_logs/step_0000601.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07bb97c1c3046ec371a7693963e352f0672bfcf5f7b1890420f4faee20f88f62 +size 384 diff --git a/margin_logs/step_0000602.npy b/margin_logs/step_0000602.npy new file mode 100644 index 0000000..0c31eae --- /dev/null +++ b/margin_logs/step_0000602.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7290ab0a1a733744ff411e8f5886ff4f90448eeda88971015a74b9bccfd84ea +size 384 diff --git a/margin_logs/step_0000603.npy b/margin_logs/step_0000603.npy new file mode 100644 index 0000000..120452f --- /dev/null +++ b/margin_logs/step_0000603.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b64e0084877e910eeb15b2ecbee3fbf9015d730deb5efe1c25f90214fc4ffd4 +size 384 diff --git a/margin_logs/step_0000604.npy b/margin_logs/step_0000604.npy new file mode 100644 index 0000000..3509cf2 --- /dev/null +++ b/margin_logs/step_0000604.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9c8e9a9c48223ee1d0925fb73d31cdd61d43235f3f3fbaf3bb95ea8c8afadc3 +size 384 diff --git a/margin_logs/step_0000605.npy b/margin_logs/step_0000605.npy new file mode 100644 index 0000000..27162f7 --- /dev/null +++ b/margin_logs/step_0000605.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4b16ee72bcfb238e997c36159b641f8854360c87a923d2a86da027a4c70bdfd +size 384 diff --git a/margin_logs/step_0000606.npy b/margin_logs/step_0000606.npy new file mode 100644 index 0000000..61d2f8d --- /dev/null +++ b/margin_logs/step_0000606.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e99ab7ecb68b510f645bfb9f55f774cc96b247165531919d52169f173c78b37 +size 384 diff --git a/margin_logs/step_0000607.npy b/margin_logs/step_0000607.npy new file mode 100644 index 0000000..d5bfa76 --- /dev/null +++ b/margin_logs/step_0000607.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:277743adec40aa35292a81a5a84724832c824499620c5e987ff17fda3c9d1dd5 +size 384 diff --git a/margin_logs/step_0000608.npy b/margin_logs/step_0000608.npy new file mode 100644 index 0000000..76f1782 --- /dev/null +++ b/margin_logs/step_0000608.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3df3cf7ba36ea1c7e77e623019ddfc04e232d02f5912502dc38a37f67fc4d21 +size 384 diff --git a/margin_logs/step_0000609.npy b/margin_logs/step_0000609.npy new file mode 100644 index 0000000..c3750ca --- /dev/null +++ b/margin_logs/step_0000609.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e3f1045c1a1649c1d330faaf4f076238c23ad773ee9a4ba7988b694816a24c9 +size 384 diff --git a/margin_logs/step_0000610.npy b/margin_logs/step_0000610.npy new file mode 100644 index 0000000..dac2bcc --- /dev/null +++ b/margin_logs/step_0000610.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b55a2a2403aea197264de21c4bc2c3d81d91581ae105ec177c0ce0e76bcb2ae +size 384 diff --git a/margin_logs/step_0000611.npy b/margin_logs/step_0000611.npy new file mode 100644 index 0000000..7081b4a --- /dev/null +++ b/margin_logs/step_0000611.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08197dd9eaf6861f4805a05da2e1b0f19638f748e486c8e3d03066892a525869 +size 384 diff --git a/margin_logs/step_0000612.npy b/margin_logs/step_0000612.npy new file mode 100644 index 0000000..93c3f89 --- /dev/null +++ b/margin_logs/step_0000612.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297d58b81b9d1aa170ddd0ff9b5183ed3422247f9d08425a62892af0ac8ea10e +size 384 diff --git a/margin_logs/step_0000613.npy b/margin_logs/step_0000613.npy new file mode 100644 index 0000000..1a68f2d --- /dev/null +++ b/margin_logs/step_0000613.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bcc94b7403318279e27157ec2e363c82e2cb7dd1cfa9f0d75bb246a24f2ffed +size 384 diff --git a/margin_logs/step_0000614.npy b/margin_logs/step_0000614.npy new file mode 100644 index 0000000..7dc05d6 --- /dev/null +++ b/margin_logs/step_0000614.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb959ba0d6a3a4e818943f7f46162706a0d258ed8d260e7cd887c19fb8aa172a +size 384 diff --git a/margin_logs/step_0000615.npy b/margin_logs/step_0000615.npy new file mode 100644 index 0000000..fa20799 --- /dev/null +++ b/margin_logs/step_0000615.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1576408fed9f46c342e3c9a558d22747b7169237e4045d0bca06e795526d7e1 +size 384 diff --git a/margin_logs/step_0000616.npy b/margin_logs/step_0000616.npy new file mode 100644 index 0000000..dadc707 --- /dev/null +++ b/margin_logs/step_0000616.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47f6c4570e5d2b80d3a12d8ed477995ec816e73fa387d81da013612fd88df6b +size 384 diff --git a/margin_logs/step_0000617.npy b/margin_logs/step_0000617.npy new file mode 100644 index 0000000..72f0ca0 --- /dev/null +++ b/margin_logs/step_0000617.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c7bfddd04eee4c48db3d5c6db4b95b8ea42c1a26880308002251d047c5e4822 +size 384 diff --git a/margin_logs/step_0000618.npy b/margin_logs/step_0000618.npy new file mode 100644 index 0000000..6b79ad9 --- /dev/null +++ b/margin_logs/step_0000618.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7883c0293596a1daa6c63457a0e89fb6eebf08738535cc3685d6de89fdedee21 +size 384 diff --git a/margin_logs/step_0000619.npy b/margin_logs/step_0000619.npy new file mode 100644 index 0000000..8fc4af9 --- /dev/null +++ b/margin_logs/step_0000619.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:259d0d0e5a08653235fc9652830ef169ad9aee6cb7f74091ec853e8af43daee6 +size 384 diff --git a/margin_logs/step_0000620.npy b/margin_logs/step_0000620.npy new file mode 100644 index 0000000..c7ef983 --- /dev/null +++ b/margin_logs/step_0000620.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fb953063946ef4549daffc38cc2f07c1372fb4dd21be6cf94239ad10b3623c7 +size 384 diff --git a/margin_logs/step_0000621.npy b/margin_logs/step_0000621.npy new file mode 100644 index 0000000..8e47d95 --- /dev/null +++ b/margin_logs/step_0000621.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8811ab435d2261464cff3fd5a5c93de062f1f2add6a3fbfe8f8cf969545126f +size 384 diff --git a/margin_logs/step_0000622.npy b/margin_logs/step_0000622.npy new file mode 100644 index 0000000..a1f0f22 --- /dev/null +++ b/margin_logs/step_0000622.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c4e3681a6f894e86f3d27310d84ca077a4c376d571187d2505f8c9f3990f062 +size 384 diff --git a/margin_logs/step_0000623.npy b/margin_logs/step_0000623.npy new file mode 100644 index 0000000..fcc732e --- /dev/null +++ b/margin_logs/step_0000623.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e11c2f820a8f0875fae6a655308fc8d103ce0758a46dd1dd6926b38638ade6d +size 384 diff --git a/margin_logs/step_0000624.npy b/margin_logs/step_0000624.npy new file mode 100644 index 0000000..2b1fe7e --- /dev/null +++ b/margin_logs/step_0000624.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d78022c69e6914f2dce9a9f7d293633f81792e02894883818c4269fa1b61dbad +size 384 diff --git a/margin_logs/step_0000625.npy b/margin_logs/step_0000625.npy new file mode 100644 index 0000000..7f7b22b --- /dev/null +++ b/margin_logs/step_0000625.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f21db12beb56e68e2a0d5b76cd3e7f8862ed26b8b9a2bf9e01cb9a629177c26 +size 384 diff --git a/margin_logs/step_0000626.npy b/margin_logs/step_0000626.npy new file mode 100644 index 0000000..ccab678 --- /dev/null +++ b/margin_logs/step_0000626.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0873d7bb9a102fe938508617087136cad0fd30c626f57809be309636783be18 +size 384 diff --git a/margin_logs/step_0000627.npy b/margin_logs/step_0000627.npy new file mode 100644 index 0000000..0c690cc --- /dev/null +++ b/margin_logs/step_0000627.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f69af35b956ddbbc0d1ebafe5e9da2ac0c7fcdbafb38a3b6d6c26cd8381ad065 +size 384 diff --git a/margin_logs/step_0000628.npy b/margin_logs/step_0000628.npy new file mode 100644 index 0000000..b22ba96 --- /dev/null +++ b/margin_logs/step_0000628.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30594af6e8a7977b408704a485d83700262271b31848dce50580f76447ef7a82 +size 384 diff --git a/margin_logs/step_0000629.npy b/margin_logs/step_0000629.npy new file mode 100644 index 0000000..cfec176 --- /dev/null +++ b/margin_logs/step_0000629.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f467cfcd9c2b1483b261318e5870f621e6c9ce40dbb0200b9e4b89f869ccdb3 +size 384 diff --git a/margin_logs/step_0000630.npy b/margin_logs/step_0000630.npy new file mode 100644 index 0000000..9ee1526 --- /dev/null +++ b/margin_logs/step_0000630.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3df1677c0c9d26f0fe6f7aa54a7cdf32d4d17b0d8aa44a9ab30797d3aa91726e +size 384 diff --git a/margin_logs/step_0000631.npy b/margin_logs/step_0000631.npy new file mode 100644 index 0000000..e4d4865 --- /dev/null +++ b/margin_logs/step_0000631.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c0f26a0f9f520c90ff6cb1ca05cae574aee57b40e9ab0edd6badb85a28347c8 +size 384 diff --git a/margin_logs/step_0000632.npy b/margin_logs/step_0000632.npy new file mode 100644 index 0000000..b032d76 --- /dev/null +++ b/margin_logs/step_0000632.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eb13bc430e4acbbbcaa385da5414558dfeb214970d0bb3ff59b5afd6d476767 +size 384 diff --git a/margin_logs/step_0000633.npy b/margin_logs/step_0000633.npy new file mode 100644 index 0000000..3b50517 --- /dev/null +++ b/margin_logs/step_0000633.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37e9ee0e30ffac86f6fb974222a6c117d0c404075e67e5ef972e7ba63158fdf1 +size 384 diff --git a/margin_logs/step_0000634.npy b/margin_logs/step_0000634.npy new file mode 100644 index 0000000..ee25acf --- /dev/null +++ b/margin_logs/step_0000634.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79afaefe889787d18c250f8b02130810ff52e1c244dbb94946de5991c2c94569 +size 384 diff --git a/margin_logs/step_0000635.npy b/margin_logs/step_0000635.npy new file mode 100644 index 0000000..d9c80bf --- /dev/null +++ b/margin_logs/step_0000635.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88947a8db63fe2ab46dee7afee526f1847a4a7f55695476de0e5a58140277312 +size 384 diff --git a/margin_logs/step_0000636.npy b/margin_logs/step_0000636.npy new file mode 100644 index 0000000..b86e729 --- /dev/null +++ b/margin_logs/step_0000636.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9aae33f160512fca0cfa7b41e61bdd1d51b5fdd661829cce4e352e66c371f21 +size 384 diff --git a/margin_logs/step_0000637.npy b/margin_logs/step_0000637.npy new file mode 100644 index 0000000..5103a9a --- /dev/null +++ b/margin_logs/step_0000637.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5f5056a5fbde9b998d6721e7ba0595abcc9094a969dbb11531e38236c3ddd17 +size 384 diff --git a/margin_logs/step_0000638.npy b/margin_logs/step_0000638.npy new file mode 100644 index 0000000..7c2f59d --- /dev/null +++ b/margin_logs/step_0000638.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee093d2bfc3cfbf6fb91e2ee0663586cc23a43fb6171728daa8e2949ff532047 +size 384 diff --git a/margin_logs/step_0000639.npy b/margin_logs/step_0000639.npy new file mode 100644 index 0000000..7552eae --- /dev/null +++ b/margin_logs/step_0000639.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39351f50b32d9b341a9fc6c546961334e413c2c8f6ff1c7932a268fc00ec627e +size 384 diff --git a/margin_logs/step_0000640.npy b/margin_logs/step_0000640.npy new file mode 100644 index 0000000..5fb6cf6 --- /dev/null +++ b/margin_logs/step_0000640.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feae666cf888e1fde351a6d1fb5ede0b43131273d126e598f088db55a423b876 +size 384 diff --git a/margin_logs/step_0000641.npy b/margin_logs/step_0000641.npy new file mode 100644 index 0000000..1f85e95 --- /dev/null +++ b/margin_logs/step_0000641.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11c8931b083ae163a2630f15fd00451eccefe99f4c3069a12f8c1668543b132b +size 384 diff --git a/margin_logs/step_0000642.npy b/margin_logs/step_0000642.npy new file mode 100644 index 0000000..f8259be --- /dev/null +++ b/margin_logs/step_0000642.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fae97a7a63894b7195ddee4cef7630b1bb6be8f2f39aaf74400092273e8563b +size 384 diff --git a/margin_logs/step_0000643.npy b/margin_logs/step_0000643.npy new file mode 100644 index 0000000..2b0ad83 --- /dev/null +++ b/margin_logs/step_0000643.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46ba39102c3464fb41d4f14c4efdde5371031e58fbf0515901007e1126f2a521 +size 384 diff --git a/margin_logs/step_0000644.npy b/margin_logs/step_0000644.npy new file mode 100644 index 0000000..b8ffe8c --- /dev/null +++ b/margin_logs/step_0000644.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0b3d2772e440ce3c853a6cc093ff7d4b1a9953095703e1c1ed476381c692faf +size 384 diff --git a/margin_logs/step_0000645.npy b/margin_logs/step_0000645.npy new file mode 100644 index 0000000..22c7dad --- /dev/null +++ b/margin_logs/step_0000645.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28586dc1041bb54d1615ccdcc64bc45c25eea34b3764a3239b79d29326e8b148 +size 384 diff --git a/margin_logs/step_0000646.npy b/margin_logs/step_0000646.npy new file mode 100644 index 0000000..eed19ca --- /dev/null +++ b/margin_logs/step_0000646.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3faf0ab1820e18b55d7b22c770f015e9b7bc4429846df8a09325d96b5f7f4666 +size 384 diff --git a/margin_logs/step_0000647.npy b/margin_logs/step_0000647.npy new file mode 100644 index 0000000..294bc27 --- /dev/null +++ b/margin_logs/step_0000647.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e8fe3530e50ec8972e86e52dd877c95522f3e88463056d714f565a747f3931 +size 384 diff --git a/margin_logs/step_0000648.npy b/margin_logs/step_0000648.npy new file mode 100644 index 0000000..54a29ff --- /dev/null +++ b/margin_logs/step_0000648.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fc393edf6bbd494f7a483b07776300b3f8b15916be826ebb9909732d7a0fa23 +size 384 diff --git a/margin_logs/step_0000649.npy b/margin_logs/step_0000649.npy new file mode 100644 index 0000000..e487871 --- /dev/null +++ b/margin_logs/step_0000649.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2f142b2485850c9455963a1e18eab6239e7cbcba204c30f8cba1eb413f734ea +size 384 diff --git a/margin_logs/step_0000650.npy b/margin_logs/step_0000650.npy new file mode 100644 index 0000000..1ea0405 --- /dev/null +++ b/margin_logs/step_0000650.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9204c70b094d84e89e7aaa4e307302efdaf462b106271c92117f406574806a8b +size 384 diff --git a/margin_logs/step_0000651.npy b/margin_logs/step_0000651.npy new file mode 100644 index 0000000..11b3ed3 --- /dev/null +++ b/margin_logs/step_0000651.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f568df37fcf5e7d6fc971bbd6547a708cef4a9b494fc32534cd686dd17f7c09c +size 384 diff --git a/margin_logs/step_0000652.npy b/margin_logs/step_0000652.npy new file mode 100644 index 0000000..f2b5a0f --- /dev/null +++ b/margin_logs/step_0000652.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:456ffa809f3a00bc800cb0e9d48424d4e2907b32260928fd419910df8acdcb5b +size 384 diff --git a/margin_logs/step_0000653.npy b/margin_logs/step_0000653.npy new file mode 100644 index 0000000..541317d --- /dev/null +++ b/margin_logs/step_0000653.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf4fcf10b71a2a41de9d210f31769cd3b4f236ece8adb081a835572b978e2881 +size 384 diff --git a/margin_logs/step_0000654.npy b/margin_logs/step_0000654.npy new file mode 100644 index 0000000..c968a51 --- /dev/null +++ b/margin_logs/step_0000654.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0fa10870f7a1188f24ecdaa0279c516561ebe0797fb3dabed94ee7f95071fad +size 384 diff --git a/margin_logs/step_0000655.npy b/margin_logs/step_0000655.npy new file mode 100644 index 0000000..fd6505d --- /dev/null +++ b/margin_logs/step_0000655.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d627cc0f291f5db93ad009cb063a0f1d503e65531ae430f3f4b2380679133fd6 +size 384 diff --git a/margin_logs/step_0000656.npy b/margin_logs/step_0000656.npy new file mode 100644 index 0000000..073b0ba --- /dev/null +++ b/margin_logs/step_0000656.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b400cc3e06120a71f21a472825455035f4113bc81615083d6b3741fc9f2e110 +size 384 diff --git a/margin_logs/step_0000657.npy b/margin_logs/step_0000657.npy new file mode 100644 index 0000000..551b333 --- /dev/null +++ b/margin_logs/step_0000657.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:853b3469c4e039a636e30f4842abc2de931473b63d32563c6f31ee7b3ec48952 +size 384 diff --git a/margin_logs/step_0000658.npy b/margin_logs/step_0000658.npy new file mode 100644 index 0000000..28ee26e --- /dev/null +++ b/margin_logs/step_0000658.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4910518d53f2f970d5ffeaf40668b20dd915123587d037dcf222ddb8a00e423f +size 384 diff --git a/margin_logs/step_0000659.npy b/margin_logs/step_0000659.npy new file mode 100644 index 0000000..7e1b32b --- /dev/null +++ b/margin_logs/step_0000659.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049f43ce5e79b3689e95545bd276e914e013102d11da7914c12852e684ab991b +size 384 diff --git a/margin_logs/step_0000660.npy b/margin_logs/step_0000660.npy new file mode 100644 index 0000000..372440d --- /dev/null +++ b/margin_logs/step_0000660.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3d7f35b426f7ea83bd02eb41fb14666527f883480734d9a9951c158574206c4 +size 384 diff --git a/margin_logs/step_0000661.npy b/margin_logs/step_0000661.npy new file mode 100644 index 0000000..4c9480f --- /dev/null +++ b/margin_logs/step_0000661.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:118306d38298075a3002c7ab66730e75b9ba657cc14a8cf7ed754d36f7f63fb7 +size 384 diff --git a/margin_logs/step_0000662.npy b/margin_logs/step_0000662.npy new file mode 100644 index 0000000..19dda53 --- /dev/null +++ b/margin_logs/step_0000662.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d485b7cb0bf2e67233a0135c9896c38bd46b4008297984eeafa7d521e5e911e +size 384 diff --git a/margin_logs/step_0000663.npy b/margin_logs/step_0000663.npy new file mode 100644 index 0000000..392da54 --- /dev/null +++ b/margin_logs/step_0000663.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07dc9a5d3f2a0457943529c54ad8d69e85cd8718fe9b835d5458dce6484add5c +size 384 diff --git a/margin_logs/step_0000664.npy b/margin_logs/step_0000664.npy new file mode 100644 index 0000000..a4f4d61 --- /dev/null +++ b/margin_logs/step_0000664.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9807e19e5f3da05cba1a85d22e2db9bdc31d45c0e9b78d3e337dbfea0d02d719 +size 384 diff --git a/margin_logs/step_0000665.npy b/margin_logs/step_0000665.npy new file mode 100644 index 0000000..f8da659 --- /dev/null +++ b/margin_logs/step_0000665.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05640e8e0b8aed49cfb7c76d50dd2e7e244c57bb1c5828be31f95e899931d8d8 +size 384 diff --git a/margin_logs/step_0000666.npy b/margin_logs/step_0000666.npy new file mode 100644 index 0000000..3a7976f --- /dev/null +++ b/margin_logs/step_0000666.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d922504cda4871d38c160e10f3f7b057d891c12cd28b799c3c5d93e8a55f4b10 +size 384 diff --git a/margin_logs/step_0000667.npy b/margin_logs/step_0000667.npy new file mode 100644 index 0000000..226b4db --- /dev/null +++ b/margin_logs/step_0000667.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7e3114d48d34b2f6c98ca954daf913a9491c396bbdc4a184197615d0e3950c8 +size 384 diff --git a/margin_logs/step_0000668.npy b/margin_logs/step_0000668.npy new file mode 100644 index 0000000..cb041e3 --- /dev/null +++ b/margin_logs/step_0000668.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38fb92e4ac966c2b8f1f04cc6a0d0c150f55bf739c00371dcf240075e30fa0db +size 384 diff --git a/margin_logs/step_0000669.npy b/margin_logs/step_0000669.npy new file mode 100644 index 0000000..f7b8b56 --- /dev/null +++ b/margin_logs/step_0000669.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c02448a2c3c8af5dbdd41281a28d46d55db934d34276da96092d4d8eca51cc4e +size 384 diff --git a/margin_logs/step_0000670.npy b/margin_logs/step_0000670.npy new file mode 100644 index 0000000..1781ea8 --- /dev/null +++ b/margin_logs/step_0000670.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ad56a9dc04e341e9de8a90c7cc9d91c2ffe012b768941cbb9d46752eb57e21b +size 384 diff --git a/margin_logs/step_0000671.npy b/margin_logs/step_0000671.npy new file mode 100644 index 0000000..24e51bb --- /dev/null +++ b/margin_logs/step_0000671.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca537b8662bc9e8f9ccfa66094f0f24b16014ffcf813b9f3ee4f525a9592e95c +size 384 diff --git a/margin_logs/step_0000672.npy b/margin_logs/step_0000672.npy new file mode 100644 index 0000000..f5a8c1c --- /dev/null +++ b/margin_logs/step_0000672.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f4184928f6cf6ac7dfeacbb6d2f5642da3f4c2225a2e5020d6e2a48fd71e1a8 +size 384 diff --git a/margin_logs/step_0000673.npy b/margin_logs/step_0000673.npy new file mode 100644 index 0000000..7bdf9fc --- /dev/null +++ b/margin_logs/step_0000673.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e56b43f40f7685e39f7642784fdae1e3df66e788b23e4c0aaddfb38ba475f66 +size 384 diff --git a/margin_logs/step_0000674.npy b/margin_logs/step_0000674.npy new file mode 100644 index 0000000..978208c --- /dev/null +++ b/margin_logs/step_0000674.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ea785763fb3daa79d6d27975e91bed924cc23df9e293341218e28e9c57ed918 +size 384 diff --git a/margin_logs/step_0000675.npy b/margin_logs/step_0000675.npy new file mode 100644 index 0000000..0113ed3 --- /dev/null +++ b/margin_logs/step_0000675.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df695b58f83a7596c2a3fc012908fa616c6580d7755b0b69afc2757b6b2d0910 +size 384 diff --git a/margin_logs/step_0000676.npy b/margin_logs/step_0000676.npy new file mode 100644 index 0000000..41ee14e --- /dev/null +++ b/margin_logs/step_0000676.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4220ed50a53be068dd985dfc100b6594b5b6f84380f975c2b05dd7cff0da5d3 +size 384 diff --git a/margin_logs/step_0000677.npy b/margin_logs/step_0000677.npy new file mode 100644 index 0000000..46aef9c --- /dev/null +++ b/margin_logs/step_0000677.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db47ed7b38ac1b15ce275a23b3ec183994bae4ada577f548fbcaa1f90b3b5d58 +size 384 diff --git a/margin_logs/step_0000678.npy b/margin_logs/step_0000678.npy new file mode 100644 index 0000000..ef96970 --- /dev/null +++ b/margin_logs/step_0000678.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37a3075eba5c1527ded084fbe23afc09c6dbe8f173ec854fa14f04ab22738028 +size 384 diff --git a/margin_logs/step_0000679.npy b/margin_logs/step_0000679.npy new file mode 100644 index 0000000..864d20a --- /dev/null +++ b/margin_logs/step_0000679.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e044be83dc737f294059700345a977a43532c4ae2a50d54e5bd21b3fa232210a +size 384 diff --git a/margin_logs/step_0000680.npy b/margin_logs/step_0000680.npy new file mode 100644 index 0000000..afa6dc5 --- /dev/null +++ b/margin_logs/step_0000680.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30cf8b66269d6c4b577f7d0ce2baa902b10355f41f2d8437f0746efa2b3ef60a +size 384 diff --git a/margin_logs/step_0000681.npy b/margin_logs/step_0000681.npy new file mode 100644 index 0000000..dcc3771 --- /dev/null +++ b/margin_logs/step_0000681.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc73d30c364d30ff3fc63a1437094a79e46b43b4b8f64aee9d39ff5b1a45fd81 +size 384 diff --git a/model-00001-of-00007.safetensors b/model-00001-of-00007.safetensors new file mode 100644 index 0000000..57c4399 --- /dev/null +++ b/model-00001-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e5c47f62e06ec2c50d20f31b9b58240f690dbdfe5fe374d1791b9fe868f9f6f +size 4886466168 diff --git a/model-00002-of-00007.safetensors b/model-00002-of-00007.safetensors new file mode 100644 index 0000000..74a83a8 --- /dev/null +++ b/model-00002-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e382fba040b1445cc3f42d68b5e1d2ef048d7af1bf181a0e1b40e9db5bd027a +size 4832007448 diff --git a/model-00003-of-00007.safetensors b/model-00003-of-00007.safetensors new file mode 100644 index 0000000..22ec9bb --- /dev/null +++ b/model-00003-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c2da8136beb0bc4d3afc11c4094ae5637f91e12ac668ff156dcb3948eda601c +size 4999813112 diff --git a/model-00004-of-00007.safetensors b/model-00004-of-00007.safetensors new file mode 100644 index 0000000..dbfc9ab --- /dev/null +++ b/model-00004-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fda91bcb0d18dc1ad37edf00b7069a474a88e6874c032628adedf2bd6ea1611 +size 4999813128 diff --git a/model-00005-of-00007.safetensors b/model-00005-of-00007.safetensors new file mode 100644 index 0000000..1704dc8 --- /dev/null +++ b/model-00005-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0308c9963f5c82039d7bc6207136aa79134b0154c8f061ee02c4289b7eb52f0b +size 4832007496 diff --git a/model-00006-of-00007.safetensors b/model-00006-of-00007.safetensors new file mode 100644 index 0000000..3d5aca4 --- /dev/null +++ b/model-00006-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c86265e392c45bf7d00a2facbe23e1fe7be554d8f31860a602f8f1461fc43a5 +size 4999813120 diff --git a/model-00007-of-00007.safetensors b/model-00007-of-00007.safetensors new file mode 100644 index 0000000..09fd59d --- /dev/null +++ b/model-00007-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18c7bb8a7da4b8ef30865baaa51b5c77b0f88b52dc55dd9e15ff80e56b14a405 +size 2571158184 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..0985084 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 32121044992 + }, + "weight_map": { + "lm_head.weight": "model-00007-of-00007.safetensors", + "model.embed_tokens.weight": "model-00001-of-00007.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.30.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.norm.weight": "model-00007-of-00007.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..8c6916a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train.log b/train.log new file mode 100644 index 0000000..b1173b1 --- /dev/null +++ b/train.log @@ -0,0 +1,1160 @@ +2026-04-29 17:16:46 - INFO - __main__ - Model parameters ModelArguments(base_model_revision=None, model_name_or_path='/workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200', model_revision='main', model_code_revision=None, torch_dtype='bfloat16', tokenizer_name_or_path=None, trust_remote_code=False, attn_implementation='flash_attention_2', use_peft=False, lora_r=16, lora_alpha=32, lora_dropout=0.05, lora_target_modules=None, lora_modules_to_save=None, load_in_8bit=False, load_in_4bit=False, bnb_4bit_quant_type='nf4', use_bnb_nested_quant=False, bnb_4bit_quant_storage='uint8') +2026-04-29 17:16:46 - INFO - __main__ - Data parameters DataArguments(chat_template=None, dataset_mixer={'Anthropic/hh-rlhf': 1.0}, text_column='text', dataset_splits=['train'], dataset_configs=['helpful-base'], dataset_dir=None, preprocessing_num_workers=12, use_persistent_hf_cache=True, hf_cache_dir='/workspace/dynamic-dpo-v4/hf/datasets', truncation_side=None, auto_insert_empty_system_msg=True, disable_thinking=False, preprocessing_log_samples=0, preprocessing_log_dir=None) +2026-04-29 17:16:46 - INFO - __main__ - Training/evaluation parameters NewDPOConfig( +_n_gpu=1, +accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False}, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +average_tokens_across_devices=False, +batch_eval_metrics=False, +beta=0.8, +bf16=True, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=True, +dataloader_num_workers=0, +dataloader_persistent_workers=False, +dataloader_pin_memory=True, +dataloader_prefetch_factor=None, +dataset_num_proc=12, +ddp_backend=None, +ddp_broadcast_buffers=None, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +ddp_timeout=1800, +debug=[], +deepspeed=None, +disable_dropout=True, +disable_tqdm=False, +do_eval=False, +do_predict=False, +do_train=False, +eta=0.1, +eval_accumulation_steps=None, +eval_delay=0, +eval_do_concat_batches=True, +eval_on_start=False, +eval_steps=200, +eval_strategy=IntervalStrategy.NO, +eval_use_gather_object=False, +f_alpha_divergence_coef=1.0, +f_divergence_type=reverse_kl, +force_use_ref_model=False, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +generate_during_eval=False, +gradient_accumulation_steps=2, +gradient_checkpointing=True, +gradient_checkpointing_kwargs={'use_reentrant': False}, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_always_push=False, +hub_margin_dataset_id=None, +hub_model_id=W-61/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449, +hub_model_revision=main, +hub_private_repo=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +include_for_metrics=[], +include_inputs_for_metrics=False, +include_num_input_tokens_seen=False, +include_tokens_per_second=False, +is_encoder_decoder=None, +jit_mode_eval=False, +label_names=None, +label_pad_token_id=-100, +label_smoothing=0.0, +label_smoothing_factor=0.0, +learning_rate=5e-07, +length_column_name=length, +load_best_model_at_end=False, +local_rank=0, +log_level=info, +log_level_replica=warning, +log_on_each_node=True, +logging_dir=/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/runs/Apr29_17-16-46_bc4ce3cd7c4e, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=1, +logging_strategy=IntervalStrategy.STEPS, +loss_type=sigmoid, +lr_scheduler_kwargs={}, +lr_scheduler_type=SchedulerType.COSINE, +margin_dataset_private=None, +margin_dataset_split=train, +margin_log_path=/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs, +margin_log_steps=1, +margin_save_full=True, +max_grad_norm=1.0, +max_length=512, +max_prompt_length=256, +max_steps=-1, +max_target_length=None, +metric_for_best_model=None, +model_adapter_name=None, +model_init_kwargs=None, +mp_parameters=, +neftune_noise_alpha=None, +no_cuda=False, +non_finite_logits_handling=error, +num_train_epochs=1, +optim=OptimizerNames.ADAMW_TORCH, +optim_args=None, +optim_target_modules=None, +output_dir=/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449, +overwrite_output_dir=False, +padding_value=None, +past_index=-1, +per_device_eval_batch_size=8, +per_device_train_batch_size=8, +post_tokenization_log_dir=None, +post_tokenization_log_samples=0, +precompute_ref_batch_size=None, +precompute_ref_eval_batch_size=None, +precompute_ref_log_probs=False, +prediction_loss_only=False, +push_margin_dataset=False, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +q_target=0.45, +ray_scope=last, +ref_adapter_name=None, +ref_model_init_kwargs=None, +ref_model_mixup_alpha=0.9, +ref_model_sync_steps=64, +reference_free=False, +remove_unused_columns=False, +report_to=['wandb'], +require_explicit_ref_model=True, +restore_callback_states_from_checkpoint=False, +resume_from_checkpoint=None, +reuse_tokenized_dataset=True, +rpo_alpha=None, +run_name=llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449, +s_star=0.4, +save_hf_model_artifacts=True, +save_on_each_node=False, +save_only_model=False, +save_safetensors=True, +save_steps=50, +save_strategy=SaveStrategy.NO, +save_total_limit=2, +seed=42, +sft_weight=0.0, +skip_memory_metrics=True, +sync_ref_model=False, +tf32=None, +tokenization_batch_size=128, +tokenization_mode=online, +tokenized_dataset_cache_dir=/workspace/dynamic-dpo-v4/tokenized_preferences, +torch_compile=False, +torch_compile_backend=None, +torch_compile_mode=None, +torch_empty_cache_steps=None, +torchdynamo=None, +tp_size=0, +tpu_metrics_debug=False, +tpu_num_cores=None, +trainer_type=new_dpo, +truncation_mode=keep_end, +use_cpu=False, +use_ipex=False, +use_legacy_prediction_loop=False, +use_liger_kernel=False, +use_mps_device=False, +wandb_project=llama3-hh-new-dpo-multi-beta-sweep, +warmup_ratio=0.1, +warmup_steps=0, +weight_decay=0.0, +) +2026-04-29 17:16:46 - INFO - __main__ - Using W&B project from training args: llama3-hh-new-dpo-multi-beta-sweep +wandb: Currently logged in as: can-not-fand (can-not-fand-northeastern-university). Use `wandb login --relogin` to force relogin +wandb: wandb version 0.26.1 is available! To upgrade, please run: +wandb: $ pip install wandb --upgrade +wandb: Tracking run with wandb version 0.17.5 +wandb: Run data is saved locally in /workspace/dynamic-dpo-v4/wandb/wandb/run-20260429_171649-sobwh2jg +wandb: Run `wandb offline` to turn off syncing. +wandb: Syncing run llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449 +wandb: ⭐️ View project at https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep +wandb: 🚀 View run at https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep/runs/sobwh2jg +2026-04-29 17:16:50 - INFO - __main__ - New-DPO parameters: beta=0.8, q_target=0.45, s_star=0.4, eta=0.1 +2026-04-29 17:16:50 - INFO - __main__ - Using persistent HF datasets cache at /workspace/dynamic-dpo-v4/hf/datasets + Normalizing raw HH preferences (train): 0%| | 0/43598 [00:00> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. + Normalizing raw HH preferences (train): 27%|██▋ | 11651/43598 [00:01<00:03, 10525.13 examples/s] Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10544.33 examples/s] + Normalizing raw HH preferences (train): 68%|██████▊ | 29726/43598 [00:02<00:01, 11591.69 examples/s] Normalizing raw HH preferences (train): 29%|██▉ | 12822/43598 [00:01<00:02, 10859.35 examples/s] Normalizing raw HH preferences (train): 71%|███████ | 30906/43598 [00:02<00:01, 11643.47 examples/s] Normalizing raw HH preferences (train): 32%|███▏ | 13991/43598 [00:01<00:02, 11095.53 examples/s] Normalizing raw HH preferences (train): 75%|███████▍ | 32659/43598 [00:02<00:00, 11574.95 examples/s] Normalizing raw HH preferences (train): 36%|███▌ | 15694/43598 [00:01<00:02, 11191.04 examples/s] Normalizing raw HH preferences (train): 78%|███████▊ | 33826/43598 [00:03<00:00, 11596.44 examples/s] Normalizing raw HH preferences (train): 39%|███▊ | 16864/43598 [00:01<00:02, 11325.34 examples/s] Normalizing raw HH preferences (train): 80%|████████ | 35000/43598 [00:03<00:00, 11341.80 examples/s]/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( +[WARNING|logging.py:328] 2026-04-29 17:16:57,204 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Normalizing raw HH preferences (train): 43%|████▎ | 18640/43598 [00:01<00:02, 11293.87 examples/s] Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. + Normalizing raw HH preferences (train): 45%|████▌ | 19807/43598 [00:01<00:02, 11387.15 examples/s] Normalizing raw HH preferences (train): 86%|████████▌ | 37329/43598 [00:03<00:00, 11488.87 examples/s] Normalizing raw HH preferences (train): 48%|████▊ | 20969/43598 [00:01<00:01, 11446.12 examples/s] Normalizing raw HH preferences (train): 88%|████████▊ | 38502/43598 [00:03<00:00, 11556.15 examples/s] Normalizing raw HH preferences (train): 91%|█████████ | 39675/43598 [00:03<00:00, 11602.81 examples/s] Normalizing raw HH preferences (train): 52%|█████▏ | 22650/43598 [00:02<00:01, 11354.93 examples/s] Normalizing raw HH preferences (train): 94%|█████████▎| 40844/43598 [00:03<00:00, 11623.71 examples/s] Normalizing raw HH preferences (train): 55%|█████▍ | 23809/43598 [00:02<00:01, 11413.83 examples/s] Normalizing raw HH preferences (train): 58%|█████▊ | 25480/43598 [00:02<00:01, 11313.98 examples/s] Normalizing raw HH preferences (train): 98%|█████████▊| 42653/43598 [00:03<00:00, 11545.08 examples/s] Normalizing raw HH preferences (train): 61%|██████ | 26639/43598 [00:02<00:01, 11362.48 examples/s] Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:03<00:00, 10957.40 examples/s] + Normalizing raw HH preferences (train): 64%|██████▎ | 27789/43598 [00:02<00:01, 11396.46 examples/s] Normalizing raw HH preferences (train): 66%|██████▋ | 28968/43598 [00:02<00:01, 11501.48 examples/s] Normalizing raw HH preferences (train): 70%|███████ | 30677/43598 [00:02<00:01, 11454.75 examples/s] Normalizing raw HH preferences (train): 73%|███████▎ | 31848/43598 [00:02<00:01, 11519.44 examples/s]/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( + Normalizing raw HH preferences (train): 77%|███████▋ | 33523/43598 [00:03<00:00, 11390.13 examples/s][WARNING|logging.py:328] 2026-04-29 17:16:58,546 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. + Normalizing raw HH preferences (train): 83%|████████▎ | 36335/43598 [00:03<00:00, 11290.36 examples/s] Normalizing raw HH preferences (train): 86%|████████▌ | 37506/43598 [00:03<00:00, 11392.75 examples/s] Normalizing raw HH preferences (train): 89%|████████▊ | 38663/43598 [00:03<00:00, 11436.46 examples/s] Normalizing raw HH preferences (train): 92%|█████████▏| 40311/43598 [00:03<00:00, 11271.72 examples/s] Normalizing raw HH preferences (train): 95%|█████████▌| 41495/43598 [00:03<00:00, 11414.85 examples/s] Normalizing raw HH preferences (train): 98%|█████████▊| 42652/43598 [00:03<00:00, 11450.81 examples/s] Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:03<00:00, 10995.16 examples/s] +2026-04-29 17:16:59 - INFO - __main__ - Training on the following splits: ['train : 43598'] +[INFO|tokenization_utils_base.py:2058] 2026-04-29 17:16:59,463 >> loading file tokenizer.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 17:16:59,463 >> loading file tokenizer.model +[INFO|tokenization_utils_base.py:2058] 2026-04-29 17:16:59,463 >> loading file added_tokens.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 17:16:59,463 >> loading file special_tokens_map.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 17:16:59,463 >> loading file tokenizer_config.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 17:16:59,463 >> loading file chat_template.jinja +[INFO|tokenization_utils_base.py:2323] 2026-04-29 17:16:59,851 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. +2026-04-29 17:16:59 - INFO - __main__ - Processed train sample 41905: + +Prompt: +<|begin_of_text|><|start_header_id|>user<|end_header_id|> + +What is the best way to clean my refrigerator?<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +Well, I have a few ideas, but the best way to know what works best is to run some tests and then analyze the data to figure out which technique works best.<|eot_id|><|start_header_id|>user<|end_header_id|> + +What are your ideas?<|eot_id|> + +Chosen: +<|start_header_id|>assistant<|end_header_id|> + +Some of my ideas include wiping down the interior with a rag, using bleach and water to wipe down the door, emptying the contents of the fridge to scrub the shelves with a clean cloth, cleaning the interior of the fridge with vinegar and water, and using a vacuum cleaner to clean the interior.<|eot_id|> + +Rejected: +<|start_header_id|>assistant<|end_header_id|> + +I was thinking that you could try different types of soap, which could be a variation of “soap,” like antibacterial soap. I was also thinking that you could try using different tools, like a toothbrush, to clean the inside. Or I was thinking that you could try different ways to get the gunk out, like using a squeegee, or using a paper towel. I was also thinking you could try other types of cleaning, like vacuuming, but I think that could have the opposite of the desired effect.<|eot_id|> +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( +[INFO|configuration_utils.py:691] 2026-04-29 17:16:59,966 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/config.json +[INFO|configuration_utils.py:765] 2026-04-29 17:16:59,967 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": false, + "vocab_size": 128256 +} + +[INFO|modeling_utils.py:1121] 2026-04-29 17:16:59,980 >> loading weights file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/model.safetensors.index.json +[INFO|modeling_utils.py:2167] 2026-04-29 17:16:59,981 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. +[WARNING|logging.py:328] 2026-04-29 17:16:59,984 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[INFO|configuration_utils.py:1142] 2026-04-29 17:16:59,986 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "eos_token_id": 128001, + "use_cache": false +} + + Loading checkpoint shards: 0%| | 0/7 [00:00> All model checkpoint weights were used when initializing LlamaForCausalLM. + +[INFO|modeling_utils.py:4934] 2026-04-29 17:17:11,636 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200. +If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. +[INFO|configuration_utils.py:1095] 2026-04-29 17:17:11,639 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/generation_config.json +[INFO|configuration_utils.py:1142] 2026-04-29 17:17:11,639 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9 +} + +[INFO|configuration_utils.py:691] 2026-04-29 17:17:11,641 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/config.json +[INFO|configuration_utils.py:765] 2026-04-29 17:17:11,641 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": false, + "vocab_size": 128256 +} + +[INFO|modeling_utils.py:1121] 2026-04-29 17:17:11,643 >> loading weights file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/model.safetensors.index.json +[INFO|modeling_utils.py:2167] 2026-04-29 17:17:11,644 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. +[INFO|configuration_utils.py:1142] 2026-04-29 17:17:11,648 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "eos_token_id": 128001, + "use_cache": false +} + + Loading checkpoint shards: 0%| | 0/7 [00:00> All model checkpoint weights were used when initializing LlamaForCausalLM. + +[INFO|modeling_utils.py:4934] 2026-04-29 17:17:22,950 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200. +If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. +[INFO|configuration_utils.py:1095] 2026-04-29 17:17:22,953 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/generation_config.json +[INFO|configuration_utils.py:1142] 2026-04-29 17:17:22,953 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9 +} + +[WARNING|trainer.py:821] 2026-04-29 17:17:22,955 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. +[WARNING|trainer.py:816] 2026-04-29 17:17:22,955 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 17:17:22,967 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-29 17:17:24,409 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 17:17:24,409 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 17:17:24,409 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 17:17:24,423 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-29 17:17:24,432 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-29 17:17:24,433 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[INFO|trainer.py:748] 2026-04-29 17:17:24,698 >> Using auto half precision backend +/workspace/dynamic-dpo-v4/.venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaForCausalLM because mixed precision turned on in FSDP. Affects: model.embed_tokens.weight, model.norm.weight, lm_head.weight. + warnings.warn( +/workspace/dynamic-dpo-v4/.venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaDecoderLayer because mixed precision turned on in FSDP. Affects: self_attn.q_proj.weight, self_attn.k_proj.weight, self_attn.v_proj.weight, self_attn.o_proj.weight, mlp.gate_proj.weight, mlp.up_proj.weight, mlp.down_proj.weight, input_layernorm.weight, post_attention_layernorm.weight. + warnings.warn( +/workspace/dynamic-dpo-v4/.venv/lib/python3.11/site-packages/accelerate/accelerator.py:1563: UserWarning: FSDP upcast of low precision parameters may affect the precision of model checkpoints. + warnings.warn( +[INFO|trainer.py:2414] 2026-04-29 17:17:32,350 >> ***** Running training ***** +[INFO|trainer.py:2415] 2026-04-29 17:17:32,350 >> Num examples = 43,598 +[INFO|trainer.py:2416] 2026-04-29 17:17:32,350 >> Num Epochs = 1 +[INFO|trainer.py:2417] 2026-04-29 17:17:32,350 >> Instantaneous batch size per device = 8 +[INFO|trainer.py:2420] 2026-04-29 17:17:32,350 >> Total train batch size (w. parallel, distributed & accumulation) = 64 +[INFO|trainer.py:2421] 2026-04-29 17:17:32,350 >> Gradient Accumulation steps = 2 +[INFO|trainer.py:2422] 2026-04-29 17:17:32,350 >> Total optimization steps = 681 +[INFO|trainer.py:2423] 2026-04-29 17:17:32,351 >> Number of trainable parameters = 2,007,565,312 +[INFO|integration_utils.py:831] 2026-04-29 17:17:32,352 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/681 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-29 17:17:33,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-29 17:17:33,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-29 17:17:33,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 1/681 [00:02<30:25, 2.69s/it] {'loss': 1.4324, 'grad_norm': 676.3800659179688, 'learning_rate': 0.0, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.5040594935417175, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.02287006378173828, 'margin_dpo/margin_mean': -0.02287048101425171, 'margin_dpo/margin_std': 0.41920793056488037, 'logps/chosen': -50.1435661315918, 'logps/rejected': -74.09991455078125, 'logps/ref_chosen': -50.14883804321289, 'logps/ref_rejected': -74.1280517578125, 'KL/chosen_KL_mean': 0.00527191162109375, 'KL/rejected_KL_mean': 0.028141021728515625, 'KL/mean': 0.016706019639968872, 'KL/std': 0.272699236869812, 'logits/chosen': -0.4974287748336792, 'logits/rejected': -0.43299180269241333, 'epoch': 0.0} + 0%| | 1/681 [00:02<30:25, 2.69s/it] 0%| | 2/681 [00:05<29:26, 2.60s/it] {'loss': 1.4592, 'grad_norm': 589.6188354492188, 'learning_rate': 7.246376811594203e-09, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.5128992795944214, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.06572261452674866, 'margin_dpo/margin_mean': -0.06572240591049194, 'margin_dpo/margin_std': 0.35048407316207886, 'logps/chosen': -52.65568923950195, 'logps/rejected': -75.27340698242188, 'logps/ref_chosen': -52.620704650878906, 'logps/ref_rejected': -75.30413818359375, 'KL/chosen_KL_mean': -0.03498649597167969, 'KL/rejected_KL_mean': 0.030735015869140625, 'KL/mean': -0.00212840735912323, 'KL/std': 0.24797174334526062, 'logits/chosen': -0.49536412954330444, 'logits/rejected': -0.4594460427761078, 'epoch': 0.0} + 0%| | 2/681 [00:05<29:26, 2.60s/it] 0%| | 3/681 [00:07<29:17, 2.59s/it] {'loss': 1.405, 'grad_norm': 575.7740478515625, 'learning_rate': 1.4492753623188406e-08, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.5012327432632446, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.00632166862487793, 'margin_dpo/margin_mean': -0.0063214898109436035, 'margin_dpo/margin_std': 0.2866283059120178, 'logps/chosen': -60.98915481567383, 'logps/rejected': -68.67383575439453, 'logps/ref_chosen': -60.981597900390625, 'logps/ref_rejected': -68.67259216308594, 'KL/chosen_KL_mean': -0.0075588226318359375, 'KL/rejected_KL_mean': -0.001239776611328125, 'KL/mean': -0.0043991804122924805, 'KL/std': 0.22414085268974304, 'logits/chosen': -0.4817797839641571, 'logits/rejected': -0.44226667284965515, 'epoch': 0.0} + 0%| | 3/681 [00:07<29:17, 2.59s/it] 1%| | 4/681 [00:10<29:31, 2.62s/it] {'loss': 1.469, 'grad_norm': 598.5643920898438, 'learning_rate': 2.1739130434782606e-08, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.5149009227752686, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.07595756649971008, 'margin_dpo/margin_mean': -0.0759580135345459, 'margin_dpo/margin_std': 0.36108309030532837, 'logps/chosen': -56.80902862548828, 'logps/rejected': -86.61247253417969, 'logps/ref_chosen': -56.7677116394043, 'logps/ref_rejected': -86.64710998535156, 'KL/chosen_KL_mean': -0.04131507873535156, 'KL/rejected_KL_mean': 0.034641265869140625, 'KL/mean': -0.0033356696367263794, 'KL/std': 0.25460168719291687, 'logits/chosen': -0.4682745039463043, 'logits/rejected': -0.44059938192367554, 'epoch': 0.01} + 1%| | 4/681 [00:10<29:31, 2.62s/it] 1%| | 5/681 [00:13<29:19, 2.60s/it] {'loss': 1.4327, 'grad_norm': 748.8038940429688, 'learning_rate': 2.898550724637681e-08, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.5043825507164001, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.027328133583068848, 'margin_dpo/margin_mean': -0.02732786536216736, 'margin_dpo/margin_std': 0.39059120416641235, 'logps/chosen': -53.85413360595703, 'logps/rejected': -84.11660766601562, 'logps/ref_chosen': -53.859375, 'logps/ref_rejected': -84.14918518066406, 'KL/chosen_KL_mean': 0.0052433013916015625, 'KL/rejected_KL_mean': 0.032573699951171875, 'KL/mean': 0.018906593322753906, 'KL/std': 0.2835850417613983, 'logits/chosen': -0.5214688777923584, 'logits/rejected': -0.4782792031764984, 'epoch': 0.01} + 1%| | 5/681 [00:13<29:19, 2.60s/it] 1%| | 6/681 [00:15<27:44, 2.47s/it] {'loss': 1.3859, 'grad_norm': 761.1502685546875, 'learning_rate': 3.6231884057971014e-08, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.49454063177108765, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.0276680588722229, 'margin_dpo/margin_mean': 0.027667373418807983, 'margin_dpo/margin_std': 0.35976481437683105, 'logps/chosen': -62.994285583496094, 'logps/rejected': -92.65982055664062, 'logps/ref_chosen': -63.007484436035156, 'logps/ref_rejected': -92.64534759521484, 'KL/chosen_KL_mean': 0.0131988525390625, 'KL/rejected_KL_mean': -0.014469146728515625, 'KL/mean': -0.0006367862224578857, 'KL/std': 0.2519422471523285, 'logits/chosen': -0.4976610243320465, 'logits/rejected': -0.4546470046043396, 'epoch': 0.01} + 1%| | 6/681 [00:15<27:44, 2.47s/it] 1%| | 7/681 [00:17<27:12, 2.42s/it] {'loss': 1.3851, 'grad_norm': 648.91259765625, 'learning_rate': 4.347826086956521e-08, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.49415522813796997, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.030850648880004883, 'margin_dpo/margin_mean': 0.030851304531097412, 'margin_dpo/margin_std': 0.3817327618598938, 'logps/chosen': -57.73905944824219, 'logps/rejected': -103.91567993164062, 'logps/ref_chosen': -57.774818420410156, 'logps/ref_rejected': -103.92059326171875, 'KL/chosen_KL_mean': 0.035762786865234375, 'KL/rejected_KL_mean': 0.004913330078125, 'KL/mean': 0.020337015390396118, 'KL/std': 0.2789710462093353, 'logits/chosen': -0.5009369254112244, 'logits/rejected': -0.4670419692993164, 'epoch': 0.01} + 1%| | 7/681 [00:17<27:12, 2.42s/it] 1%| | 8/681 [00:19<26:54, 2.40s/it] {'loss': 1.4194, 'grad_norm': 638.4391479492188, 'learning_rate': 5.0724637681159424e-08, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.5014467239379883, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.01059296727180481, 'margin_dpo/margin_mean': -0.010592788457870483, 'margin_dpo/margin_std': 0.3931761384010315, 'logps/chosen': -58.709442138671875, 'logps/rejected': -79.29423522949219, 'logps/ref_chosen': -58.716033935546875, 'logps/ref_rejected': -79.3114242553711, 'KL/chosen_KL_mean': 0.006595611572265625, 'KL/rejected_KL_mean': 0.01718902587890625, 'KL/mean': 0.011890605092048645, 'KL/std': 0.2876508831977844, 'logits/chosen': -0.5008213520050049, 'logits/rejected': -0.47419145703315735, 'epoch': 0.01} + 1%| | 8/681 [00:19<26:54, 2.40s/it] 1%|▏ | 9/681 [00:22<27:27, 2.45s/it] {'loss': 1.394, 'grad_norm': 672.1676025390625, 'learning_rate': 5.797101449275362e-08, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.49435490369796753, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.028308242559432983, 'margin_dpo/margin_mean': 0.028307169675827026, 'margin_dpo/margin_std': 0.4165334105491638, 'logps/chosen': -69.82758331298828, 'logps/rejected': -99.59171295166016, 'logps/ref_chosen': -69.8668441772461, 'logps/ref_rejected': -99.6026611328125, 'KL/chosen_KL_mean': 0.03925895690917969, 'KL/rejected_KL_mean': 0.010951995849609375, 'KL/mean': 0.02510516345500946, 'KL/std': 0.30363646149635315, 'logits/chosen': -0.5118868350982666, 'logits/rejected': -0.4663264751434326, 'epoch': 0.01} + 1%|▏ | 9/681 [00:22<27:27, 2.45s/it] 1%|▏ | 10/681 [00:24<27:34, 2.47s/it] {'loss': 1.3752, 'grad_norm': 545.5849609375, 'learning_rate': 6.521739130434782e-08, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.4914953112602234, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.04434821009635925, 'margin_dpo/margin_mean': 0.04434826970100403, 'margin_dpo/margin_std': 0.37585416436195374, 'logps/chosen': -48.35100555419922, 'logps/rejected': -80.40972900390625, 'logps/ref_chosen': -48.35768508911133, 'logps/ref_rejected': -80.37206268310547, 'KL/chosen_KL_mean': 0.0066814422607421875, 'KL/rejected_KL_mean': -0.037662506103515625, 'KL/mean': -0.015493467450141907, 'KL/std': 0.2837975323200226, 'logits/chosen': -0.4926380515098572, 'logits/rejected': -0.44934237003326416, 'epoch': 0.01} + 1%|▏ | 10/681 [00:25<27:34, 2.47s/it] 2%|▏ | 11/681 [00:27<28:24, 2.54s/it] {'loss': 1.3993, 'grad_norm': 539.252197265625, 'learning_rate': 7.246376811594203e-08, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.49684467911720276, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.01619333028793335, 'margin_dpo/margin_mean': 0.016193389892578125, 'margin_dpo/margin_std': 0.378741979598999, 'logps/chosen': -53.01716613769531, 'logps/rejected': -87.796875, 'logps/ref_chosen': -53.01685333251953, 'logps/ref_rejected': -87.78038024902344, 'KL/chosen_KL_mean': -0.000308990478515625, 'KL/rejected_KL_mean': -0.01650238037109375, 'KL/mean': -0.00840708613395691, 'KL/std': 0.28651660680770874, 'logits/chosen': -0.4729078710079193, 'logits/rejected': -0.44843602180480957, 'epoch': 0.02} + 2%|▏ | 11/681 [00:27<28:24, 2.54s/it] 2%|▏ | 12/681 [00:30<28:34, 2.56s/it] {'loss': 1.4199, 'grad_norm': 731.9530639648438, 'learning_rate': 7.971014492753623e-08, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.5020244717597961, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.01176394522190094, 'margin_dpo/margin_mean': -0.011764273047447205, 'margin_dpo/margin_std': 0.3872652053833008, 'logps/chosen': -61.8509521484375, 'logps/rejected': -104.89202117919922, 'logps/ref_chosen': -61.80543518066406, 'logps/ref_rejected': -104.8582763671875, 'KL/chosen_KL_mean': -0.0455169677734375, 'KL/rejected_KL_mean': -0.033748626708984375, 'KL/mean': -0.0396341010928154, 'KL/std': 0.27084100246429443, 'logits/chosen': -0.5192070007324219, 'logits/rejected': -0.48252177238464355, 'epoch': 0.02} + 2%|▏ | 12/681 [00:30<28:34, 2.56s/it] 2%|▏ | 13/681 [00:33<28:58, 2.60s/it] {'loss': 1.4451, 'grad_norm': 657.2701416015625, 'learning_rate': 8.695652173913042e-08, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.5091712474822998, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.047011733055114746, 'margin_dpo/margin_mean': -0.047012150287628174, 'margin_dpo/margin_std': 0.3621995747089386, 'logps/chosen': -64.25914001464844, 'logps/rejected': -87.15484619140625, 'logps/ref_chosen': -64.2603530883789, 'logps/ref_rejected': -87.20307922363281, 'KL/chosen_KL_mean': 0.0012149810791015625, 'KL/rejected_KL_mean': 0.0482330322265625, 'KL/mean': 0.024721741676330566, 'KL/std': 0.2512255609035492, 'logits/chosen': -0.4995085597038269, 'logits/rejected': -0.47286656498908997, 'epoch': 0.02} + 2%|▏ | 13/681 [00:33<28:58, 2.60s/it] 2%|▏ | 14/681 [00:35<28:39, 2.58s/it] {'loss': 1.4211, 'grad_norm': 684.386962890625, 'learning_rate': 9.420289855072464e-08, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.5020325183868408, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.009398609399795532, 'margin_dpo/margin_mean': -0.009398102760314941, 'margin_dpo/margin_std': 0.4144117534160614, 'logps/chosen': -58.1405029296875, 'logps/rejected': -104.06797790527344, 'logps/ref_chosen': -58.11021041870117, 'logps/ref_rejected': -104.04708099365234, 'KL/chosen_KL_mean': -0.030294418334960938, 'KL/rejected_KL_mean': -0.020893096923828125, 'KL/mean': -0.0255916565656662, 'KL/std': 0.27099841833114624, 'logits/chosen': -0.49813684821128845, 'logits/rejected': -0.4595490097999573, 'epoch': 0.02} + 2%|▏ | 14/681 [00:35<28:39, 2.58s/it] 2%|▏ | 15/681 [00:38<28:36, 2.58s/it] {'loss': 1.3835, 'grad_norm': 519.0724487304688, 'learning_rate': 1.0144927536231885e-07, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.49189913272857666, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.038970112800598145, 'margin_dpo/margin_mean': 0.03897008299827576, 'margin_dpo/margin_std': 0.4108760356903076, 'logps/chosen': -56.99871063232422, 'logps/rejected': -80.87940979003906, 'logps/ref_chosen': -56.96691131591797, 'logps/ref_rejected': -80.80863952636719, 'KL/chosen_KL_mean': -0.031803131103515625, 'KL/rejected_KL_mean': -0.070770263671875, 'KL/mean': -0.05128836631774902, 'KL/std': 0.2677825093269348, 'logits/chosen': -0.4899941384792328, 'logits/rejected': -0.4712330996990204, 'epoch': 0.02} + 2%|▏ | 15/681 [00:38<28:36, 2.58s/it] 2%|▏ | 16/681 [00:40<28:13, 2.55s/it] {'loss': 1.3318, 'grad_norm': 658.9254150390625, 'learning_rate': 1.0869565217391303e-07, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.4807215929031372, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.09726369380950928, 'margin_dpo/margin_mean': 0.09726375341415405, 'margin_dpo/margin_std': 0.3595857620239258, 'logps/chosen': -61.72896957397461, 'logps/rejected': -84.455810546875, 'logps/ref_chosen': -61.739891052246094, 'logps/ref_rejected': -84.36947631835938, 'KL/chosen_KL_mean': 0.010923385620117188, 'KL/rejected_KL_mean': -0.08633804321289062, 'KL/mean': -0.03770947456359863, 'KL/std': 0.25064218044281006, 'logits/chosen': -0.5206788182258606, 'logits/rejected': -0.4792103171348572, 'epoch': 0.02} + 2%|▏ | 16/681 [00:40<28:13, 2.55s/it] 2%|▏ | 17/681 [00:43<28:00, 2.53s/it] {'loss': 1.4017, 'grad_norm': 644.1885986328125, 'learning_rate': 1.1594202898550725e-07, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.49689337611198425, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.015069246292114258, 'margin_dpo/margin_mean': 0.015069544315338135, 'margin_dpo/margin_std': 0.4079738259315491, 'logps/chosen': -67.70291137695312, 'logps/rejected': -85.38630676269531, 'logps/ref_chosen': -67.71033477783203, 'logps/ref_rejected': -85.37865447998047, 'KL/chosen_KL_mean': 0.007419586181640625, 'KL/rejected_KL_mean': -0.007648468017578125, 'KL/mean': -0.00011467933654785156, 'KL/std': 0.2673434615135193, 'logits/chosen': -0.4899910092353821, 'logits/rejected': -0.4514046311378479, 'epoch': 0.02} + 2%|▏ | 17/681 [00:43<28:00, 2.53s/it] 3%|▎ | 18/681 [00:45<27:51, 2.52s/it] {'loss': 1.3528, 'grad_norm': 632.3574829101562, 'learning_rate': 1.2318840579710146e-07, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.4862971305847168, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.0684882402420044, 'margin_dpo/margin_mean': 0.06848806142807007, 'margin_dpo/margin_std': 0.3598102629184723, 'logps/chosen': -47.760032653808594, 'logps/rejected': -75.56132507324219, 'logps/ref_chosen': -47.7394905090332, 'logps/ref_rejected': -75.4722900390625, 'KL/chosen_KL_mean': -0.020544052124023438, 'KL/rejected_KL_mean': -0.0890350341796875, 'KL/mean': -0.054789185523986816, 'KL/std': 0.26502934098243713, 'logits/chosen': -0.48648545145988464, 'logits/rejected': -0.43023061752319336, 'epoch': 0.03} + 3%|▎ | 18/681 [00:45<27:51, 2.52s/it] 3%|▎ | 19/681 [00:48<27:59, 2.54s/it] {'loss': 1.3433, 'grad_norm': 576.9486083984375, 'learning_rate': 1.3043478260869563e-07, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.4844985008239746, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.08037757873535156, 'margin_dpo/margin_mean': 0.08037763833999634, 'margin_dpo/margin_std': 0.3444629907608032, 'logps/chosen': -70.2092056274414, 'logps/rejected': -89.84181213378906, 'logps/ref_chosen': -70.20536041259766, 'logps/ref_rejected': -89.7575912475586, 'KL/chosen_KL_mean': -0.00384521484375, 'KL/rejected_KL_mean': -0.08422470092773438, 'KL/mean': -0.04403865337371826, 'KL/std': 0.24243327975273132, 'logits/chosen': -0.528351902961731, 'logits/rejected': -0.4815219044685364, 'epoch': 0.03} + 3%|▎ | 19/681 [00:48<27:59, 2.54s/it] 3%|▎ | 20/681 [00:50<28:03, 2.55s/it] {'loss': 1.3713, 'grad_norm': 575.07275390625, 'learning_rate': 1.3768115942028986e-07, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.49041998386383057, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.04667180776596069, 'margin_dpo/margin_mean': 0.046672046184539795, 'margin_dpo/margin_std': 0.37012988328933716, 'logps/chosen': -50.84715270996094, 'logps/rejected': -78.91392517089844, 'logps/ref_chosen': -50.80324172973633, 'logps/ref_rejected': -78.82334899902344, 'KL/chosen_KL_mean': -0.043910980224609375, 'KL/rejected_KL_mean': -0.09057998657226562, 'KL/mean': -0.06724703311920166, 'KL/std': 0.2558104395866394, 'logits/chosen': -0.5057722330093384, 'logits/rejected': -0.44449201226234436, 'epoch': 0.03} + 3%|▎ | 20/681 [00:50<28:03, 2.55s/it] 3%|▎ | 21/681 [00:53<27:46, 2.52s/it] {'loss': 1.2925, 'grad_norm': 567.56494140625, 'learning_rate': 1.4492753623188405e-07, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.47142279148101807, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.14749857783317566, 'margin_dpo/margin_mean': 0.1474984586238861, 'margin_dpo/margin_std': 0.3555169403553009, 'logps/chosen': -50.0704345703125, 'logps/rejected': -78.02371215820312, 'logps/ref_chosen': -50.063018798828125, 'logps/ref_rejected': -77.86878967285156, 'KL/chosen_KL_mean': -0.007419586181640625, 'KL/rejected_KL_mean': -0.15491485595703125, 'KL/mean': -0.08116798102855682, 'KL/std': 0.27864497900009155, 'logits/chosen': -0.471624493598938, 'logits/rejected': -0.4458872079849243, 'epoch': 0.03} + 3%|▎ | 21/681 [00:53<27:46, 2.52s/it] 3%|▎ | 22/681 [00:55<27:46, 2.53s/it] {'loss': 1.3046, 'grad_norm': 609.1174926757812, 'learning_rate': 1.5217391304347825e-07, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.47342291474342346, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.13674354553222656, 'margin_dpo/margin_mean': 0.1367432177066803, 'margin_dpo/margin_std': 0.38546815514564514, 'logps/chosen': -59.02802276611328, 'logps/rejected': -97.61180114746094, 'logps/ref_chosen': -59.05763626098633, 'logps/ref_rejected': -97.50466918945312, 'KL/chosen_KL_mean': 0.029613494873046875, 'KL/rejected_KL_mean': -0.10712814331054688, 'KL/mean': -0.03875645995140076, 'KL/std': 0.2763916254043579, 'logits/chosen': -0.4810870885848999, 'logits/rejected': -0.43719351291656494, 'epoch': 0.03} + 3%|▎ | 22/681 [00:55<27:46, 2.53s/it] 3%|▎ | 23/681 [00:58<28:54, 2.64s/it] {'loss': 1.2669, 'grad_norm': 545.2821044921875, 'learning_rate': 1.5942028985507245e-07, 'fcm_dpo/beta': 0.800000011920929, 'fcm_dpo/q_t': 0.46310946345329285, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.19482764601707458, 'margin_dpo/margin_mean': 0.19482776522636414, 'margin_dpo/margin_std': 0.43239736557006836, 'logps/chosen': -60.01018524169922, 'logps/rejected': -81.2668685913086, 'logps/ref_chosen': -60.07769775390625, 'logps/ref_rejected': -81.13955688476562, 'KL/chosen_KL_mean': 0.06751251220703125, 'KL/rejected_KL_mean': -0.1273174285888672, 'KL/mean': -0.029902145266532898, 'KL/std': 0.32757315039634705, 'logits/chosen': -0.4882713258266449, 'logits/rejected': -0.4656675159931183, 'epoch': 0.03} + 3%|▎ | 23/681 [00:58<28:54, 2.64s/it] 4%|▎ | 24/681 [01:01<28:58, 2.65s/it] {'loss': 1.2205, 'grad_norm': 589.2479248046875, 'learning_rate': 1.6666666666666665e-07, 'fcm_dpo/beta': 0.8059060573577881, 'fcm_dpo/q_t': 0.45153895020484924, 'fcm_dpo/delta': 0.0732855275273323, 'fcm_dpo/margin': 0.2468125820159912, 'margin_dpo/margin_mean': 0.24681302905082703, 'margin_dpo/margin_std': 0.3777139186859131, 'logps/chosen': -44.23401641845703, 'logps/rejected': -99.31501007080078, 'logps/ref_chosen': -44.29103469848633, 'logps/ref_rejected': -99.12521362304688, 'KL/chosen_KL_mean': 0.057018280029296875, 'KL/rejected_KL_mean': -0.18979644775390625, 'KL/mean': -0.06638666987419128, 'KL/std': 0.3026999235153198, 'logits/chosen': -0.4822083115577698, 'logits/rejected': -0.46618789434432983, 'epoch': 0.04} + 4%|▎ | 24/681 [01:01<28:58, 2.65s/it] 4%|▎ | 25/681 [01:03<28:53, 2.64s/it] {'loss': 1.2696, 'grad_norm': 538.94677734375, 'learning_rate': 1.7391304347826085e-07, 'fcm_dpo/beta': 0.8118120431900024, 'fcm_dpo/q_t': 0.4633547067642212, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.18760046362876892, 'margin_dpo/margin_mean': 0.18760085105895996, 'margin_dpo/margin_std': 0.4301965832710266, 'logps/chosen': -52.52052307128906, 'logps/rejected': -89.51325988769531, 'logps/ref_chosen': -52.537052154541016, 'logps/ref_rejected': -89.34219360351562, 'KL/chosen_KL_mean': 0.016529083251953125, 'KL/rejected_KL_mean': -0.17107009887695312, 'KL/mean': -0.07727153599262238, 'KL/std': 0.34193894267082214, 'logits/chosen': -0.5210130214691162, 'logits/rejected': -0.4919084310531616, 'epoch': 0.04} + 4%|▎ | 25/681 [01:03<28:53, 2.64s/it] 4%|▍ | 26/681 [01:06<27:36, 2.53s/it] {'loss': 1.1923, 'grad_norm': 565.0730590820312, 'learning_rate': 1.8115942028985507e-07, 'fcm_dpo/beta': 0.8153971433639526, 'fcm_dpo/q_t': 0.43973731994628906, 'fcm_dpo/delta': 0.04396749660372734, 'fcm_dpo/margin': 0.3171558976173401, 'margin_dpo/margin_mean': 0.31715625524520874, 'margin_dpo/margin_std': 0.5414035320281982, 'logps/chosen': -53.859130859375, 'logps/rejected': -103.61318969726562, 'logps/ref_chosen': -53.92280578613281, 'logps/ref_rejected': -103.35971069335938, 'KL/chosen_KL_mean': 0.06367874145507812, 'KL/rejected_KL_mean': -0.25347900390625, 'KL/mean': -0.09489929676055908, 'KL/std': 0.40129321813583374, 'logits/chosen': -0.5216317176818848, 'logits/rejected': -0.4897175133228302, 'epoch': 0.04} + 4%|▍ | 26/681 [01:06<27:36, 2.53s/it] 4%|▍ | 27/681 [01:08<27:25, 2.52s/it] {'loss': 1.0763, 'grad_norm': 575.7881469726562, 'learning_rate': 1.8840579710144927e-07, 'fcm_dpo/beta': 0.8174295425415039, 'fcm_dpo/q_t': 0.4092448949813843, 'fcm_dpo/delta': 0.00940924696624279, 'fcm_dpo/margin': 0.4780521094799042, 'margin_dpo/margin_mean': 0.47805216908454895, 'margin_dpo/margin_std': 0.5317339897155762, 'logps/chosen': -42.76671600341797, 'logps/rejected': -99.07044219970703, 'logps/ref_chosen': -42.898529052734375, 'logps/ref_rejected': -98.72419738769531, 'KL/chosen_KL_mean': 0.13181304931640625, 'KL/rejected_KL_mean': -0.3462409973144531, 'KL/mean': -0.10721321403980255, 'KL/std': 0.445562481880188, 'logits/chosen': -0.5156636238098145, 'logits/rejected': -0.47926321625709534, 'epoch': 0.04} + 4%|▍ | 27/681 [01:08<27:25, 2.52s/it] 4%|▍ | 28/681 [01:11<27:31, 2.53s/it] {'loss': 1.1808, 'grad_norm': 499.330810546875, 'learning_rate': 1.9565217391304347e-07, 'fcm_dpo/beta': 0.8387187123298645, 'fcm_dpo/q_t': 0.4359322488307953, 'fcm_dpo/delta': 0.12880420684814453, 'fcm_dpo/margin': 0.3269842267036438, 'margin_dpo/margin_mean': 0.3269844055175781, 'margin_dpo/margin_std': 0.5416440367698669, 'logps/chosen': -60.5274658203125, 'logps/rejected': -91.69906616210938, 'logps/ref_chosen': -60.55650329589844, 'logps/ref_rejected': -91.40111541748047, 'KL/chosen_KL_mean': 0.029035568237304688, 'KL/rejected_KL_mean': -0.29795074462890625, 'KL/mean': -0.13445699214935303, 'KL/std': 0.4010791778564453, 'logits/chosen': -0.5236212015151978, 'logits/rejected': -0.4699610471725464, 'epoch': 0.04} + 4%|▍ | 28/681 [01:11<27:31, 2.53s/it] 4%|▍ | 29/681 [01:13<26:29, 2.44s/it] {'loss': 1.0209, 'grad_norm': 554.7158813476562, 'learning_rate': 2.028985507246377e-07, 'fcm_dpo/beta': 0.8350539207458496, 'fcm_dpo/q_t': 0.39213281869888306, 'fcm_dpo/delta': -0.056411802768707275, 'fcm_dpo/margin': 0.5435106754302979, 'margin_dpo/margin_mean': 0.5435110330581665, 'margin_dpo/margin_std': 0.47872793674468994, 'logps/chosen': -57.67644500732422, 'logps/rejected': -97.8065185546875, 'logps/ref_chosen': -57.80778503417969, 'logps/ref_rejected': -97.39434814453125, 'KL/chosen_KL_mean': 0.13134193420410156, 'KL/rejected_KL_mean': -0.4121665954589844, 'KL/mean': -0.14041244983673096, 'KL/std': 0.44271203875541687, 'logits/chosen': -0.5539100170135498, 'logits/rejected': -0.5077922344207764, 'epoch': 0.04} + 4%|▍ | 29/681 [01:13<26:29, 2.44s/it] 4%|▍ | 30/681 [01:16<27:07, 2.50s/it] {'loss': 0.9594, 'grad_norm': 459.3413391113281, 'learning_rate': 2.1014492753623187e-07, 'fcm_dpo/beta': 0.804972231388092, 'fcm_dpo/q_t': 0.36796897649765015, 'fcm_dpo/delta': -0.19466958940029144, 'fcm_dpo/margin': 0.7230579853057861, 'margin_dpo/margin_mean': 0.7230584621429443, 'margin_dpo/margin_std': 0.712450385093689, 'logps/chosen': -52.4459228515625, 'logps/rejected': -99.08081817626953, 'logps/ref_chosen': -52.577369689941406, 'logps/ref_rejected': -98.48920440673828, 'KL/chosen_KL_mean': 0.13145065307617188, 'KL/rejected_KL_mean': -0.59161376953125, 'KL/mean': -0.23007872700691223, 'KL/std': 0.6256662607192993, 'logits/chosen': -0.5020028948783875, 'logits/rejected': -0.47157809138298035, 'epoch': 0.04} + 4%|▍ | 30/681 [01:16<27:07, 2.50s/it] 5%|▍ | 31/681 [01:18<27:40, 2.55s/it] {'loss': 1.0803, 'grad_norm': 401.8319091796875, 'learning_rate': 2.1739130434782607e-07, 'fcm_dpo/beta': 0.7972604632377625, 'fcm_dpo/q_t': 0.4051462411880493, 'fcm_dpo/delta': -0.0215899795293808, 'fcm_dpo/margin': 0.5276015996932983, 'margin_dpo/margin_mean': 0.5276015996932983, 'margin_dpo/margin_std': 0.7064246535301208, 'logps/chosen': -63.67369842529297, 'logps/rejected': -73.28838348388672, 'logps/ref_chosen': -63.806922912597656, 'logps/ref_rejected': -72.89400482177734, 'KL/chosen_KL_mean': 0.1332244873046875, 'KL/rejected_KL_mean': -0.394378662109375, 'KL/mean': -0.13057458400726318, 'KL/std': 0.5608391165733337, 'logits/chosen': -0.509661853313446, 'logits/rejected': -0.46542733907699585, 'epoch': 0.05} + 5%|▍ | 31/681 [01:18<27:40, 2.55s/it] 5%|▍ | 32/681 [01:21<28:13, 2.61s/it] {'loss': 0.979, 'grad_norm': 401.576416015625, 'learning_rate': 2.2463768115942027e-07, 'fcm_dpo/beta': 0.772221565246582, 'fcm_dpo/q_t': 0.3716619610786438, 'fcm_dpo/delta': -0.21296542882919312, 'fcm_dpo/margin': 0.7771282196044922, 'margin_dpo/margin_mean': 0.7771281003952026, 'margin_dpo/margin_std': 0.9537783861160278, 'logps/chosen': -62.532981872558594, 'logps/rejected': -89.88809204101562, 'logps/ref_chosen': -62.739524841308594, 'logps/ref_rejected': -89.3175048828125, 'KL/chosen_KL_mean': 0.20654296875, 'KL/rejected_KL_mean': -0.570587158203125, 'KL/mean': -0.18202102184295654, 'KL/std': 0.7511119842529297, 'logits/chosen': -0.5210152864456177, 'logits/rejected': -0.4803611934185028, 'epoch': 0.05} + 5%|▍ | 32/681 [01:21<28:13, 2.61s/it] 5%|▍ | 33/681 [01:23<27:34, 2.55s/it] {'loss': 1.0079, 'grad_norm': 389.24462890625, 'learning_rate': 2.318840579710145e-07, 'fcm_dpo/beta': 0.7558040022850037, 'fcm_dpo/q_t': 0.38533806800842285, 'fcm_dpo/delta': -0.09609463810920715, 'fcm_dpo/margin': 0.6502407789230347, 'margin_dpo/margin_mean': 0.6502406597137451, 'margin_dpo/margin_std': 0.6353799104690552, 'logps/chosen': -53.144142150878906, 'logps/rejected': -88.41854858398438, 'logps/ref_chosen': -53.26097106933594, 'logps/ref_rejected': -87.8851318359375, 'KL/chosen_KL_mean': 0.11682891845703125, 'KL/rejected_KL_mean': -0.5334129333496094, 'KL/mean': -0.20829498767852783, 'KL/std': 0.5831528902053833, 'logits/chosen': -0.48112988471984863, 'logits/rejected': -0.4547329545021057, 'epoch': 0.05} + 5%|▍ | 33/681 [01:23<27:34, 2.55s/it] 5%|▍ | 34/681 [01:26<27:46, 2.58s/it] {'loss': 0.9903, 'grad_norm': 378.3127136230469, 'learning_rate': 2.391304347826087e-07, 'fcm_dpo/beta': 0.7317001819610596, 'fcm_dpo/q_t': 0.3767717480659485, 'fcm_dpo/delta': -0.15499642491340637, 'fcm_dpo/margin': 0.7467071413993835, 'margin_dpo/margin_mean': 0.746705949306488, 'margin_dpo/margin_std': 0.8056973218917847, 'logps/chosen': -50.71562194824219, 'logps/rejected': -102.56684875488281, 'logps/ref_chosen': -50.81732940673828, 'logps/ref_rejected': -101.92184448242188, 'KL/chosen_KL_mean': 0.10170745849609375, 'KL/rejected_KL_mean': -0.6450004577636719, 'KL/mean': -0.2716452181339264, 'KL/std': 0.6980259418487549, 'logits/chosen': -0.5065457224845886, 'logits/rejected': -0.48904159665107727, 'epoch': 0.05} + 5%|▍ | 34/681 [01:26<27:46, 2.58s/it] 5%|▌ | 35/681 [01:29<28:02, 2.60s/it] {'loss': 0.8254, 'grad_norm': 309.58441162109375, 'learning_rate': 2.463768115942029e-07, 'fcm_dpo/beta': 0.6779334545135498, 'fcm_dpo/q_t': 0.3209930658340454, 'fcm_dpo/delta': -0.4493417739868164, 'fcm_dpo/margin': 1.1969325542449951, 'margin_dpo/margin_mean': 1.1969324350357056, 'margin_dpo/margin_std': 1.0367286205291748, 'logps/chosen': -50.837669372558594, 'logps/rejected': -107.83454895019531, 'logps/ref_chosen': -51.02449035644531, 'logps/ref_rejected': -106.82443237304688, 'KL/chosen_KL_mean': 0.18681907653808594, 'KL/rejected_KL_mean': -1.0101165771484375, 'KL/mean': -0.41164833307266235, 'KL/std': 0.9510899782180786, 'logits/chosen': -0.502815306186676, 'logits/rejected': -0.46622055768966675, 'epoch': 0.05} + 5%|▌ | 35/681 [01:29<28:02, 2.60s/it] 5%|▌ | 36/681 [01:31<28:10, 2.62s/it] {'loss': 0.949, 'grad_norm': 261.8072814941406, 'learning_rate': 2.536231884057971e-07, 'fcm_dpo/beta': 0.6399196982383728, 'fcm_dpo/q_t': 0.35396426916122437, 'fcm_dpo/delta': -0.3091672658920288, 'fcm_dpo/margin': 1.0765844583511353, 'margin_dpo/margin_mean': 1.0765833854675293, 'margin_dpo/margin_std': 1.2341694831848145, 'logps/chosen': -51.9403190612793, 'logps/rejected': -87.0660400390625, 'logps/ref_chosen': -51.991493225097656, 'logps/ref_rejected': -86.0406265258789, 'KL/chosen_KL_mean': 0.05117225646972656, 'KL/rejected_KL_mean': -1.0254096984863281, 'KL/mean': -0.48711907863616943, 'KL/std': 1.0425536632537842, 'logits/chosen': -0.5571258068084717, 'logits/rejected': -0.5207737684249878, 'epoch': 0.05} + 5%|▌ | 36/681 [01:31<28:10, 2.62s/it] 5%|▌ | 37/681 [01:34<28:12, 2.63s/it] {'loss': 1.0035, 'grad_norm': 244.11151123046875, 'learning_rate': 2.6086956521739126e-07, 'fcm_dpo/beta': 0.5977625846862793, 'fcm_dpo/q_t': 0.3746863603591919, 'fcm_dpo/delta': -0.2139551043510437, 'fcm_dpo/margin': 0.9973729252815247, 'margin_dpo/margin_mean': 0.9973729848861694, 'margin_dpo/margin_std': 1.285217046737671, 'logps/chosen': -62.787498474121094, 'logps/rejected': -78.87284088134766, 'logps/ref_chosen': -62.807106018066406, 'logps/ref_rejected': -77.89507293701172, 'KL/chosen_KL_mean': 0.019609451293945312, 'KL/rejected_KL_mean': -0.9777679443359375, 'KL/mean': -0.4790758192539215, 'KL/std': 1.0161794424057007, 'logits/chosen': -0.4970467984676361, 'logits/rejected': -0.45224228501319885, 'epoch': 0.05} + 5%|▌ | 37/681 [01:34<28:12, 2.63s/it] 6%|▌ | 38/681 [01:36<26:51, 2.51s/it] {'loss': 0.9045, 'grad_norm': 240.15562438964844, 'learning_rate': 2.681159420289855e-07, 'fcm_dpo/beta': 0.5653368830680847, 'fcm_dpo/q_t': 0.3435903489589691, 'fcm_dpo/delta': -0.4023910164833069, 'fcm_dpo/margin': 1.365942120552063, 'margin_dpo/margin_mean': 1.3659417629241943, 'margin_dpo/margin_std': 1.6197600364685059, 'logps/chosen': -48.176414489746094, 'logps/rejected': -99.06427764892578, 'logps/ref_chosen': -48.39051818847656, 'logps/ref_rejected': -97.91244506835938, 'KL/chosen_KL_mean': 0.21410560607910156, 'KL/rejected_KL_mean': -1.1518363952636719, 'KL/mean': -0.46886640787124634, 'KL/std': 1.316064476966858, 'logits/chosen': -0.5118378400802612, 'logits/rejected': -0.4790714979171753, 'epoch': 0.06} + 6%|▌ | 38/681 [01:36<26:51, 2.51s/it] 6%|▌ | 39/681 [01:39<26:53, 2.51s/it] {'loss': 0.8415, 'grad_norm': 256.7136535644531, 'learning_rate': 2.753623188405797e-07, 'fcm_dpo/beta': 0.5120701193809509, 'fcm_dpo/q_t': 0.3189411163330078, 'fcm_dpo/delta': -0.4571428894996643, 'fcm_dpo/margin': 1.5928757190704346, 'margin_dpo/margin_mean': 1.5928757190704346, 'margin_dpo/margin_std': 1.4120266437530518, 'logps/chosen': -50.664669036865234, 'logps/rejected': -80.07658386230469, 'logps/ref_chosen': -50.75047302246094, 'logps/ref_rejected': -78.56951141357422, 'KL/chosen_KL_mean': 0.08580398559570312, 'KL/rejected_KL_mean': -1.5070762634277344, 'KL/mean': -0.7106390595436096, 'KL/std': 1.289241075515747, 'logits/chosen': -0.5451552867889404, 'logits/rejected': -0.5046231746673584, 'epoch': 0.06} + 6%|▌ | 39/681 [01:39<26:53, 2.51s/it] 6%|▌ | 40/681 [01:42<27:29, 2.57s/it] {'loss': 0.922, 'grad_norm': 182.71023559570312, 'learning_rate': 2.8260869565217386e-07, 'fcm_dpo/beta': 0.4821917414665222, 'fcm_dpo/q_t': 0.35090136528015137, 'fcm_dpo/delta': -0.32341742515563965, 'fcm_dpo/margin': 1.4550951719284058, 'margin_dpo/margin_mean': 1.455095887184143, 'margin_dpo/margin_std': 1.5767593383789062, 'logps/chosen': -57.79913330078125, 'logps/rejected': -75.5692367553711, 'logps/ref_chosen': -57.985069274902344, 'logps/ref_rejected': -74.3000717163086, 'KL/chosen_KL_mean': 0.18593215942382812, 'KL/rejected_KL_mean': -1.2691650390625, 'KL/mean': -0.5416154861450195, 'KL/std': 1.3004155158996582, 'logits/chosen': -0.5080227255821228, 'logits/rejected': -0.47728431224823, 'epoch': 0.06} + 6%|▌ | 40/681 [01:42<27:29, 2.57s/it] 6%|▌ | 41/681 [01:44<27:27, 2.57s/it] {'loss': 0.867, 'grad_norm': 186.04129028320312, 'learning_rate': 2.898550724637681e-07, 'fcm_dpo/beta': 0.44186830520629883, 'fcm_dpo/q_t': 0.3276433050632477, 'fcm_dpo/delta': -0.45917147397994995, 'fcm_dpo/margin': 1.8601810932159424, 'margin_dpo/margin_mean': 1.8601820468902588, 'margin_dpo/margin_std': 1.9144206047058105, 'logps/chosen': -62.624176025390625, 'logps/rejected': -98.81207275390625, 'logps/ref_chosen': -62.69581604003906, 'logps/ref_rejected': -97.02352905273438, 'KL/chosen_KL_mean': 0.07164192199707031, 'KL/rejected_KL_mean': -1.7885398864746094, 'KL/mean': -0.858450174331665, 'KL/std': 1.770848274230957, 'logits/chosen': -0.5379878282546997, 'logits/rejected': -0.5013633370399475, 'epoch': 0.06} + 6%|▌ | 41/681 [01:44<27:27, 2.57s/it] 6%|▌ | 42/681 [01:47<27:16, 2.56s/it] {'loss': 0.792, 'grad_norm': 167.1330108642578, 'learning_rate': 2.971014492753623e-07, 'fcm_dpo/beta': 0.39385828375816345, 'fcm_dpo/q_t': 0.3034874200820923, 'fcm_dpo/delta': -0.6033186912536621, 'fcm_dpo/margin': 2.401261806488037, 'margin_dpo/margin_mean': 2.4012622833251953, 'margin_dpo/margin_std': 2.241847276687622, 'logps/chosen': -58.74729919433594, 'logps/rejected': -112.09050750732422, 'logps/ref_chosen': -58.966426849365234, 'logps/ref_rejected': -109.90837097167969, 'KL/chosen_KL_mean': 0.21912765502929688, 'KL/rejected_KL_mean': -2.1821327209472656, 'KL/mean': -0.9815043210983276, 'KL/std': 1.9854331016540527, 'logits/chosen': -0.5730389356613159, 'logits/rejected': -0.5269917249679565, 'epoch': 0.06} + 6%|▌ | 42/681 [01:47<27:16, 2.56s/it] 6%|▋ | 43/681 [01:49<27:20, 2.57s/it] {'loss': 0.8149, 'grad_norm': 157.29473876953125, 'learning_rate': 3.043478260869565e-07, 'fcm_dpo/beta': 0.3543139696121216, 'fcm_dpo/q_t': 0.31728753447532654, 'fcm_dpo/delta': -0.4530714154243469, 'fcm_dpo/margin': 2.291576385498047, 'margin_dpo/margin_mean': 2.2915759086608887, 'margin_dpo/margin_std': 1.8112150430679321, 'logps/chosen': -53.62226104736328, 'logps/rejected': -98.238037109375, 'logps/ref_chosen': -54.15599822998047, 'logps/ref_rejected': -96.48019409179688, 'KL/chosen_KL_mean': 0.5337352752685547, 'KL/rejected_KL_mean': -1.757843017578125, 'KL/mean': -0.6120513677597046, 'KL/std': 1.6964552402496338, 'logits/chosen': -0.5307985544204712, 'logits/rejected': -0.505626916885376, 'epoch': 0.06} + 6%|▋ | 43/681 [01:49<27:20, 2.57s/it] 6%|▋ | 44/681 [01:52<27:19, 2.57s/it] {'loss': 0.7988, 'grad_norm': 155.63697814941406, 'learning_rate': 3.115942028985507e-07, 'fcm_dpo/beta': 0.3235365152359009, 'fcm_dpo/q_t': 0.31211215257644653, 'fcm_dpo/delta': -0.48091480135917664, 'fcm_dpo/margin': 2.591776132583618, 'margin_dpo/margin_mean': 2.59177565574646, 'margin_dpo/margin_std': 2.0130257606506348, 'logps/chosen': -49.85892105102539, 'logps/rejected': -111.15596008300781, 'logps/ref_chosen': -50.07849884033203, 'logps/ref_rejected': -108.78376007080078, 'KL/chosen_KL_mean': 0.21957778930664062, 'KL/rejected_KL_mean': -2.3721961975097656, 'KL/mean': -1.076310157775879, 'KL/std': 2.057605266571045, 'logits/chosen': -0.4682161509990692, 'logits/rejected': -0.4482540488243103, 'epoch': 0.06} + 6%|▋ | 44/681 [01:52<27:19, 2.57s/it] 7%|▋ | 45/681 [01:54<27:26, 2.59s/it] {'loss': 0.9597, 'grad_norm': 127.76445007324219, 'learning_rate': 3.188405797101449e-07, 'fcm_dpo/beta': 0.3062588572502136, 'fcm_dpo/q_t': 0.36411553621292114, 'fcm_dpo/delta': -0.24527329206466675, 'fcm_dpo/margin': 2.0580525398254395, 'margin_dpo/margin_mean': 2.0580527782440186, 'margin_dpo/margin_std': 2.3514111042022705, 'logps/chosen': -48.279014587402344, 'logps/rejected': -79.85856628417969, 'logps/ref_chosen': -48.4149284362793, 'logps/ref_rejected': -77.93643188476562, 'KL/chosen_KL_mean': 0.1359119415283203, 'KL/rejected_KL_mean': -1.9221420288085938, 'KL/mean': -0.8931126594543457, 'KL/std': 1.8634648323059082, 'logits/chosen': -0.4810647964477539, 'logits/rejected': -0.46846526861190796, 'epoch': 0.07} + 7%|▋ | 45/681 [01:54<27:26, 2.59s/it] 7%|▋ | 46/681 [01:57<27:38, 2.61s/it] {'loss': 0.8899, 'grad_norm': 132.63743591308594, 'learning_rate': 3.260869565217391e-07, 'fcm_dpo/beta': 0.2851349711418152, 'fcm_dpo/q_t': 0.33826661109924316, 'fcm_dpo/delta': -0.3946601152420044, 'fcm_dpo/margin': 2.6835451126098633, 'margin_dpo/margin_mean': 2.683544158935547, 'margin_dpo/margin_std': 2.860718250274658, 'logps/chosen': -55.794944763183594, 'logps/rejected': -98.13165283203125, 'logps/ref_chosen': -55.999427795410156, 'logps/ref_rejected': -95.652587890625, 'KL/chosen_KL_mean': 0.20447921752929688, 'KL/rejected_KL_mean': -2.4790611267089844, 'KL/mean': -1.1372920274734497, 'KL/std': 2.375460624694824, 'logits/chosen': -0.5322977900505066, 'logits/rejected': -0.4815298914909363, 'epoch': 0.07} + 7%|▋ | 46/681 [01:57<27:38, 2.61s/it] 7%|▋ | 47/681 [02:00<27:41, 2.62s/it] {'loss': 0.8904, 'grad_norm': 126.02400207519531, 'learning_rate': 3.333333333333333e-07, 'fcm_dpo/beta': 0.2670041620731354, 'fcm_dpo/q_t': 0.3423752188682556, 'fcm_dpo/delta': -0.328019380569458, 'fcm_dpo/margin': 2.6440370082855225, 'margin_dpo/margin_mean': 2.6440372467041016, 'margin_dpo/margin_std': 2.386019229888916, 'logps/chosen': -57.54547882080078, 'logps/rejected': -96.94264221191406, 'logps/ref_chosen': -57.92607879638672, 'logps/ref_rejected': -94.67920684814453, 'KL/chosen_KL_mean': 0.3806018829345703, 'KL/rejected_KL_mean': -2.2634353637695312, 'KL/mean': -0.9414160251617432, 'KL/std': 2.2800047397613525, 'logits/chosen': -0.5748400688171387, 'logits/rejected': -0.5222221612930298, 'epoch': 0.07} + 7%|▋ | 47/681 [02:00<27:41, 2.62s/it] 7%|▋ | 48/681 [02:02<27:57, 2.65s/it] {'loss': 0.9335, 'grad_norm': 138.55894470214844, 'learning_rate': 3.4057971014492755e-07, 'fcm_dpo/beta': 0.2488497495651245, 'fcm_dpo/q_t': 0.35341036319732666, 'fcm_dpo/delta': -0.27214479446411133, 'fcm_dpo/margin': 2.618363857269287, 'margin_dpo/margin_mean': 2.618363618850708, 'margin_dpo/margin_std': 2.488217353820801, 'logps/chosen': -57.103981018066406, 'logps/rejected': -90.55087280273438, 'logps/ref_chosen': -57.188072204589844, 'logps/ref_rejected': -88.0166015625, 'KL/chosen_KL_mean': 0.08409309387207031, 'KL/rejected_KL_mean': -2.534271240234375, 'KL/mean': -1.2250878810882568, 'KL/std': 2.2343883514404297, 'logits/chosen': -0.5984026193618774, 'logits/rejected': -0.5413084030151367, 'epoch': 0.07} + 7%|▋ | 48/681 [02:02<27:57, 2.65s/it] 7%|▋ | 49/681 [02:05<27:40, 2.63s/it] {'loss': 0.8935, 'grad_norm': 104.59957122802734, 'learning_rate': 3.478260869565217e-07, 'fcm_dpo/beta': 0.23550444841384888, 'fcm_dpo/q_t': 0.34103497862815857, 'fcm_dpo/delta': -0.3564870357513428, 'fcm_dpo/margin': 3.106010913848877, 'margin_dpo/margin_mean': 3.106010675430298, 'margin_dpo/margin_std': 3.1022186279296875, 'logps/chosen': -61.2507209777832, 'logps/rejected': -86.4389419555664, 'logps/ref_chosen': -61.685272216796875, 'logps/ref_rejected': -83.76747131347656, 'KL/chosen_KL_mean': 0.43454933166503906, 'KL/rejected_KL_mean': -2.671466827392578, 'KL/mean': -1.1184592247009277, 'KL/std': 2.7026281356811523, 'logits/chosen': -0.5330841541290283, 'logits/rejected': -0.47351568937301636, 'epoch': 0.07} + 7%|▋ | 49/681 [02:05<27:40, 2.63s/it] 7%|▋ | 50/681 [02:08<27:47, 2.64s/it] {'loss': 0.8683, 'grad_norm': 104.01911163330078, 'learning_rate': 3.5507246376811595e-07, 'fcm_dpo/beta': 0.21684028208255768, 'fcm_dpo/q_t': 0.333289235830307, 'fcm_dpo/delta': -0.38183990120887756, 'fcm_dpo/margin': 3.4672958850860596, 'margin_dpo/margin_mean': 3.4672961235046387, 'margin_dpo/margin_std': 3.1691863536834717, 'logps/chosen': -58.690879821777344, 'logps/rejected': -99.79219055175781, 'logps/ref_chosen': -58.72413635253906, 'logps/ref_rejected': -96.35814666748047, 'KL/chosen_KL_mean': 0.03325843811035156, 'KL/rejected_KL_mean': -3.434040069580078, 'KL/mean': -1.700391173362732, 'KL/std': 2.8286612033843994, 'logits/chosen': -0.5359183549880981, 'logits/rejected': -0.499971866607666, 'epoch': 0.07} + 7%|▋ | 50/681 [02:08<27:47, 2.64s/it] 7%|▋ | 51/681 [02:10<27:41, 2.64s/it] {'loss': 0.9468, 'grad_norm': 80.95540618896484, 'learning_rate': 3.6231884057971015e-07, 'fcm_dpo/beta': 0.20151767134666443, 'fcm_dpo/q_t': 0.3550441563129425, 'fcm_dpo/delta': -0.32323533296585083, 'fcm_dpo/margin': 3.4574427604675293, 'margin_dpo/margin_mean': 3.4574432373046875, 'margin_dpo/margin_std': 4.162242889404297, 'logps/chosen': -61.46109390258789, 'logps/rejected': -79.546875, 'logps/ref_chosen': -61.3736686706543, 'logps/ref_rejected': -76.00199890136719, 'KL/chosen_KL_mean': -0.08742523193359375, 'KL/rejected_KL_mean': -3.5448684692382812, 'KL/mean': -1.8161455392837524, 'KL/std': 3.425395965576172, 'logits/chosen': -0.4952942132949829, 'logits/rejected': -0.46139243245124817, 'epoch': 0.07} + 7%|▋ | 51/681 [02:10<27:41, 2.64s/it] 8%|▊ | 52/681 [02:13<27:00, 2.58s/it] {'loss': 0.7499, 'grad_norm': 79.82083129882812, 'learning_rate': 3.695652173913043e-07, 'fcm_dpo/beta': 0.181796133518219, 'fcm_dpo/q_t': 0.2899671792984009, 'fcm_dpo/delta': -0.6337956190109253, 'fcm_dpo/margin': 5.342073440551758, 'margin_dpo/margin_mean': 5.342073440551758, 'margin_dpo/margin_std': 4.25220251083374, 'logps/chosen': -51.770721435546875, 'logps/rejected': -84.74935150146484, 'logps/ref_chosen': -52.33735656738281, 'logps/ref_rejected': -79.97391510009766, 'KL/chosen_KL_mean': 0.5666332244873047, 'KL/rejected_KL_mean': -4.7754364013671875, 'KL/mean': -2.1044023036956787, 'KL/std': 4.042649269104004, 'logits/chosen': -0.5962961912155151, 'logits/rejected': -0.543999433517456, 'epoch': 0.08} + 8%|▊ | 52/681 [02:13<27:00, 2.58s/it] 8%|▊ | 53/681 [02:15<26:54, 2.57s/it] {'loss': 0.8463, 'grad_norm': 81.50251007080078, 'learning_rate': 3.7681159420289855e-07, 'fcm_dpo/beta': 0.16499710083007812, 'fcm_dpo/q_t': 0.3234487771987915, 'fcm_dpo/delta': -0.5135352611541748, 'fcm_dpo/margin': 5.282422065734863, 'margin_dpo/margin_mean': 5.282422065734863, 'margin_dpo/margin_std': 5.204236030578613, 'logps/chosen': -53.352622985839844, 'logps/rejected': -97.1039810180664, 'logps/ref_chosen': -53.31465148925781, 'logps/ref_rejected': -91.78359985351562, 'KL/chosen_KL_mean': -0.03797149658203125, 'KL/rejected_KL_mean': -5.320384979248047, 'KL/mean': -2.6791794300079346, 'KL/std': 4.5573835372924805, 'logits/chosen': -0.6104946136474609, 'logits/rejected': -0.588903546333313, 'epoch': 0.08} + 8%|▊ | 53/681 [02:15<26:54, 2.57s/it] 8%|▊ | 54/681 [02:18<26:19, 2.52s/it] {'loss': 0.8869, 'grad_norm': 71.17793273925781, 'learning_rate': 3.8405797101449274e-07, 'fcm_dpo/beta': 0.15150442719459534, 'fcm_dpo/q_t': 0.34295719861984253, 'fcm_dpo/delta': -0.33330458402633667, 'fcm_dpo/margin': 4.68695068359375, 'margin_dpo/margin_mean': 4.68695068359375, 'margin_dpo/margin_std': 4.375544548034668, 'logps/chosen': -50.8241081237793, 'logps/rejected': -96.53778839111328, 'logps/ref_chosen': -50.68865966796875, 'logps/ref_rejected': -91.71539306640625, 'KL/chosen_KL_mean': -0.13544654846191406, 'KL/rejected_KL_mean': -4.822395324707031, 'KL/mean': -2.47892427444458, 'KL/std': 4.284974098205566, 'logits/chosen': -0.5825854539871216, 'logits/rejected': -0.528401255607605, 'epoch': 0.08} + 8%|▊ | 54/681 [02:18<26:19, 2.52s/it] 8%|▊ | 55/681 [02:20<25:24, 2.44s/it] {'loss': 0.9007, 'grad_norm': 67.65316772460938, 'learning_rate': 3.9130434782608694e-07, 'fcm_dpo/beta': 0.14030683040618896, 'fcm_dpo/q_t': 0.33590346574783325, 'fcm_dpo/delta': -0.4165322184562683, 'fcm_dpo/margin': 5.594724178314209, 'margin_dpo/margin_mean': 5.594723701477051, 'margin_dpo/margin_std': 6.235048294067383, 'logps/chosen': -63.16775131225586, 'logps/rejected': -95.1407470703125, 'logps/ref_chosen': -62.615234375, 'logps/ref_rejected': -88.99349975585938, 'KL/chosen_KL_mean': -0.5525169372558594, 'KL/rejected_KL_mean': -6.147243499755859, 'KL/mean': -3.349881887435913, 'KL/std': 5.250433921813965, 'logits/chosen': -0.6309506893157959, 'logits/rejected': -0.5673823952674866, 'epoch': 0.08} + 8%|▊ | 55/681 [02:20<25:24, 2.44s/it] 8%|▊ | 56/681 [02:23<25:56, 2.49s/it] {'loss': 0.9377, 'grad_norm': 56.44548034667969, 'learning_rate': 3.9855072463768114e-07, 'fcm_dpo/beta': 0.13035638630390167, 'fcm_dpo/q_t': 0.3499238193035126, 'fcm_dpo/delta': -0.32700973749160767, 'fcm_dpo/margin': 5.401305198669434, 'margin_dpo/margin_mean': 5.401305198669434, 'margin_dpo/margin_std': 6.176411151885986, 'logps/chosen': -58.2624626159668, 'logps/rejected': -99.90547180175781, 'logps/ref_chosen': -57.9327278137207, 'logps/ref_rejected': -94.1744384765625, 'KL/chosen_KL_mean': -0.32973480224609375, 'KL/rejected_KL_mean': -5.7310333251953125, 'KL/mean': -3.0303850173950195, 'KL/std': 5.075455665588379, 'logits/chosen': -0.5875349044799805, 'logits/rejected': -0.5435941815376282, 'epoch': 0.08} + 8%|▊ | 56/681 [02:23<25:56, 2.49s/it] 8%|▊ | 57/681 [02:25<26:00, 2.50s/it] {'loss': 0.8807, 'grad_norm': 62.98070526123047, 'learning_rate': 4.057971014492754e-07, 'fcm_dpo/beta': 0.12205598503351212, 'fcm_dpo/q_t': 0.3362714648246765, 'fcm_dpo/delta': -0.35622814297676086, 'fcm_dpo/margin': 5.9908952713012695, 'margin_dpo/margin_mean': 5.9908952713012695, 'margin_dpo/margin_std': 5.452801704406738, 'logps/chosen': -70.89765930175781, 'logps/rejected': -101.958740234375, 'logps/ref_chosen': -70.49528503417969, 'logps/ref_rejected': -95.56546020507812, 'KL/chosen_KL_mean': -0.4023780822753906, 'KL/rejected_KL_mean': -6.393280029296875, 'KL/mean': -3.3978283405303955, 'KL/std': 5.025920867919922, 'logits/chosen': -0.5721724033355713, 'logits/rejected': -0.544060230255127, 'epoch': 0.08} + 8%|▊ | 57/681 [02:25<26:00, 2.50s/it] 9%|▊ | 58/681 [02:28<26:29, 2.55s/it] {'loss': 0.8841, 'grad_norm': 63.307865142822266, 'learning_rate': 4.1304347826086954e-07, 'fcm_dpo/beta': 0.11291979253292084, 'fcm_dpo/q_t': 0.33498990535736084, 'fcm_dpo/delta': -0.4023542106151581, 'fcm_dpo/margin': 6.84135627746582, 'margin_dpo/margin_mean': 6.841357231140137, 'margin_dpo/margin_std': 6.957030296325684, 'logps/chosen': -62.59377670288086, 'logps/rejected': -91.91949462890625, 'logps/ref_chosen': -62.13294219970703, 'logps/ref_rejected': -84.61729431152344, 'KL/chosen_KL_mean': -0.46083641052246094, 'KL/rejected_KL_mean': -7.3022003173828125, 'KL/mean': -3.8815183639526367, 'KL/std': 5.823391437530518, 'logits/chosen': -0.574745237827301, 'logits/rejected': -0.4966890215873718, 'epoch': 0.09} + 9%|▊ | 58/681 [02:28<26:29, 2.55s/it] 9%|▊ | 59/681 [02:30<26:31, 2.56s/it] {'loss': 0.8868, 'grad_norm': 60.765480041503906, 'learning_rate': 4.2028985507246374e-07, 'fcm_dpo/beta': 0.10300938785076141, 'fcm_dpo/q_t': 0.33733034133911133, 'fcm_dpo/delta': -0.3802601099014282, 'fcm_dpo/margin': 7.246167182922363, 'margin_dpo/margin_mean': 7.246167182922363, 'margin_dpo/margin_std': 7.133745193481445, 'logps/chosen': -52.88475036621094, 'logps/rejected': -97.08358764648438, 'logps/ref_chosen': -51.932525634765625, 'logps/ref_rejected': -88.88520050048828, 'KL/chosen_KL_mean': -0.9522266387939453, 'KL/rejected_KL_mean': -8.19839096069336, 'KL/mean': -4.575308799743652, 'KL/std': 6.170098304748535, 'logits/chosen': -0.6427372694015503, 'logits/rejected': -0.6026915311813354, 'epoch': 0.09} + 9%|▊ | 59/681 [02:30<26:31, 2.56s/it] 9%|▉ | 60/681 [02:33<26:24, 2.55s/it] {'loss': 0.9761, 'grad_norm': 63.796966552734375, 'learning_rate': 4.2753623188405794e-07, 'fcm_dpo/beta': 0.09867256879806519, 'fcm_dpo/q_t': 0.36642712354660034, 'fcm_dpo/delta': -0.19654613733291626, 'fcm_dpo/margin': 5.918500900268555, 'margin_dpo/margin_mean': 5.918500900268555, 'margin_dpo/margin_std': 6.41326379776001, 'logps/chosen': -62.80546569824219, 'logps/rejected': -93.17517852783203, 'logps/ref_chosen': -60.94218826293945, 'logps/ref_rejected': -85.39340209960938, 'KL/chosen_KL_mean': -1.8632774353027344, 'KL/rejected_KL_mean': -7.781780242919922, 'KL/mean': -4.822530746459961, 'KL/std': 5.658910751342773, 'logits/chosen': -0.6072988510131836, 'logits/rejected': -0.5472843050956726, 'epoch': 0.09} + 9%|▉ | 60/681 [02:33<26:24, 2.55s/it] 9%|▉ | 61/681 [02:35<26:34, 2.57s/it] {'loss': 0.9652, 'grad_norm': 51.70878982543945, 'learning_rate': 4.3478260869565214e-07, 'fcm_dpo/beta': 0.09338235855102539, 'fcm_dpo/q_t': 0.3608711063861847, 'fcm_dpo/delta': -0.2975808084011078, 'fcm_dpo/margin': 7.234995365142822, 'margin_dpo/margin_mean': 7.234993934631348, 'margin_dpo/margin_std': 9.499799728393555, 'logps/chosen': -61.510894775390625, 'logps/rejected': -97.96485900878906, 'logps/ref_chosen': -60.633522033691406, 'logps/ref_rejected': -89.85249328613281, 'KL/chosen_KL_mean': -0.8773689270019531, 'KL/rejected_KL_mean': -8.11236572265625, 'KL/mean': -4.494866371154785, 'KL/std': 7.751307487487793, 'logits/chosen': -0.5910314321517944, 'logits/rejected': -0.5556684732437134, 'epoch': 0.09} + 9%|▉ | 61/681 [02:36<26:34, 2.57s/it] 9%|▉ | 62/681 [02:38<27:01, 2.62s/it] {'loss': 1.0326, 'grad_norm': 50.53475570678711, 'learning_rate': 4.420289855072464e-07, 'fcm_dpo/beta': 0.09132882952690125, 'fcm_dpo/q_t': 0.3879823684692383, 'fcm_dpo/delta': -0.10761071741580963, 'fcm_dpo/margin': 5.499805927276611, 'margin_dpo/margin_mean': 5.499805450439453, 'margin_dpo/margin_std': 6.748025894165039, 'logps/chosen': -57.23579788208008, 'logps/rejected': -82.15103149414062, 'logps/ref_chosen': -56.15077209472656, 'logps/ref_rejected': -75.56619262695312, 'KL/chosen_KL_mean': -1.0850257873535156, 'KL/rejected_KL_mean': -6.584831237792969, 'KL/mean': -3.8349273204803467, 'KL/std': 5.598065376281738, 'logits/chosen': -0.6022673845291138, 'logits/rejected': -0.5683047771453857, 'epoch': 0.09} + 9%|▉ | 62/681 [02:38<27:01, 2.62s/it] 9%|▉ | 63/681 [02:41<26:42, 2.59s/it] {'loss': 0.9392, 'grad_norm': 50.32809829711914, 'learning_rate': 4.4927536231884053e-07, 'fcm_dpo/beta': 0.08644914627075195, 'fcm_dpo/q_t': 0.35505515336990356, 'fcm_dpo/delta': -0.26188862323760986, 'fcm_dpo/margin': 7.43485164642334, 'margin_dpo/margin_mean': 7.434851169586182, 'margin_dpo/margin_std': 7.581734657287598, 'logps/chosen': -75.032958984375, 'logps/rejected': -106.93048095703125, 'logps/ref_chosen': -73.14739227294922, 'logps/ref_rejected': -97.61006164550781, 'KL/chosen_KL_mean': -1.885568618774414, 'KL/rejected_KL_mean': -9.320415496826172, 'KL/mean': -5.602993011474609, 'KL/std': 7.238819122314453, 'logits/chosen': -0.5845507383346558, 'logits/rejected': -0.5376572012901306, 'epoch': 0.09} + 9%|▉ | 63/681 [02:41<26:42, 2.59s/it] 9%|▉ | 64/681 [02:43<26:14, 2.55s/it] {'loss': 0.9256, 'grad_norm': 45.80192947387695, 'learning_rate': 4.5652173913043473e-07, 'fcm_dpo/beta': 0.08089442551136017, 'fcm_dpo/q_t': 0.3481459617614746, 'fcm_dpo/delta': -0.3278960585594177, 'fcm_dpo/margin': 8.647797584533691, 'margin_dpo/margin_mean': 8.647798538208008, 'margin_dpo/margin_std': 9.425071716308594, 'logps/chosen': -54.534053802490234, 'logps/rejected': -102.71345520019531, 'logps/ref_chosen': -53.998600006103516, 'logps/ref_rejected': -93.53019714355469, 'KL/chosen_KL_mean': -0.5354537963867188, 'KL/rejected_KL_mean': -9.183258056640625, 'KL/mean': -4.859354496002197, 'KL/std': 7.766883850097656, 'logits/chosen': -0.5578076243400574, 'logits/rejected': -0.5246820449829102, 'epoch': 0.09} + 9%|▉ | 64/681 [02:43<26:14, 2.55s/it] 10%|▉ | 65/681 [02:46<26:23, 2.57s/it] {'loss': 0.9421, 'grad_norm': 46.513771057128906, 'learning_rate': 4.63768115942029e-07, 'fcm_dpo/beta': 0.076596200466156, 'fcm_dpo/q_t': 0.35113364458084106, 'fcm_dpo/delta': -0.2893035411834717, 'fcm_dpo/margin': 8.720873832702637, 'margin_dpo/margin_mean': 8.720873832702637, 'margin_dpo/margin_std': 9.467870712280273, 'logps/chosen': -67.2239761352539, 'logps/rejected': -121.05531311035156, 'logps/ref_chosen': -64.83599853515625, 'logps/ref_rejected': -109.94645690917969, 'KL/chosen_KL_mean': -2.3879737854003906, 'KL/rejected_KL_mean': -11.108848571777344, 'KL/mean': -6.748409271240234, 'KL/std': 8.675431251525879, 'logits/chosen': -0.6502401828765869, 'logits/rejected': -0.636010468006134, 'epoch': 0.1} + 10%|▉ | 65/681 [02:46<26:23, 2.57s/it] 10%|▉ | 66/681 [02:48<26:35, 2.60s/it] {'loss': 0.9797, 'grad_norm': 40.38670349121094, 'learning_rate': 4.7101449275362313e-07, 'fcm_dpo/beta': 0.07299195230007172, 'fcm_dpo/q_t': 0.3702622056007385, 'fcm_dpo/delta': -0.20810872316360474, 'fcm_dpo/margin': 8.126091957092285, 'margin_dpo/margin_mean': 8.126091003417969, 'margin_dpo/margin_std': 9.491106986999512, 'logps/chosen': -53.66209411621094, 'logps/rejected': -85.98096466064453, 'logps/ref_chosen': -51.44352722167969, 'logps/ref_rejected': -75.63629913330078, 'KL/chosen_KL_mean': -2.218568801879883, 'KL/rejected_KL_mean': -10.34466552734375, 'KL/mean': -6.281618118286133, 'KL/std': 7.9723615646362305, 'logits/chosen': -0.6466140747070312, 'logits/rejected': -0.613680362701416, 'epoch': 0.1} + 10%|▉ | 66/681 [02:49<26:35, 2.60s/it] 10%|▉ | 67/681 [02:51<25:34, 2.50s/it] {'loss': 0.9742, 'grad_norm': 41.76984786987305, 'learning_rate': 4.782608695652174e-07, 'fcm_dpo/beta': 0.07074415683746338, 'fcm_dpo/q_t': 0.3689546287059784, 'fcm_dpo/delta': -0.20659056305885315, 'fcm_dpo/margin': 8.405787467956543, 'margin_dpo/margin_mean': 8.405787467956543, 'margin_dpo/margin_std': 9.565013885498047, 'logps/chosen': -61.260093688964844, 'logps/rejected': -83.11235809326172, 'logps/ref_chosen': -59.34080505371094, 'logps/ref_rejected': -72.78728485107422, 'KL/chosen_KL_mean': -1.9192867279052734, 'KL/rejected_KL_mean': -10.3250732421875, 'KL/mean': -6.12217903137207, 'KL/std': 7.945716857910156, 'logits/chosen': -0.5952399969100952, 'logits/rejected': -0.5532902479171753, 'epoch': 0.1} + 10%|▉ | 67/681 [02:51<25:34, 2.50s/it] 10%|▉ | 68/681 [02:53<25:16, 2.47s/it] {'loss': 0.9769, 'grad_norm': 39.69685363769531, 'learning_rate': 4.855072463768116e-07, 'fcm_dpo/beta': 0.06832877546548843, 'fcm_dpo/q_t': 0.37427279353141785, 'fcm_dpo/delta': -0.16015967726707458, 'fcm_dpo/margin': 8.072154998779297, 'margin_dpo/margin_mean': 8.072154998779297, 'margin_dpo/margin_std': 8.083388328552246, 'logps/chosen': -67.28584289550781, 'logps/rejected': -87.35942077636719, 'logps/ref_chosen': -65.2058334350586, 'logps/ref_rejected': -77.20724487304688, 'KL/chosen_KL_mean': -2.0800132751464844, 'KL/rejected_KL_mean': -10.152172088623047, 'KL/mean': -6.116093635559082, 'KL/std': 7.245296478271484, 'logits/chosen': -0.6283758878707886, 'logits/rejected': -0.5697811841964722, 'epoch': 0.1} + 10%|▉ | 68/681 [02:53<25:16, 2.47s/it] 10%|█ | 69/681 [02:56<25:57, 2.55s/it] {'loss': 0.9315, 'grad_norm': 42.51207733154297, 'learning_rate': 4.927536231884058e-07, 'fcm_dpo/beta': 0.06493359059095383, 'fcm_dpo/q_t': 0.35869020223617554, 'fcm_dpo/delta': -0.24463072419166565, 'fcm_dpo/margin': 9.670412063598633, 'margin_dpo/margin_mean': 9.67041301727295, 'margin_dpo/margin_std': 9.319877624511719, 'logps/chosen': -62.40225601196289, 'logps/rejected': -115.64228820800781, 'logps/ref_chosen': -59.81924057006836, 'logps/ref_rejected': -103.38886260986328, 'KL/chosen_KL_mean': -2.5830154418945312, 'KL/rejected_KL_mean': -12.253425598144531, 'KL/mean': -7.418220520019531, 'KL/std': 8.492610931396484, 'logits/chosen': -0.587549090385437, 'logits/rejected': -0.5634763240814209, 'epoch': 0.1} + 10%|█ | 69/681 [02:56<25:57, 2.55s/it] 10%|█ | 70/681 [02:58<25:40, 2.52s/it] {'loss': 0.9407, 'grad_norm': 42.537837982177734, 'learning_rate': 5e-07, 'fcm_dpo/beta': 0.06210237741470337, 'fcm_dpo/q_t': 0.3578363060951233, 'fcm_dpo/delta': -0.2620813846588135, 'fcm_dpo/margin': 10.396703720092773, 'margin_dpo/margin_mean': 10.396702766418457, 'margin_dpo/margin_std': 10.94558048248291, 'logps/chosen': -65.86672973632812, 'logps/rejected': -105.39356994628906, 'logps/ref_chosen': -61.930641174316406, 'logps/ref_rejected': -91.06078338623047, 'KL/chosen_KL_mean': -3.936086654663086, 'KL/rejected_KL_mean': -14.33279037475586, 'KL/mean': -9.134437561035156, 'KL/std': 10.255716323852539, 'logits/chosen': -0.6045395731925964, 'logits/rejected': -0.5682834386825562, 'epoch': 0.1} + 10%|█ | 70/681 [02:58<25:40, 2.52s/it] 10%|█ | 71/681 [03:01<25:42, 2.53s/it] {'loss': 0.8977, 'grad_norm': 39.08191680908203, 'learning_rate': 4.999967061337492e-07, 'fcm_dpo/beta': 0.05811074376106262, 'fcm_dpo/q_t': 0.34550318121910095, 'fcm_dpo/delta': -0.32751208543777466, 'fcm_dpo/margin': 12.11224365234375, 'margin_dpo/margin_mean': 12.112241744995117, 'margin_dpo/margin_std': 11.558072090148926, 'logps/chosen': -65.2624740600586, 'logps/rejected': -112.96099853515625, 'logps/ref_chosen': -61.750335693359375, 'logps/ref_rejected': -97.33662414550781, 'KL/chosen_KL_mean': -3.512136459350586, 'KL/rejected_KL_mean': -15.624378204345703, 'KL/mean': -9.568258285522461, 'KL/std': 10.597580909729004, 'logits/chosen': -0.6395025253295898, 'logits/rejected': -0.5973723530769348, 'epoch': 0.1} + 10%|█ | 71/681 [03:01<25:42, 2.53s/it] 11%|█ | 72/681 [03:04<25:58, 2.56s/it] {'loss': 0.9069, 'grad_norm': 39.94166564941406, 'learning_rate': 4.999868246217933e-07, 'fcm_dpo/beta': 0.0543680340051651, 'fcm_dpo/q_t': 0.34477001428604126, 'fcm_dpo/delta': -0.32582148909568787, 'fcm_dpo/margin': 12.908900260925293, 'margin_dpo/margin_mean': 12.908900260925293, 'margin_dpo/margin_std': 13.00861930847168, 'logps/chosen': -70.10586547851562, 'logps/rejected': -112.24834442138672, 'logps/ref_chosen': -66.05341339111328, 'logps/ref_rejected': -95.2869873046875, 'KL/chosen_KL_mean': -4.052457809448242, 'KL/rejected_KL_mean': -16.96135711669922, 'KL/mean': -10.506906509399414, 'KL/std': 11.406668663024902, 'logits/chosen': -0.6469070911407471, 'logits/rejected': -0.6107661128044128, 'epoch': 0.11} + 11%|█ | 72/681 [03:04<25:58, 2.56s/it] 11%|█ | 73/681 [03:06<26:23, 2.60s/it] {'loss': 1.0119, 'grad_norm': 38.19060134887695, 'learning_rate': 4.999703557245192e-07, 'fcm_dpo/beta': 0.05116545781493187, 'fcm_dpo/q_t': 0.36612752079963684, 'fcm_dpo/delta': -0.28520524501800537, 'fcm_dpo/margin': 12.989622116088867, 'margin_dpo/margin_mean': 12.989622116088867, 'margin_dpo/margin_std': 18.78784942626953, 'logps/chosen': -72.19349670410156, 'logps/rejected': -109.38298034667969, 'logps/ref_chosen': -66.25627136230469, 'logps/ref_rejected': -90.45613098144531, 'KL/chosen_KL_mean': -5.937223434448242, 'KL/rejected_KL_mean': -18.92684555053711, 'KL/mean': -12.432035446166992, 'KL/std': 15.173410415649414, 'logits/chosen': -0.6732739806175232, 'logits/rejected': -0.6307477951049805, 'epoch': 0.11} + 11%|█ | 73/681 [03:06<26:23, 2.60s/it] 11%|█ | 74/681 [03:09<26:07, 2.58s/it] {'loss': 0.9834, 'grad_norm': 39.493221282958984, 'learning_rate': 4.999472998758977e-07, 'fcm_dpo/beta': 0.048250216990709305, 'fcm_dpo/q_t': 0.3610179126262665, 'fcm_dpo/delta': -0.3053615391254425, 'fcm_dpo/margin': 14.162809371948242, 'margin_dpo/margin_mean': 14.162809371948242, 'margin_dpo/margin_std': 20.69675636291504, 'logps/chosen': -60.067047119140625, 'logps/rejected': -116.75191497802734, 'logps/ref_chosen': -53.42488098144531, 'logps/ref_rejected': -95.94693756103516, 'KL/chosen_KL_mean': -6.64216423034668, 'KL/rejected_KL_mean': -20.804977416992188, 'KL/mean': -13.723569869995117, 'KL/std': 16.866836547851562, 'logits/chosen': -0.6567627191543579, 'logits/rejected': -0.6456471681594849, 'epoch': 0.11} + 11%|█ | 74/681 [03:09<26:07, 2.58s/it] 11%|█ | 75/681 [03:11<26:17, 2.60s/it] {'loss': 0.832, 'grad_norm': 34.22724533081055, 'learning_rate': 4.999176576834721e-07, 'fcm_dpo/beta': 0.04431544989347458, 'fcm_dpo/q_t': 0.31586113572120667, 'fcm_dpo/delta': -0.5371890068054199, 'fcm_dpo/margin': 20.119972229003906, 'margin_dpo/margin_mean': 20.119970321655273, 'margin_dpo/margin_std': 19.77768325805664, 'logps/chosen': -58.43212890625, 'logps/rejected': -137.9444122314453, 'logps/ref_chosen': -51.861663818359375, 'logps/ref_rejected': -111.25398254394531, 'KL/chosen_KL_mean': -6.570465087890625, 'KL/rejected_KL_mean': -26.6904296875, 'KL/mean': -16.630451202392578, 'KL/std': 17.67294692993164, 'logits/chosen': -0.6665393114089966, 'logits/rejected': -0.656915009021759, 'epoch': 0.11} + 11%|█ | 75/681 [03:11<26:17, 2.60s/it] 11%|█ | 76/681 [03:14<26:09, 2.59s/it] {'loss': 1.0058, 'grad_norm': 34.15813446044922, 'learning_rate': 4.998814299283415e-07, 'fcm_dpo/beta': 0.041812874376773834, 'fcm_dpo/q_t': 0.3751528859138489, 'fcm_dpo/delta': -0.1574609875679016, 'fcm_dpo/margin': 13.126352310180664, 'margin_dpo/margin_mean': 13.126352310180664, 'margin_dpo/margin_std': 15.78203010559082, 'logps/chosen': -61.45259094238281, 'logps/rejected': -99.52952575683594, 'logps/ref_chosen': -53.26603698730469, 'logps/ref_rejected': -78.21662902832031, 'KL/chosen_KL_mean': -8.186553955078125, 'KL/rejected_KL_mean': -21.31290054321289, 'KL/mean': -14.74972915649414, 'KL/std': 14.5498685836792, 'logits/chosen': -0.6963686943054199, 'logits/rejected': -0.6531896591186523, 'epoch': 0.11} + 11%|█ | 76/681 [03:14<26:09, 2.59s/it] 11%|█▏ | 77/681 [03:16<25:07, 2.50s/it] {'loss': 0.8772, 'grad_norm': 34.96170425415039, 'learning_rate': 4.998386175651409e-07, 'fcm_dpo/beta': 0.03852991759777069, 'fcm_dpo/q_t': 0.3251643776893616, 'fcm_dpo/delta': -0.4501330256462097, 'fcm_dpo/margin': 21.019004821777344, 'margin_dpo/margin_mean': 21.019004821777344, 'margin_dpo/margin_std': 21.777103424072266, 'logps/chosen': -65.40584564208984, 'logps/rejected': -122.10179138183594, 'logps/ref_chosen': -58.0966796875, 'logps/ref_rejected': -93.77361297607422, 'KL/chosen_KL_mean': -7.309167861938477, 'KL/rejected_KL_mean': -28.328174591064453, 'KL/mean': -17.81867218017578, 'KL/std': 19.93675994873047, 'logits/chosen': -0.6893630623817444, 'logits/rejected': -0.6500611305236816, 'epoch': 0.11} + 11%|█▏ | 77/681 [03:16<25:07, 2.50s/it] 11%|█▏ | 78/681 [03:19<25:34, 2.55s/it] {'loss': 0.976, 'grad_norm': 31.654874801635742, 'learning_rate': 4.997892217220159e-07, 'fcm_dpo/beta': 0.03683791682124138, 'fcm_dpo/q_t': 0.36780738830566406, 'fcm_dpo/delta': -0.21150818467140198, 'fcm_dpo/margin': 16.267370223999023, 'margin_dpo/margin_mean': 16.267372131347656, 'margin_dpo/margin_std': 18.412311553955078, 'logps/chosen': -63.101287841796875, 'logps/rejected': -108.68923950195312, 'logps/ref_chosen': -55.61378479003906, 'logps/ref_rejected': -84.93436431884766, 'KL/chosen_KL_mean': -7.4875030517578125, 'KL/rejected_KL_mean': -23.754878997802734, 'KL/mean': -15.621191024780273, 'KL/std': 16.25094223022461, 'logits/chosen': -0.6330477595329285, 'logits/rejected': -0.6059073209762573, 'epoch': 0.11} + 11%|█▏ | 78/681 [03:19<25:34, 2.55s/it] 12%|█▏ | 79/681 [03:22<25:47, 2.57s/it] {'loss': 0.9887, 'grad_norm': 28.002782821655273, 'learning_rate': 4.997332437005931e-07, 'fcm_dpo/beta': 0.03503450006246567, 'fcm_dpo/q_t': 0.36850211024284363, 'fcm_dpo/delta': -0.24067077040672302, 'fcm_dpo/margin': 17.853469848632812, 'margin_dpo/margin_mean': 17.85346794128418, 'margin_dpo/margin_std': 22.688926696777344, 'logps/chosen': -63.323951721191406, 'logps/rejected': -113.37451171875, 'logps/ref_chosen': -55.45048522949219, 'logps/ref_rejected': -87.64756774902344, 'KL/chosen_KL_mean': -7.873466491699219, 'KL/rejected_KL_mean': -25.726943969726562, 'KL/mean': -16.800203323364258, 'KL/std': 18.54357147216797, 'logits/chosen': -0.67842698097229, 'logits/rejected': -0.6490979194641113, 'epoch': 0.12} + 12%|█▏ | 79/681 [03:22<25:47, 2.57s/it] 12%|█▏ | 80/681 [03:24<25:39, 2.56s/it] {'loss': 1.0334, 'grad_norm': 30.331220626831055, 'learning_rate': 4.996706849759452e-07, 'fcm_dpo/beta': 0.03348912298679352, 'fcm_dpo/q_t': 0.3835224509239197, 'fcm_dpo/delta': -0.16750264167785645, 'fcm_dpo/margin': 16.589031219482422, 'margin_dpo/margin_mean': 16.589031219482422, 'margin_dpo/margin_std': 22.471614837646484, 'logps/chosen': -69.18228149414062, 'logps/rejected': -114.79953002929688, 'logps/ref_chosen': -58.519290924072266, 'logps/ref_rejected': -87.54750061035156, 'KL/chosen_KL_mean': -10.66299057006836, 'KL/rejected_KL_mean': -27.252025604248047, 'KL/mean': -18.957509994506836, 'KL/std': 19.518186569213867, 'logits/chosen': -0.7170759439468384, 'logits/rejected': -0.6725642085075378, 'epoch': 0.12} + 12%|█▏ | 80/681 [03:24<25:39, 2.56s/it] 12%|█▏ | 81/681 [03:27<25:59, 2.60s/it] {'loss': 0.9328, 'grad_norm': 30.84009552001953, 'learning_rate': 4.996015471965529e-07, 'fcm_dpo/beta': 0.03153174743056297, 'fcm_dpo/q_t': 0.347603440284729, 'fcm_dpo/delta': -0.3608952760696411, 'fcm_dpo/margin': 23.198793411254883, 'margin_dpo/margin_mean': 23.198793411254883, 'margin_dpo/margin_std': 27.952896118164062, 'logps/chosen': -76.25526428222656, 'logps/rejected': -162.66790771484375, 'logps/ref_chosen': -66.44886779785156, 'logps/ref_rejected': -129.66270446777344, 'KL/chosen_KL_mean': -9.806392669677734, 'KL/rejected_KL_mean': -33.00519561767578, 'KL/mean': -21.405792236328125, 'KL/std': 23.604204177856445, 'logits/chosen': -0.710389256477356, 'logits/rejected': -0.679502010345459, 'epoch': 0.12} + 12%|█▏ | 81/681 [03:27<25:59, 2.60s/it] 12%|█▏ | 82/681 [03:29<25:29, 2.55s/it] {'loss': 1.0751, 'grad_norm': 33.19075012207031, 'learning_rate': 4.995258321842611e-07, 'fcm_dpo/beta': 0.030675500631332397, 'fcm_dpo/q_t': 0.38255757093429565, 'fcm_dpo/delta': -0.17443646490573883, 'fcm_dpo/margin': 18.392139434814453, 'margin_dpo/margin_mean': 18.392139434814453, 'margin_dpo/margin_std': 29.462291717529297, 'logps/chosen': -64.35211181640625, 'logps/rejected': -121.25511932373047, 'logps/ref_chosen': -52.232383728027344, 'logps/ref_rejected': -90.74325561523438, 'KL/chosen_KL_mean': -12.119726181030273, 'KL/rejected_KL_mean': -30.51186752319336, 'KL/mean': -21.315797805786133, 'KL/std': 21.45583724975586, 'logits/chosen': -0.6418750286102295, 'logits/rejected': -0.628775954246521, 'epoch': 0.12} + 12%|█▏ | 82/681 [03:29<25:29, 2.55s/it] 12%|█▏ | 83/681 [03:32<25:03, 2.51s/it] {'loss': 0.978, 'grad_norm': 31.546810150146484, 'learning_rate': 4.994435419342304e-07, 'fcm_dpo/beta': 0.028894957154989243, 'fcm_dpo/q_t': 0.3641561269760132, 'fcm_dpo/delta': -0.2471744269132614, 'fcm_dpo/margin': 21.821533203125, 'margin_dpo/margin_mean': 21.821533203125, 'margin_dpo/margin_std': 26.563232421875, 'logps/chosen': -68.34278869628906, 'logps/rejected': -138.05282592773438, 'logps/ref_chosen': -55.82738494873047, 'logps/ref_rejected': -103.71589660644531, 'KL/chosen_KL_mean': -12.515398025512695, 'KL/rejected_KL_mean': -34.3369255065918, 'KL/mean': -23.42616081237793, 'KL/std': 22.522113800048828, 'logits/chosen': -0.6872934103012085, 'logits/rejected': -0.6507028937339783, 'epoch': 0.12} + 12%|█▏ | 83/681 [03:32<25:03, 2.51s/it] 12%|█▏ | 84/681 [03:34<25:43, 2.59s/it] {'loss': 0.9982, 'grad_norm': 27.340347290039062, 'learning_rate': 4.993546786148857e-07, 'fcm_dpo/beta': 0.027821559458971024, 'fcm_dpo/q_t': 0.3775210976600647, 'fcm_dpo/delta': -0.13951639831066132, 'fcm_dpo/margin': 19.046039581298828, 'margin_dpo/margin_mean': 19.046039581298828, 'margin_dpo/margin_std': 19.851577758789062, 'logps/chosen': -78.69538879394531, 'logps/rejected': -117.86385345458984, 'logps/ref_chosen': -67.1761703491211, 'logps/ref_rejected': -87.29859924316406, 'KL/chosen_KL_mean': -11.519216537475586, 'KL/rejected_KL_mean': -30.565250396728516, 'KL/mean': -21.042236328125, 'KL/std': 19.391069412231445, 'logits/chosen': -0.6515509486198425, 'logits/rejected': -0.6115979552268982, 'epoch': 0.12} + 12%|█▏ | 84/681 [03:34<25:43, 2.59s/it] 12%|█▏ | 85/681 [03:37<26:04, 2.62s/it] {'loss': 1.0232, 'grad_norm': 27.906219482421875, 'learning_rate': 4.992592445678582e-07, 'fcm_dpo/beta': 0.02746494486927986, 'fcm_dpo/q_t': 0.38153764605522156, 'fcm_dpo/delta': -0.14406868815422058, 'fcm_dpo/margin': 19.524394989013672, 'margin_dpo/margin_mean': 19.524394989013672, 'margin_dpo/margin_std': 23.956645965576172, 'logps/chosen': -70.55440521240234, 'logps/rejected': -110.31098175048828, 'logps/ref_chosen': -58.4066162109375, 'logps/ref_rejected': -78.63880157470703, 'KL/chosen_KL_mean': -12.147787094116211, 'KL/rejected_KL_mean': -31.67218017578125, 'KL/mean': -21.90998077392578, 'KL/std': 20.009681701660156, 'logits/chosen': -0.6355807185173035, 'logits/rejected': -0.6021965742111206, 'epoch': 0.12} + 12%|█▏ | 85/681 [03:37<26:04, 2.62s/it] 13%|█▎ | 86/681 [03:40<26:14, 2.65s/it] {'loss': 1.1039, 'grad_norm': 31.247163772583008, 'learning_rate': 4.991572423079235e-07, 'fcm_dpo/beta': 0.026533078402280807, 'fcm_dpo/q_t': 0.393225759267807, 'fcm_dpo/delta': -0.15769629180431366, 'fcm_dpo/margin': 20.703153610229492, 'margin_dpo/margin_mean': 20.703155517578125, 'margin_dpo/margin_std': 37.23381042480469, 'logps/chosen': -72.06341552734375, 'logps/rejected': -124.7507553100586, 'logps/ref_chosen': -56.13746643066406, 'logps/ref_rejected': -88.12165069580078, 'KL/chosen_KL_mean': -15.925954818725586, 'KL/rejected_KL_mean': -36.62910461425781, 'KL/mean': -26.277530670166016, 'KL/std': 27.010652542114258, 'logits/chosen': -0.6769875288009644, 'logits/rejected': -0.6635218858718872, 'epoch': 0.13} + 13%|█▎ | 86/681 [03:40<26:14, 2.65s/it] 13%|█▎ | 87/681 [03:42<26:08, 2.64s/it] {'loss': 1.0143, 'grad_norm': 26.473875045776367, 'learning_rate': 4.990486745229364e-07, 'fcm_dpo/beta': 0.025222256779670715, 'fcm_dpo/q_t': 0.3686726689338684, 'fcm_dpo/delta': -0.22208669781684875, 'fcm_dpo/margin': 24.061767578125, 'margin_dpo/margin_mean': 24.061767578125, 'margin_dpo/margin_std': 32.34611892700195, 'logps/chosen': -71.33383178710938, 'logps/rejected': -135.22708129882812, 'logps/ref_chosen': -55.63609313964844, 'logps/ref_rejected': -95.46757507324219, 'KL/chosen_KL_mean': -15.697734832763672, 'KL/rejected_KL_mean': -39.7595100402832, 'KL/mean': -27.728618621826172, 'KL/std': 26.763015747070312, 'logits/chosen': -0.7123448252677917, 'logits/rejected': -0.6856144666671753, 'epoch': 0.13} + 13%|█▎ | 87/681 [03:42<26:08, 2.64s/it] 13%|█▎ | 88/681 [03:45<26:08, 2.65s/it] {'loss': 1.118, 'grad_norm': 27.7163028717041, 'learning_rate': 4.989335440737586e-07, 'fcm_dpo/beta': 0.02473517321050167, 'fcm_dpo/q_t': 0.40377742052078247, 'fcm_dpo/delta': -0.05327114462852478, 'fcm_dpo/margin': 18.192121505737305, 'margin_dpo/margin_mean': 18.192121505737305, 'margin_dpo/margin_std': 29.348196029663086, 'logps/chosen': -93.40048217773438, 'logps/rejected': -144.62994384765625, 'logps/ref_chosen': -73.67115020751953, 'logps/ref_rejected': -106.70849609375, 'KL/chosen_KL_mean': -19.72933578491211, 'KL/rejected_KL_mean': -37.921451568603516, 'KL/mean': -28.825397491455078, 'KL/std': 27.96208953857422, 'logits/chosen': -0.6769958734512329, 'logits/rejected': -0.6691812872886658, 'epoch': 0.13} + 13%|█▎ | 88/681 [03:45<26:08, 2.65s/it] 13%|█▎ | 89/681 [03:48<25:35, 2.59s/it] {'loss': 1.0395, 'grad_norm': 25.34421730041504, 'learning_rate': 4.988118539941847e-07, 'fcm_dpo/beta': 0.024530138820409775, 'fcm_dpo/q_t': 0.3884883522987366, 'fcm_dpo/delta': -0.10719307512044907, 'fcm_dpo/margin': 20.458881378173828, 'margin_dpo/margin_mean': 20.45888328552246, 'margin_dpo/margin_std': 26.786773681640625, 'logps/chosen': -72.7660140991211, 'logps/rejected': -114.68353271484375, 'logps/ref_chosen': -60.624916076660156, 'logps/ref_rejected': -82.08354949951172, 'KL/chosen_KL_mean': -12.14109992980957, 'KL/rejected_KL_mean': -32.59998321533203, 'KL/mean': -22.370540618896484, 'KL/std': 23.696063995361328, 'logits/chosen': -0.7158247232437134, 'logits/rejected': -0.6819084882736206, 'epoch': 0.13} + 13%|█▎ | 89/681 [03:48<25:35, 2.59s/it] 13%|█▎ | 90/681 [03:50<25:07, 2.55s/it] {'loss': 1.0206, 'grad_norm': 27.808216094970703, 'learning_rate': 4.986836074908615e-07, 'fcm_dpo/beta': 0.02354896441102028, 'fcm_dpo/q_t': 0.3705536723136902, 'fcm_dpo/delta': -0.2722369432449341, 'fcm_dpo/margin': 27.817413330078125, 'margin_dpo/margin_mean': 27.817413330078125, 'margin_dpo/margin_std': 40.34447479248047, 'logps/chosen': -68.91744995117188, 'logps/rejected': -154.9942626953125, 'logps/ref_chosen': -53.285308837890625, 'logps/ref_rejected': -111.54470825195312, 'KL/chosen_KL_mean': -15.632135391235352, 'KL/rejected_KL_mean': -43.449546813964844, 'KL/mean': -29.54084014892578, 'KL/std': 32.48262023925781, 'logits/chosen': -0.6693460941314697, 'logits/rejected': -0.6799524426460266, 'epoch': 0.13} + 13%|█▎ | 90/681 [03:50<25:07, 2.55s/it] 13%|█▎ | 91/681 [03:53<25:17, 2.57s/it] {'loss': 1.059, 'grad_norm': 25.545448303222656, 'learning_rate': 4.985488079432037e-07, 'fcm_dpo/beta': 0.022640112787485123, 'fcm_dpo/q_t': 0.38871896266937256, 'fcm_dpo/delta': -0.1277696192264557, 'fcm_dpo/margin': 23.0252685546875, 'margin_dpo/margin_mean': 23.025266647338867, 'margin_dpo/margin_std': 33.728050231933594, 'logps/chosen': -78.12120056152344, 'logps/rejected': -127.21746826171875, 'logps/ref_chosen': -61.802955627441406, 'logps/ref_rejected': -87.87395477294922, 'KL/chosen_KL_mean': -16.3182430267334, 'KL/rejected_KL_mean': -39.34351348876953, 'KL/mean': -27.830875396728516, 'KL/std': 26.04579734802246, 'logits/chosen': -0.6979465484619141, 'logits/rejected': -0.6646697521209717, 'epoch': 0.13} + 13%|█▎ | 91/681 [03:53<25:17, 2.57s/it] 14%|█▎ | 92/681 [03:55<24:59, 2.55s/it] {'loss': 1.0518, 'grad_norm': 23.78729248046875, 'learning_rate': 4.984074589033043e-07, 'fcm_dpo/beta': 0.022130444645881653, 'fcm_dpo/q_t': 0.38861751556396484, 'fcm_dpo/delta': -0.12148790061473846, 'fcm_dpo/margin': 23.284168243408203, 'margin_dpo/margin_mean': 23.284168243408203, 'margin_dpo/margin_std': 32.566123962402344, 'logps/chosen': -66.44992065429688, 'logps/rejected': -115.97450256347656, 'logps/ref_chosen': -51.640769958496094, 'logps/ref_rejected': -77.88117980957031, 'KL/chosen_KL_mean': -14.809152603149414, 'KL/rejected_KL_mean': -38.093318939208984, 'KL/mean': -26.451236724853516, 'KL/std': 26.83241081237793, 'logits/chosen': -0.7112252712249756, 'logits/rejected': -0.6874991655349731, 'epoch': 0.14} + 14%|█▎ | 92/681 [03:55<24:59, 2.55s/it] 14%|█▎ | 93/681 [03:57<23:47, 2.43s/it] {'loss': 1.0277, 'grad_norm': 24.3586368560791, 'learning_rate': 4.982595640958425e-07, 'fcm_dpo/beta': 0.021535798907279968, 'fcm_dpo/q_t': 0.3877296447753906, 'fcm_dpo/delta': -0.11011452972888947, 'fcm_dpo/margin': 23.431915283203125, 'margin_dpo/margin_mean': 23.431915283203125, 'margin_dpo/margin_std': 29.139026641845703, 'logps/chosen': -69.14620971679688, 'logps/rejected': -117.20962524414062, 'logps/ref_chosen': -52.529239654541016, 'logps/ref_rejected': -77.16075134277344, 'KL/chosen_KL_mean': -16.61697006225586, 'KL/rejected_KL_mean': -40.04888153076172, 'KL/mean': -28.332927703857422, 'KL/std': 25.330059051513672, 'logits/chosen': -0.7280140519142151, 'logits/rejected': -0.676377534866333, 'epoch': 0.14} + 14%|█▎ | 93/681 [03:57<23:47, 2.43s/it] 14%|█▍ | 94/681 [04:00<24:40, 2.52s/it] {'loss': 0.9928, 'grad_norm': 24.17359733581543, 'learning_rate': 4.98105127417984e-07, 'fcm_dpo/beta': 0.020746299996972084, 'fcm_dpo/q_t': 0.3750302195549011, 'fcm_dpo/delta': -0.1698264628648758, 'fcm_dpo/margin': 26.934350967407227, 'margin_dpo/margin_mean': 26.934350967407227, 'margin_dpo/margin_std': 30.304607391357422, 'logps/chosen': -79.17451477050781, 'logps/rejected': -144.48529052734375, 'logps/ref_chosen': -61.22261047363281, 'logps/ref_rejected': -99.59902954101562, 'KL/chosen_KL_mean': -17.951906204223633, 'KL/rejected_KL_mean': -44.88625717163086, 'KL/mean': -31.419082641601562, 'KL/std': 29.03601837158203, 'logits/chosen': -0.6785788536071777, 'logits/rejected': -0.6629636287689209, 'epoch': 0.14} + 14%|█▍ | 94/681 [04:00<24:40, 2.52s/it] 14%|█▍ | 95/681 [04:02<24:25, 2.50s/it] {'loss': 1.0678, 'grad_norm': 22.47222900390625, 'learning_rate': 4.979441529392784e-07, 'fcm_dpo/beta': 0.02045309543609619, 'fcm_dpo/q_t': 0.3971632122993469, 'fcm_dpo/delta': -0.05145730823278427, 'fcm_dpo/margin': 21.939393997192383, 'margin_dpo/margin_mean': 21.939393997192383, 'margin_dpo/margin_std': 28.80935287475586, 'logps/chosen': -69.5000228881836, 'logps/rejected': -114.79611206054688, 'logps/ref_chosen': -52.523643493652344, 'logps/ref_rejected': -75.8803482055664, 'KL/chosen_KL_mean': -16.976377487182617, 'KL/rejected_KL_mean': -38.91576385498047, 'KL/mean': -27.94607162475586, 'KL/std': 27.511489868164062, 'logits/chosen': -0.7069982290267944, 'logits/rejected': -0.6784754991531372, 'epoch': 0.14} + 14%|█▍ | 95/681 [04:03<24:25, 2.50s/it] 14%|█▍ | 96/681 [04:05<24:30, 2.51s/it] {'loss': 0.9801, 'grad_norm': 23.20262336730957, 'learning_rate': 4.977766449015534e-07, 'fcm_dpo/beta': 0.019721299409866333, 'fcm_dpo/q_t': 0.37095409631729126, 'fcm_dpo/delta': -0.20427075028419495, 'fcm_dpo/margin': 29.912555694580078, 'margin_dpo/margin_mean': 29.912555694580078, 'margin_dpo/margin_std': 34.870269775390625, 'logps/chosen': -78.56965637207031, 'logps/rejected': -142.9212646484375, 'logps/ref_chosen': -62.15697479248047, 'logps/ref_rejected': -96.59601593017578, 'KL/chosen_KL_mean': -16.412687301635742, 'KL/rejected_KL_mean': -46.32524108886719, 'KL/mean': -31.368961334228516, 'KL/std': 31.75434112548828, 'logits/chosen': -0.7112823128700256, 'logits/rejected': -0.6814401149749756, 'epoch': 0.14} + 14%|█▍ | 96/681 [04:05<24:30, 2.51s/it] 14%|█▍ | 97/681 [04:08<24:33, 2.52s/it] {'loss': 1.0535, 'grad_norm': 23.92318344116211, 'learning_rate': 4.976026077188012e-07, 'fcm_dpo/beta': 0.019657842814922333, 'fcm_dpo/q_t': 0.395273894071579, 'fcm_dpo/delta': -0.05561104789376259, 'fcm_dpo/margin': 23.000316619873047, 'margin_dpo/margin_mean': 23.000316619873047, 'margin_dpo/margin_std': 26.310195922851562, 'logps/chosen': -72.44036102294922, 'logps/rejected': -117.75906372070312, 'logps/ref_chosen': -54.646366119384766, 'logps/ref_rejected': -76.96475219726562, 'KL/chosen_KL_mean': -17.793996810913086, 'KL/rejected_KL_mean': -40.7943115234375, 'KL/mean': -29.294153213500977, 'KL/std': 26.314815521240234, 'logits/chosen': -0.6561405658721924, 'logits/rejected': -0.6128396987915039, 'epoch': 0.14} + 14%|█▍ | 97/681 [04:08<24:33, 2.52s/it] 14%|█▍ | 98/681 [04:10<24:23, 2.51s/it] {'loss': 1.0538, 'grad_norm': 24.65069580078125, 'learning_rate': 4.974220459770639e-07, 'fcm_dpo/beta': 0.019086042419075966, 'fcm_dpo/q_t': 0.3849552869796753, 'fcm_dpo/delta': -0.11044582724571228, 'fcm_dpo/margin': 26.44017791748047, 'margin_dpo/margin_mean': 26.44017791748047, 'margin_dpo/margin_std': 36.05833053588867, 'logps/chosen': -87.24374389648438, 'logps/rejected': -144.95278930664062, 'logps/ref_chosen': -65.25862884521484, 'logps/ref_rejected': -96.5274887084961, 'KL/chosen_KL_mean': -21.98511505126953, 'KL/rejected_KL_mean': -48.42529296875, 'KL/mean': -35.2052001953125, 'KL/std': 30.05870819091797, 'logits/chosen': -0.6879181265830994, 'logits/rejected': -0.6715967059135437, 'epoch': 0.14} + 14%|█▍ | 98/681 [04:10<24:23, 2.51s/it] 15%|█▍ | 99/681 [04:12<23:26, 2.42s/it] {'loss': 0.9905, 'grad_norm': 21.651269912719727, 'learning_rate': 4.972349644343108e-07, 'fcm_dpo/beta': 0.01838843896985054, 'fcm_dpo/q_t': 0.373318076133728, 'fcm_dpo/delta': -0.20529845356941223, 'fcm_dpo/margin': 32.21813201904297, 'margin_dpo/margin_mean': 32.21813201904297, 'margin_dpo/margin_std': 40.488311767578125, 'logps/chosen': -63.00075912475586, 'logps/rejected': -136.01834106445312, 'logps/ref_chosen': -45.638484954833984, 'logps/ref_rejected': -86.43793487548828, 'KL/chosen_KL_mean': -17.362274169921875, 'KL/rejected_KL_mean': -49.580406188964844, 'KL/mean': -33.471343994140625, 'KL/std': 32.57749938964844, 'logits/chosen': -0.6547946929931641, 'logits/rejected': -0.6550130248069763, 'epoch': 0.15} + 15%|█▍ | 99/681 [04:12<23:26, 2.42s/it] 15%|█▍ | 100/681 [04:15<24:01, 2.48s/it] {'loss': 1.1631, 'grad_norm': 24.000566482543945, 'learning_rate': 4.970413680203148e-07, 'fcm_dpo/beta': 0.01846114918589592, 'fcm_dpo/q_t': 0.42105787992477417, 'fcm_dpo/delta': 0.05179622396826744, 'fcm_dpo/margin': 18.93136978149414, 'margin_dpo/margin_mean': 18.93136978149414, 'margin_dpo/margin_std': 33.863746643066406, 'logps/chosen': -77.2769775390625, 'logps/rejected': -112.67457580566406, 'logps/ref_chosen': -57.59397888183594, 'logps/ref_rejected': -74.06021118164062, 'KL/chosen_KL_mean': -19.682992935180664, 'KL/rejected_KL_mean': -38.6143684387207, 'KL/mean': -29.148677825927734, 'KL/std': 26.299453735351562, 'logits/chosen': -0.6635209321975708, 'logits/rejected': -0.6204158663749695, 'epoch': 0.15} + 15%|█▍ | 100/681 [04:15<24:01, 2.48s/it] 15%|█▍ | 101/681 [04:17<23:44, 2.46s/it] {'loss': 1.125, 'grad_norm': 23.815645217895508, 'learning_rate': 4.968412618365215e-07, 'fcm_dpo/beta': 0.018270574510097504, 'fcm_dpo/q_t': 0.4101085960865021, 'fcm_dpo/delta': -0.022218167781829834, 'fcm_dpo/margin': 23.0167293548584, 'margin_dpo/margin_mean': 23.01673126220703, 'margin_dpo/margin_std': 39.10064697265625, 'logps/chosen': -85.77684020996094, 'logps/rejected': -130.33441162109375, 'logps/ref_chosen': -61.64885330200195, 'logps/ref_rejected': -83.18968200683594, 'KL/chosen_KL_mean': -24.127986907958984, 'KL/rejected_KL_mean': -47.14472198486328, 'KL/mean': -35.636356353759766, 'KL/std': 32.51232147216797, 'logits/chosen': -0.696588933467865, 'logits/rejected': -0.6663883924484253, 'epoch': 0.15} + 15%|█▍ | 101/681 [04:17<23:44, 2.46s/it] 15%|█▍ | 102/681 [04:20<23:31, 2.44s/it] {'loss': 1.2061, 'grad_norm': 26.869401931762695, 'learning_rate': 4.966346511559149e-07, 'fcm_dpo/beta': 0.018309336155653, 'fcm_dpo/q_t': 0.43124186992645264, 'fcm_dpo/delta': -0.024285031482577324, 'fcm_dpo/margin': 16.883745193481445, 'margin_dpo/margin_mean': 16.883745193481445, 'margin_dpo/margin_std': 35.99235534667969, 'logps/chosen': -90.30126953125, 'logps/rejected': -111.293212890625, 'logps/ref_chosen': -64.0788803100586, 'logps/ref_rejected': -68.18707275390625, 'KL/chosen_KL_mean': -26.22239112854004, 'KL/rejected_KL_mean': -43.10613250732422, 'KL/mean': -34.66426086425781, 'KL/std': 30.4959659576416, 'logits/chosen': -0.7078909873962402, 'logits/rejected': -0.6638644933700562, 'epoch': 0.15} + 15%|█▍ | 102/681 [04:20<23:31, 2.44s/it] 15%|█▌ | 103/681 [04:22<23:11, 2.41s/it] {'loss': 0.9829, 'grad_norm': 23.006120681762695, 'learning_rate': 4.964215414228785e-07, 'fcm_dpo/beta': 0.017673712223768234, 'fcm_dpo/q_t': 0.37030476331710815, 'fcm_dpo/delta': -0.2095203697681427, 'fcm_dpo/margin': 33.75600814819336, 'margin_dpo/margin_mean': 33.756011962890625, 'margin_dpo/margin_std': 40.593528747558594, 'logps/chosen': -81.9129638671875, 'logps/rejected': -147.94241333007812, 'logps/ref_chosen': -61.299278259277344, 'logps/ref_rejected': -93.57270812988281, 'KL/chosen_KL_mean': -20.61368179321289, 'KL/rejected_KL_mean': -54.369693756103516, 'KL/mean': -37.49169158935547, 'KL/std': 33.84938049316406, 'logits/chosen': -0.6496819257736206, 'logits/rejected': -0.6131845116615295, 'epoch': 0.15} + 15%|█▌ | 103/681 [04:22<23:11, 2.41s/it] 15%|█▌ | 104/681 [04:24<22:38, 2.35s/it] {'loss': 1.0402, 'grad_norm': 22.44390296936035, 'learning_rate': 4.96201938253052e-07, 'fcm_dpo/beta': 0.017140310257673264, 'fcm_dpo/q_t': 0.3846966028213501, 'fcm_dpo/delta': -0.15842567384243011, 'fcm_dpo/margin': 32.083656311035156, 'margin_dpo/margin_mean': 32.083656311035156, 'margin_dpo/margin_std': 45.53196334838867, 'logps/chosen': -76.96102905273438, 'logps/rejected': -144.23663330078125, 'logps/ref_chosen': -54.372772216796875, 'logps/ref_rejected': -89.5647201538086, 'KL/chosen_KL_mean': -22.588258743286133, 'KL/rejected_KL_mean': -54.67192077636719, 'KL/mean': -38.630088806152344, 'KL/std': 37.43622589111328, 'logits/chosen': -0.7081250548362732, 'logits/rejected': -0.6775051355361938, 'epoch': 0.15} + 15%|█▌ | 104/681 [04:24<22:38, 2.35s/it] 15%|█▌ | 105/681 [04:27<23:19, 2.43s/it] {'loss': 0.8698, 'grad_norm': 23.292463302612305, 'learning_rate': 4.959758474331832e-07, 'fcm_dpo/beta': 0.016101296991109848, 'fcm_dpo/q_t': 0.33412182331085205, 'fcm_dpo/delta': -0.36145851016044617, 'fcm_dpo/margin': 45.65831756591797, 'margin_dpo/margin_mean': 45.65831756591797, 'margin_dpo/margin_std': 40.13270568847656, 'logps/chosen': -75.83988189697266, 'logps/rejected': -164.832763671875, 'logps/ref_chosen': -54.638946533203125, 'logps/ref_rejected': -97.97351837158203, 'KL/chosen_KL_mean': -21.200937271118164, 'KL/rejected_KL_mean': -66.8592529296875, 'KL/mean': -44.030094146728516, 'KL/std': 36.557212829589844, 'logits/chosen': -0.6853151321411133, 'logits/rejected': -0.6624854803085327, 'epoch': 0.15} + 15%|█▌ | 105/681 [04:27<23:19, 2.43s/it] 16%|█▌ | 106/681 [04:29<23:26, 2.45s/it] {'loss': 1.0582, 'grad_norm': 22.089712142944336, 'learning_rate': 4.957432749209755e-07, 'fcm_dpo/beta': 0.015641074627637863, 'fcm_dpo/q_t': 0.3972783088684082, 'fcm_dpo/delta': -0.04885800927877426, 'fcm_dpo/margin': 28.547529220581055, 'margin_dpo/margin_mean': 28.547531127929688, 'margin_dpo/margin_std': 34.79780197143555, 'logps/chosen': -78.58416748046875, 'logps/rejected': -137.52342224121094, 'logps/ref_chosen': -54.83289337158203, 'logps/ref_rejected': -85.22461700439453, 'KL/chosen_KL_mean': -23.751272201538086, 'KL/rejected_KL_mean': -52.298805236816406, 'KL/mean': -38.02503967285156, 'KL/std': 31.860212326049805, 'logits/chosen': -0.6288084983825684, 'logits/rejected': -0.5959875583648682, 'epoch': 0.16} + 16%|█▌ | 106/681 [04:29<23:26, 2.45s/it] 16%|█▌ | 107/681 [04:32<23:54, 2.50s/it] {'loss': 1.0433, 'grad_norm': 21.205881118774414, 'learning_rate': 4.955042268449307e-07, 'fcm_dpo/beta': 0.015334920957684517, 'fcm_dpo/q_t': 0.3877396583557129, 'fcm_dpo/delta': -0.10102778673171997, 'fcm_dpo/margin': 32.29783630371094, 'margin_dpo/margin_mean': 32.29783630371094, 'margin_dpo/margin_std': 41.090171813964844, 'logps/chosen': -98.18677520751953, 'logps/rejected': -155.5163116455078, 'logps/ref_chosen': -69.70780944824219, 'logps/ref_rejected': -94.73950958251953, 'KL/chosen_KL_mean': -28.478961944580078, 'KL/rejected_KL_mean': -60.77680206298828, 'KL/mean': -44.62788009643555, 'KL/std': 38.920860290527344, 'logits/chosen': -0.6800429821014404, 'logits/rejected': -0.6334393620491028, 'epoch': 0.16} + 16%|█▌ | 107/681 [04:32<23:54, 2.50s/it] 16%|█▌ | 108/681 [04:34<23:29, 2.46s/it] {'loss': 1.039, 'grad_norm': 21.520112991333008, 'learning_rate': 4.952587095041881e-07, 'fcm_dpo/beta': 0.014912154525518417, 'fcm_dpo/q_t': 0.3810121417045593, 'fcm_dpo/delta': -0.18271130323410034, 'fcm_dpo/margin': 38.376766204833984, 'margin_dpo/margin_mean': 38.376766204833984, 'margin_dpo/margin_std': 55.44823455810547, 'logps/chosen': -81.74688720703125, 'logps/rejected': -159.9097900390625, 'logps/ref_chosen': -56.0098876953125, 'logps/ref_rejected': -95.79601287841797, 'KL/chosen_KL_mean': -25.73700523376465, 'KL/rejected_KL_mean': -64.11376953125, 'KL/mean': -44.92538833618164, 'KL/std': 42.71458435058594, 'logits/chosen': -0.6687978506088257, 'logits/rejected': -0.646253228187561, 'epoch': 0.16} + 16%|█▌ | 108/681 [04:34<23:29, 2.46s/it] 16%|█▌ | 109/681 [04:37<24:38, 2.59s/it] {'loss': 0.9868, 'grad_norm': 21.89470100402832, 'learning_rate': 4.95006729368358e-07, 'fcm_dpo/beta': 0.014279071241617203, 'fcm_dpo/q_t': 0.3684207797050476, 'fcm_dpo/delta': -0.20386075973510742, 'fcm_dpo/margin': 41.38175964355469, 'margin_dpo/margin_mean': 41.38175964355469, 'margin_dpo/margin_std': 48.550384521484375, 'logps/chosen': -86.96247863769531, 'logps/rejected': -164.1444854736328, 'logps/ref_chosen': -62.88549041748047, 'logps/ref_rejected': -98.68573760986328, 'KL/chosen_KL_mean': -24.07698631286621, 'KL/rejected_KL_mean': -65.45874786376953, 'KL/mean': -44.76786804199219, 'KL/std': 41.37312316894531, 'logits/chosen': -0.6142909526824951, 'logits/rejected': -0.5930590629577637, 'epoch': 0.16} + 16%|█▌ | 109/681 [04:37<24:38, 2.59s/it] 16%|█▌ | 110/681 [04:40<24:49, 2.61s/it] {'loss': 1.0488, 'grad_norm': 19.2232608795166, 'learning_rate': 4.947482930773511e-07, 'fcm_dpo/beta': 0.013750969432294369, 'fcm_dpo/q_t': 0.3864942193031311, 'fcm_dpo/delta': -0.12534289062023163, 'fcm_dpo/margin': 37.42394256591797, 'margin_dpo/margin_mean': 37.42394256591797, 'margin_dpo/margin_std': 49.388160705566406, 'logps/chosen': -83.375244140625, 'logps/rejected': -141.79551696777344, 'logps/ref_chosen': -58.753684997558594, 'logps/ref_rejected': -79.75001525878906, 'KL/chosen_KL_mean': -24.62155532836914, 'KL/rejected_KL_mean': -62.04550552368164, 'KL/mean': -43.333526611328125, 'KL/std': 41.19173049926758, 'logits/chosen': -0.608720064163208, 'logits/rejected': -0.5711803436279297, 'epoch': 0.16} + 16%|█▌ | 110/681 [04:40<24:49, 2.61s/it] 16%|█▋ | 111/681 [04:42<24:33, 2.58s/it] {'loss': 1.0303, 'grad_norm': 21.545623779296875, 'learning_rate': 4.944834074412042e-07, 'fcm_dpo/beta': 0.01339393574744463, 'fcm_dpo/q_t': 0.37644529342651367, 'fcm_dpo/delta': -0.17819947004318237, 'fcm_dpo/margin': 42.30280303955078, 'margin_dpo/margin_mean': 42.30280303955078, 'margin_dpo/margin_std': 57.269569396972656, 'logps/chosen': -96.9381103515625, 'logps/rejected': -169.04566955566406, 'logps/ref_chosen': -68.62410736083984, 'logps/ref_rejected': -98.42886352539062, 'KL/chosen_KL_mean': -28.314008712768555, 'KL/rejected_KL_mean': -70.61680603027344, 'KL/mean': -49.46540832519531, 'KL/std': 46.69132995605469, 'logits/chosen': -0.6583347320556641, 'logits/rejected': -0.6357636451721191, 'epoch': 0.16} + 16%|█▋ | 111/681 [04:42<24:33, 2.58s/it] 16%|█▋ | 112/681 [04:45<23:33, 2.48s/it] {'loss': 1.1238, 'grad_norm': 19.833799362182617, 'learning_rate': 4.942120794399002e-07, 'fcm_dpo/beta': 0.0133673045784235, 'fcm_dpo/q_t': 0.41706275939941406, 'fcm_dpo/delta': 0.03976195305585861, 'fcm_dpo/margin': 27.04251480102539, 'margin_dpo/margin_mean': 27.042512893676758, 'margin_dpo/margin_std': 39.28888702392578, 'logps/chosen': -76.42552185058594, 'logps/rejected': -117.99282836914062, 'logps/ref_chosen': -50.24964141845703, 'logps/ref_rejected': -64.77442932128906, 'KL/chosen_KL_mean': -26.175884246826172, 'KL/rejected_KL_mean': -53.2183952331543, 'KL/mean': -39.6971435546875, 'KL/std': 32.65059280395508, 'logits/chosen': -0.6356754302978516, 'logits/rejected': -0.5966525077819824, 'epoch': 0.16} + 16%|█▋ | 112/681 [04:45<23:33, 2.48s/it] 17%|█▋ | 113/681 [04:47<23:51, 2.52s/it] {'loss': 1.0911, 'grad_norm': 20.532014846801758, 'learning_rate': 4.939343162231841e-07, 'fcm_dpo/beta': 0.013465975411236286, 'fcm_dpo/q_t': 0.40915369987487793, 'fcm_dpo/delta': 0.006106908433139324, 'fcm_dpo/margin': 29.26871109008789, 'margin_dpo/margin_mean': 29.26871109008789, 'margin_dpo/margin_std': 37.83106231689453, 'logps/chosen': -99.6154556274414, 'logps/rejected': -140.13992309570312, 'logps/ref_chosen': -66.71295166015625, 'logps/ref_rejected': -77.96870422363281, 'KL/chosen_KL_mean': -32.902503967285156, 'KL/rejected_KL_mean': -62.17121887207031, 'KL/mean': -47.536865234375, 'KL/std': 34.14165496826172, 'logits/chosen': -0.5950823426246643, 'logits/rejected': -0.5499939322471619, 'epoch': 0.17} + 17%|█▋ | 113/681 [04:47<23:51, 2.52s/it] 17%|█▋ | 114/681 [04:50<23:45, 2.51s/it] {'loss': 0.9983, 'grad_norm': 21.725406646728516, 'learning_rate': 4.936501251103751e-07, 'fcm_dpo/beta': 0.012956779450178146, 'fcm_dpo/q_t': 0.3740847706794739, 'fcm_dpo/delta': -0.21660971641540527, 'fcm_dpo/margin': 46.41866683959961, 'margin_dpo/margin_mean': 46.41866683959961, 'margin_dpo/margin_std': 62.333763122558594, 'logps/chosen': -88.02439880371094, 'logps/rejected': -163.7676544189453, 'logps/ref_chosen': -57.78507995605469, 'logps/ref_rejected': -87.10966491699219, 'KL/chosen_KL_mean': -30.239316940307617, 'KL/rejected_KL_mean': -76.65798950195312, 'KL/mean': -53.44865417480469, 'KL/std': 51.340576171875, 'logits/chosen': -0.6112878918647766, 'logits/rejected': -0.5779776573181152, 'epoch': 0.17} + 17%|█▋ | 114/681 [04:50<23:45, 2.51s/it] 17%|█▋ | 115/681 [04:52<24:06, 2.55s/it] {'loss': 1.163, 'grad_norm': 31.30410385131836, 'learning_rate': 4.933595135901732e-07, 'fcm_dpo/beta': 0.01290312223136425, 'fcm_dpo/q_t': 0.41342228651046753, 'fcm_dpo/delta': -0.015496611595153809, 'fcm_dpo/margin': 32.142608642578125, 'margin_dpo/margin_mean': 32.142608642578125, 'margin_dpo/margin_std': 64.65020751953125, 'logps/chosen': -105.18177795410156, 'logps/rejected': -170.3072509765625, 'logps/ref_chosen': -65.5826416015625, 'logps/ref_rejected': -98.56552124023438, 'KL/chosen_KL_mean': -39.5991325378418, 'KL/rejected_KL_mean': -71.74173736572266, 'KL/mean': -55.670440673828125, 'KL/std': 49.06171417236328, 'logits/chosen': -0.6504275798797607, 'logits/rejected': -0.6302141547203064, 'epoch': 0.17} + 17%|█▋ | 115/681 [04:52<24:06, 2.55s/it] 17%|█▋ | 116/681 [04:55<23:25, 2.49s/it] {'loss': 1.0577, 'grad_norm': 21.841028213500977, 'learning_rate': 4.930624893204624e-07, 'fcm_dpo/beta': 0.012806812301278114, 'fcm_dpo/q_t': 0.397646963596344, 'fcm_dpo/delta': -0.05582479387521744, 'fcm_dpo/margin': 35.38352966308594, 'margin_dpo/margin_mean': 35.38352966308594, 'margin_dpo/margin_std': 44.40971374511719, 'logps/chosen': -81.30925750732422, 'logps/rejected': -145.8143310546875, 'logps/ref_chosen': -51.40031433105469, 'logps/ref_rejected': -80.5218505859375, 'KL/chosen_KL_mean': -29.908945083618164, 'KL/rejected_KL_mean': -65.29248809814453, 'KL/mean': -47.60071563720703, 'KL/std': 41.23687744140625, 'logits/chosen': -0.6186249256134033, 'logits/rejected': -0.608126163482666, 'epoch': 0.17} + 17%|█▋ | 116/681 [04:55<23:25, 2.49s/it] 17%|█▋ | 117/681 [04:57<23:15, 2.47s/it] {'loss': 1.1456, 'grad_norm': 28.672874450683594, 'learning_rate': 4.927590601281083e-07, 'fcm_dpo/beta': 0.012780335731804371, 'fcm_dpo/q_t': 0.41756001114845276, 'fcm_dpo/delta': 0.03322294354438782, 'fcm_dpo/margin': 28.793424606323242, 'margin_dpo/margin_mean': 28.793426513671875, 'margin_dpo/margin_std': 50.167694091796875, 'logps/chosen': -107.83016967773438, 'logps/rejected': -133.9091796875, 'logps/ref_chosen': -69.29840850830078, 'logps/ref_rejected': -66.583984375, 'KL/chosen_KL_mean': -38.531761169433594, 'KL/rejected_KL_mean': -67.3251953125, 'KL/mean': -52.92847442626953, 'KL/std': 44.12018585205078, 'logits/chosen': -0.590896487236023, 'logits/rejected': -0.5536011457443237, 'epoch': 0.17} + 17%|█▋ | 117/681 [04:57<23:15, 2.47s/it] 17%|█▋ | 118/681 [05:00<23:17, 2.48s/it] {'loss': 1.0621, 'grad_norm': 21.261783599853516, 'learning_rate': 4.924492340087524e-07, 'fcm_dpo/beta': 0.01270340196788311, 'fcm_dpo/q_t': 0.3987045884132385, 'fcm_dpo/delta': -0.05310482531785965, 'fcm_dpo/margin': 35.48102569580078, 'margin_dpo/margin_mean': 35.48102569580078, 'margin_dpo/margin_std': 45.933509826660156, 'logps/chosen': -86.33668518066406, 'logps/rejected': -141.84579467773438, 'logps/ref_chosen': -55.6409797668457, 'logps/ref_rejected': -75.66905975341797, 'KL/chosen_KL_mean': -30.69570541381836, 'KL/rejected_KL_mean': -66.17672729492188, 'KL/mean': -48.43621826171875, 'KL/std': 40.15031814575195, 'logits/chosen': -0.6285079717636108, 'logits/rejected': -0.6085944771766663, 'epoch': 0.17} + 17%|█▋ | 118/681 [05:00<23:17, 2.48s/it] 17%|█▋ | 119/681 [05:02<23:50, 2.55s/it] {'loss': 1.1028, 'grad_norm': 23.512651443481445, 'learning_rate': 4.92133019126601e-07, 'fcm_dpo/beta': 0.012506300583481789, 'fcm_dpo/q_t': 0.4051211178302765, 'fcm_dpo/delta': -0.04011544585227966, 'fcm_dpo/margin': 34.964866638183594, 'margin_dpo/margin_mean': 34.964866638183594, 'margin_dpo/margin_std': 54.86541748046875, 'logps/chosen': -115.73030090332031, 'logps/rejected': -180.16226196289062, 'logps/ref_chosen': -73.51019287109375, 'logps/ref_rejected': -102.977294921875, 'KL/chosen_KL_mean': -42.2201042175293, 'KL/rejected_KL_mean': -77.18496704101562, 'KL/mean': -59.702537536621094, 'KL/std': 45.075523376464844, 'logits/chosen': -0.5862429738044739, 'logits/rejected': -0.5710224509239197, 'epoch': 0.17} + 17%|█▋ | 119/681 [05:02<23:50, 2.55s/it] 18%|█▊ | 120/681 [05:05<24:16, 2.60s/it] {'loss': 0.9945, 'grad_norm': 22.41628074645996, 'learning_rate': 4.918104238142103e-07, 'fcm_dpo/beta': 0.012185569852590561, 'fcm_dpo/q_t': 0.3711671531200409, 'fcm_dpo/delta': -0.20024745166301727, 'fcm_dpo/margin': 48.279685974121094, 'margin_dpo/margin_mean': 48.27968215942383, 'margin_dpo/margin_std': 59.22111892700195, 'logps/chosen': -120.1808853149414, 'logps/rejected': -199.7034912109375, 'logps/ref_chosen': -76.78083801269531, 'logps/ref_rejected': -108.02374267578125, 'KL/chosen_KL_mean': -43.40005111694336, 'KL/rejected_KL_mean': -91.67974853515625, 'KL/mean': -67.5398941040039, 'KL/std': 56.29936218261719, 'logits/chosen': -0.629610002040863, 'logits/rejected': -0.5978201031684875, 'epoch': 0.18} + 18%|█▊ | 120/681 [05:05<24:16, 2.60s/it] 18%|█▊ | 121/681 [05:08<23:57, 2.57s/it] {'loss': 0.996, 'grad_norm': 25.108898162841797, 'learning_rate': 4.91481456572267e-07, 'fcm_dpo/beta': 0.011589834466576576, 'fcm_dpo/q_t': 0.36686164140701294, 'fcm_dpo/delta': -0.23640641570091248, 'fcm_dpo/margin': 53.492347717285156, 'margin_dpo/margin_mean': 53.492347717285156, 'margin_dpo/margin_std': 68.95616912841797, 'logps/chosen': -103.55471801757812, 'logps/rejected': -205.25173950195312, 'logps/ref_chosen': -61.789894104003906, 'logps/ref_rejected': -109.99456787109375, 'KL/chosen_KL_mean': -41.76482391357422, 'KL/rejected_KL_mean': -95.25717163085938, 'KL/mean': -68.51100158691406, 'KL/std': 55.03094482421875, 'logits/chosen': -0.6011873483657837, 'logits/rejected': -0.5975435972213745, 'epoch': 0.18} + 18%|█▊ | 121/681 [05:08<23:57, 2.57s/it] 18%|█▊ | 122/681 [05:10<23:14, 2.50s/it] {'loss': 0.893, 'grad_norm': 23.686704635620117, 'learning_rate': 4.911461260693638e-07, 'fcm_dpo/beta': 0.010934034362435341, 'fcm_dpo/q_t': 0.3402714133262634, 'fcm_dpo/delta': -0.352782666683197, 'fcm_dpo/margin': 66.51848602294922, 'margin_dpo/margin_mean': 66.51847839355469, 'margin_dpo/margin_std': 65.36463928222656, 'logps/chosen': -84.64803314208984, 'logps/rejected': -210.97850036621094, 'logps/ref_chosen': -46.9022102355957, 'logps/ref_rejected': -106.71418762207031, 'KL/chosen_KL_mean': -37.74582290649414, 'KL/rejected_KL_mean': -104.26431274414062, 'KL/mean': -71.00506591796875, 'KL/std': 62.45606994628906, 'logits/chosen': -0.5722811222076416, 'logits/rejected': -0.5880405306816101, 'epoch': 0.18} + 18%|█▊ | 122/681 [05:10<23:14, 2.50s/it] 18%|█▊ | 123/681 [05:13<23:48, 2.56s/it] {'loss': 1.1066, 'grad_norm': 20.792469024658203, 'learning_rate': 4.908044411417711e-07, 'fcm_dpo/beta': 0.010566072538495064, 'fcm_dpo/q_t': 0.40092289447784424, 'fcm_dpo/delta': -0.06529982388019562, 'fcm_dpo/margin': 43.66535186767578, 'margin_dpo/margin_mean': 43.66535186767578, 'margin_dpo/margin_std': 71.86073303222656, 'logps/chosen': -103.10311126708984, 'logps/rejected': -173.20523071289062, 'logps/ref_chosen': -61.33863830566406, 'logps/ref_rejected': -87.775390625, 'KL/chosen_KL_mean': -41.76447296142578, 'KL/rejected_KL_mean': -85.42982482910156, 'KL/mean': -63.59715270996094, 'KL/std': 53.64918518066406, 'logits/chosen': -0.5638459920883179, 'logits/rejected': -0.5461542010307312, 'epoch': 0.18} + 18%|█▊ | 123/681 [05:13<23:48, 2.56s/it] 18%|█▊ | 124/681 [05:15<23:50, 2.57s/it] {'loss': 1.0168, 'grad_norm': 22.633270263671875, 'learning_rate': 4.904564107932048e-07, 'fcm_dpo/beta': 0.010147863999009132, 'fcm_dpo/q_t': 0.3697102665901184, 'fcm_dpo/delta': -0.2676956057548523, 'fcm_dpo/margin': 63.99842834472656, 'margin_dpo/margin_mean': 63.998435974121094, 'margin_dpo/margin_std': 92.30806732177734, 'logps/chosen': -118.8822021484375, 'logps/rejected': -229.01287841796875, 'logps/ref_chosen': -71.44833374023438, 'logps/ref_rejected': -117.58056640625, 'KL/chosen_KL_mean': -47.43387222290039, 'KL/rejected_KL_mean': -111.43231201171875, 'KL/mean': -79.43309020996094, 'KL/std': 73.24830627441406, 'logits/chosen': -0.5460699796676636, 'logits/rejected': -0.5479286909103394, 'epoch': 0.18} + 18%|█▊ | 124/681 [05:15<23:50, 2.57s/it] 18%|█▊ | 125/681 [05:18<23:33, 2.54s/it] {'loss': 1.0252, 'grad_norm': 19.055896759033203, 'learning_rate': 4.90102044194588e-07, 'fcm_dpo/beta': 0.009806671179831028, 'fcm_dpo/q_t': 0.3796992301940918, 'fcm_dpo/delta': -0.1694386899471283, 'fcm_dpo/margin': 57.12309646606445, 'margin_dpo/margin_mean': 57.12309646606445, 'margin_dpo/margin_std': 76.22639465332031, 'logps/chosen': -89.2720947265625, 'logps/rejected': -180.24685668945312, 'logps/ref_chosen': -50.136940002441406, 'logps/ref_rejected': -83.98861694335938, 'KL/chosen_KL_mean': -39.13515090942383, 'KL/rejected_KL_mean': -96.25823974609375, 'KL/mean': -67.69670104980469, 'KL/std': 61.70295333862305, 'logits/chosen': -0.5038829445838928, 'logits/rejected': -0.5049155950546265, 'epoch': 0.18} + 18%|█▊ | 125/681 [05:18<23:33, 2.54s/it] 19%|█▊ | 126/681 [05:20<23:48, 2.57s/it] {'loss': 1.0422, 'grad_norm': 20.268415451049805, 'learning_rate': 4.897413506838102e-07, 'fcm_dpo/beta': 0.009523214772343636, 'fcm_dpo/q_t': 0.38864102959632874, 'fcm_dpo/delta': -0.11256399005651474, 'fcm_dpo/margin': 53.19603729248047, 'margin_dpo/margin_mean': 53.1960334777832, 'margin_dpo/margin_std': 70.52363586425781, 'logps/chosen': -97.94435119628906, 'logps/rejected': -193.6030731201172, 'logps/ref_chosen': -55.66706848144531, 'logps/ref_rejected': -98.1297607421875, 'KL/chosen_KL_mean': -42.27728271484375, 'KL/rejected_KL_mean': -95.47331237792969, 'KL/mean': -68.87530517578125, 'KL/std': 55.436668395996094, 'logits/chosen': -0.5412114858627319, 'logits/rejected': -0.5357469320297241, 'epoch': 0.19} + 19%|█▊ | 126/681 [05:20<23:48, 2.57s/it] 19%|█▊ | 127/681 [05:23<23:58, 2.60s/it] {'loss': 1.128, 'grad_norm': 20.927669525146484, 'learning_rate': 4.89374339765481e-07, 'fcm_dpo/beta': 0.009529907256364822, 'fcm_dpo/q_t': 0.4137781858444214, 'fcm_dpo/delta': 0.027420198544859886, 'fcm_dpo/margin': 39.201202392578125, 'margin_dpo/margin_mean': 39.201202392578125, 'margin_dpo/margin_std': 61.48206329345703, 'logps/chosen': -97.65654754638672, 'logps/rejected': -157.09884643554688, 'logps/ref_chosen': -56.55467987060547, 'logps/ref_rejected': -76.7957763671875, 'KL/chosen_KL_mean': -41.10186767578125, 'KL/rejected_KL_mean': -80.30307006835938, 'KL/mean': -60.70246887207031, 'KL/std': 48.769588470458984, 'logits/chosen': -0.5445564985275269, 'logits/rejected': -0.5252886414527893, 'epoch': 0.19} + 19%|█▊ | 127/681 [05:23<23:58, 2.60s/it] 19%|█▉ | 128/681 [05:26<24:13, 2.63s/it] {'loss': 1.1401, 'grad_norm': 30.452770233154297, 'learning_rate': 4.890010211106795e-07, 'fcm_dpo/beta': 0.009586036205291748, 'fcm_dpo/q_t': 0.41241586208343506, 'fcm_dpo/delta': 0.005548093467950821, 'fcm_dpo/margin': 41.13318634033203, 'margin_dpo/margin_mean': 41.13318634033203, 'margin_dpo/margin_std': 71.68881225585938, 'logps/chosen': -101.99433898925781, 'logps/rejected': -161.44552612304688, 'logps/ref_chosen': -58.12095642089844, 'logps/ref_rejected': -76.43896484375, 'KL/chosen_KL_mean': -43.87337875366211, 'KL/rejected_KL_mean': -85.00656127929688, 'KL/mean': -64.43997192382812, 'KL/std': 57.35568618774414, 'logits/chosen': -0.5140960812568665, 'logits/rejected': -0.4917343258857727, 'epoch': 0.19} + 19%|█▉ | 128/681 [05:26<24:13, 2.63s/it] 19%|█▉ | 129/681 [05:28<24:01, 2.61s/it] {'loss': 1.1471, 'grad_norm': 21.074941635131836, 'learning_rate': 4.88621404556699e-07, 'fcm_dpo/beta': 0.009535422548651695, 'fcm_dpo/q_t': 0.41339975595474243, 'fcm_dpo/delta': -0.014929811470210552, 'fcm_dpo/margin': 43.44950866699219, 'margin_dpo/margin_mean': 43.44950866699219, 'margin_dpo/margin_std': 82.56187438964844, 'logps/chosen': -120.46183776855469, 'logps/rejected': -193.63717651367188, 'logps/ref_chosen': -66.91637420654297, 'logps/ref_rejected': -96.6422119140625, 'KL/chosen_KL_mean': -53.54546356201172, 'KL/rejected_KL_mean': -96.9949722290039, 'KL/mean': -75.27021789550781, 'KL/std': 63.507652282714844, 'logits/chosen': -0.5416771769523621, 'logits/rejected': -0.5308274626731873, 'epoch': 0.19} + 19%|█▉ | 129/681 [05:28<24:01, 2.61s/it] 19%|█▉ | 130/681 [05:31<23:21, 2.54s/it] {'loss': 0.9939, 'grad_norm': 20.190004348754883, 'learning_rate': 4.882355001067891e-07, 'fcm_dpo/beta': 0.009305297397077084, 'fcm_dpo/q_t': 0.36885032057762146, 'fcm_dpo/delta': -0.2256622165441513, 'fcm_dpo/margin': 65.76427459716797, 'margin_dpo/margin_mean': 65.76426696777344, 'margin_dpo/margin_std': 80.05824279785156, 'logps/chosen': -84.4376220703125, 'logps/rejected': -188.31668090820312, 'logps/ref_chosen': -44.66685104370117, 'logps/ref_rejected': -82.78165435791016, 'KL/chosen_KL_mean': -39.77076721191406, 'KL/rejected_KL_mean': -105.5350341796875, 'KL/mean': -72.65290069580078, 'KL/std': 65.77041625976562, 'logits/chosen': -0.47879669070243835, 'logits/rejected': -0.4725271463394165, 'epoch': 0.19} + 19%|█▉ | 130/681 [05:31<23:21, 2.54s/it] 19%|█▉ | 131/681 [05:33<23:26, 2.56s/it] {'loss': 0.9793, 'grad_norm': 29.07965850830078, 'learning_rate': 4.878433179298909e-07, 'fcm_dpo/beta': 0.00883854366838932, 'fcm_dpo/q_t': 0.36890602111816406, 'fcm_dpo/delta': -0.18700018525123596, 'fcm_dpo/margin': 65.16259002685547, 'margin_dpo/margin_mean': 65.16259002685547, 'margin_dpo/margin_std': 70.59186553955078, 'logps/chosen': -80.13856506347656, 'logps/rejected': -188.82058715820312, 'logps/ref_chosen': -44.924591064453125, 'logps/ref_rejected': -88.44401550292969, 'KL/chosen_KL_mean': -35.21397399902344, 'KL/rejected_KL_mean': -100.37657165527344, 'KL/mean': -67.79527282714844, 'KL/std': 64.40512084960938, 'logits/chosen': -0.49699753522872925, 'logits/rejected': -0.5037678480148315, 'epoch': 0.19} + 19%|█▉ | 131/681 [05:33<23:26, 2.56s/it] 19%|█▉ | 132/681 [05:36<23:25, 2.56s/it] {'loss': 1.0858, 'grad_norm': 19.968883514404297, 'learning_rate': 4.874448683603694e-07, 'fcm_dpo/beta': 0.008657930418848991, 'fcm_dpo/q_t': 0.40050774812698364, 'fcm_dpo/delta': -0.06971244513988495, 'fcm_dpo/margin': 53.85203552246094, 'margin_dpo/margin_mean': 53.85203552246094, 'margin_dpo/margin_std': 84.09346008300781, 'logps/chosen': -106.16598510742188, 'logps/rejected': -188.9091033935547, 'logps/ref_chosen': -59.00108337402344, 'logps/ref_rejected': -87.89215087890625, 'KL/chosen_KL_mean': -47.16490173339844, 'KL/rejected_KL_mean': -101.01695251464844, 'KL/mean': -74.09092712402344, 'KL/std': 64.85047912597656, 'logits/chosen': -0.5425466299057007, 'logits/rejected': -0.54119473695755, 'epoch': 0.19} + 19%|█▉ | 132/681 [05:36<23:25, 2.56s/it] 20%|█▉ | 133/681 [05:38<23:32, 2.58s/it] {'loss': 1.1096, 'grad_norm': 25.95366096496582, 'learning_rate': 4.870401618977415e-07, 'fcm_dpo/beta': 0.008616752922534943, 'fcm_dpo/q_t': 0.4102787375450134, 'fcm_dpo/delta': -0.009173337370157242, 'fcm_dpo/margin': 47.432525634765625, 'margin_dpo/margin_mean': 47.432525634765625, 'margin_dpo/margin_std': 74.17190551757812, 'logps/chosen': -122.24532318115234, 'logps/rejected': -199.40692138671875, 'logps/ref_chosen': -66.60449981689453, 'logps/ref_rejected': -96.33355712890625, 'KL/chosen_KL_mean': -55.64082336425781, 'KL/rejected_KL_mean': -103.07334899902344, 'KL/mean': -79.35708618164062, 'KL/std': 58.46957015991211, 'logits/chosen': -0.5186644792556763, 'logits/rejected': -0.5046030879020691, 'epoch': 0.2} + 20%|█▉ | 133/681 [05:38<23:32, 2.58s/it] 20%|█▉ | 134/681 [05:41<22:54, 2.51s/it] {'loss': 1.0667, 'grad_norm': 19.049375534057617, 'learning_rate': 4.866292092063986e-07, 'fcm_dpo/beta': 0.008605021983385086, 'fcm_dpo/q_t': 0.4016202986240387, 'fcm_dpo/delta': -0.039981499314308167, 'fcm_dpo/margin': 50.918270111083984, 'margin_dpo/margin_mean': 50.91827392578125, 'margin_dpo/margin_std': 66.06755065917969, 'logps/chosen': -96.20419311523438, 'logps/rejected': -182.70773315429688, 'logps/ref_chosen': -52.06925582885742, 'logps/ref_rejected': -87.6545181274414, 'KL/chosen_KL_mean': -44.13493347167969, 'KL/rejected_KL_mean': -95.05320739746094, 'KL/mean': -69.59406280517578, 'KL/std': 57.12010955810547, 'logits/chosen': -0.47495368123054504, 'logits/rejected': -0.45979058742523193, 'epoch': 0.2} + 20%|█▉ | 134/681 [05:41<22:54, 2.51s/it] 20%|█▉ | 135/681 [05:43<22:44, 2.50s/it] {'loss': 0.993, 'grad_norm': 22.302026748657227, 'learning_rate': 4.862120211153265e-07, 'fcm_dpo/beta': 0.008282874710857868, 'fcm_dpo/q_t': 0.37036457657814026, 'fcm_dpo/delta': -0.21973907947540283, 'fcm_dpo/margin': 73.2041015625, 'margin_dpo/margin_mean': 73.2041015625, 'margin_dpo/margin_std': 91.65379333496094, 'logps/chosen': -99.33219909667969, 'logps/rejected': -238.1621856689453, 'logps/ref_chosen': -50.353858947753906, 'logps/ref_rejected': -115.97975158691406, 'KL/chosen_KL_mean': -48.97834014892578, 'KL/rejected_KL_mean': -122.18243408203125, 'KL/mean': -85.58039855957031, 'KL/std': 75.99504089355469, 'logits/chosen': -0.4845237731933594, 'logits/rejected': -0.5190806984901428, 'epoch': 0.2} + 20%|█▉ | 135/681 [05:43<22:44, 2.50s/it] 20%|█▉ | 136/681 [05:46<23:15, 2.56s/it] {'loss': 1.1412, 'grad_norm': 20.428884506225586, 'learning_rate': 4.857886086178193e-07, 'fcm_dpo/beta': 0.008144080638885498, 'fcm_dpo/q_t': 0.4184736907482147, 'fcm_dpo/delta': 0.0006435923278331757, 'fcm_dpo/margin': 48.954044342041016, 'margin_dpo/margin_mean': 48.95404052734375, 'margin_dpo/margin_std': 88.62368774414062, 'logps/chosen': -123.47661590576172, 'logps/rejected': -203.67938232421875, 'logps/ref_chosen': -65.072509765625, 'logps/ref_rejected': -96.32122802734375, 'KL/chosen_KL_mean': -58.40410614013672, 'KL/rejected_KL_mean': -107.358154296875, 'KL/mean': -82.88113403320312, 'KL/std': 69.1063461303711, 'logits/chosen': -0.4917562007904053, 'logits/rejected': -0.48317497968673706, 'epoch': 0.2} + 20%|█▉ | 136/681 [05:46<23:15, 2.56s/it] 20%|██ | 137/681 [05:48<23:10, 2.56s/it] {'loss': 1.0223, 'grad_norm': 18.094011306762695, 'learning_rate': 4.853589828711902e-07, 'fcm_dpo/beta': 0.007917901501059532, 'fcm_dpo/q_t': 0.37597256898880005, 'fcm_dpo/delta': -0.22291553020477295, 'fcm_dpo/margin': 76.92433166503906, 'margin_dpo/margin_mean': 76.9243392944336, 'margin_dpo/margin_std': 109.81398010253906, 'logps/chosen': -104.988037109375, 'logps/rejected': -247.0170440673828, 'logps/ref_chosen': -48.759117126464844, 'logps/ref_rejected': -113.86376953125, 'KL/chosen_KL_mean': -56.22892761230469, 'KL/rejected_KL_mean': -133.1532745361328, 'KL/mean': -94.69110107421875, 'KL/std': 91.96360778808594, 'logits/chosen': -0.44450414180755615, 'logits/rejected': -0.47170525789260864, 'epoch': 0.2} + 20%|██ | 137/681 [05:49<23:10, 2.56s/it] 20%|██ | 138/681 [05:51<22:28, 2.48s/it] {'loss': 1.0558, 'grad_norm': 21.79376220703125, 'learning_rate': 4.849231551964771e-07, 'fcm_dpo/beta': 0.007785219699144363, 'fcm_dpo/q_t': 0.3965170383453369, 'fcm_dpo/delta': -0.054885830730199814, 'fcm_dpo/margin': 58.114654541015625, 'margin_dpo/margin_mean': 58.114654541015625, 'margin_dpo/margin_std': 71.69284057617188, 'logps/chosen': -119.43486022949219, 'logps/rejected': -210.226806640625, 'logps/ref_chosen': -60.519649505615234, 'logps/ref_rejected': -93.19694519042969, 'KL/chosen_KL_mean': -58.91520690917969, 'KL/rejected_KL_mean': -117.02986145019531, 'KL/mean': -87.9725341796875, 'KL/std': 70.14852905273438, 'logits/chosen': -0.4243091940879822, 'logits/rejected': -0.41211992502212524, 'epoch': 0.2} + 20%|██ | 138/681 [05:51<22:28, 2.48s/it] 20%|██ | 139/681 [05:53<22:14, 2.46s/it] {'loss': 1.0187, 'grad_norm': 18.322908401489258, 'learning_rate': 4.844811370781446e-07, 'fcm_dpo/beta': 0.0076102884486317635, 'fcm_dpo/q_t': 0.382676362991333, 'fcm_dpo/delta': -0.13750019669532776, 'fcm_dpo/margin': 69.69491577148438, 'margin_dpo/margin_mean': 69.6949234008789, 'margin_dpo/margin_std': 86.69469451904297, 'logps/chosen': -96.80577087402344, 'logps/rejected': -199.33729553222656, 'logps/ref_chosen': -46.89138412475586, 'logps/ref_rejected': -79.72798156738281, 'KL/chosen_KL_mean': -49.914390563964844, 'KL/rejected_KL_mean': -119.60931396484375, 'KL/mean': -84.76184844970703, 'KL/std': 66.84112548828125, 'logits/chosen': -0.44147640466690063, 'logits/rejected': -0.4316656291484833, 'epoch': 0.2} + 20%|██ | 139/681 [05:53<22:14, 2.46s/it] 21%|██ | 140/681 [05:56<21:53, 2.43s/it] {'loss': 1.0674, 'grad_norm': 22.85267448425293, 'learning_rate': 4.840329401637809e-07, 'fcm_dpo/beta': 0.00744934706017375, 'fcm_dpo/q_t': 0.3951931893825531, 'fcm_dpo/delta': -0.075960174202919, 'fcm_dpo/margin': 63.40184783935547, 'margin_dpo/margin_mean': 63.40184783935547, 'margin_dpo/margin_std': 89.21878051757812, 'logps/chosen': -119.39387512207031, 'logps/rejected': -207.1051025390625, 'logps/ref_chosen': -58.97471618652344, 'logps/ref_rejected': -83.28410339355469, 'KL/chosen_KL_mean': -60.419158935546875, 'KL/rejected_KL_mean': -123.82099914550781, 'KL/mean': -92.12008666992188, 'KL/std': 74.5600814819336, 'logits/chosen': -0.44698405265808105, 'logits/rejected': -0.43398311734199524, 'epoch': 0.21} + 21%|██ | 140/681 [05:56<21:53, 2.43s/it] 21%|██ | 141/681 [05:58<22:18, 2.48s/it] {'loss': 1.1057, 'grad_norm': 27.14394760131836, 'learning_rate': 4.83578576263792e-07, 'fcm_dpo/beta': 0.007387247867882252, 'fcm_dpo/q_t': 0.4012778103351593, 'fcm_dpo/delta': -0.04261501878499985, 'fcm_dpo/margin': 59.66469955444336, 'margin_dpo/margin_mean': 59.664695739746094, 'margin_dpo/margin_std': 95.55046844482422, 'logps/chosen': -143.8526611328125, 'logps/rejected': -226.63394165039062, 'logps/ref_chosen': -75.07566833496094, 'logps/ref_rejected': -98.1922607421875, 'KL/chosen_KL_mean': -68.77699279785156, 'KL/rejected_KL_mean': -128.44168090820312, 'KL/mean': -98.60934448242188, 'KL/std': 82.50788879394531, 'logits/chosen': -0.43428778648376465, 'logits/rejected': -0.4219193756580353, 'epoch': 0.21} + 21%|██ | 141/681 [05:58<22:18, 2.48s/it] 21%|██ | 142/681 [06:01<22:58, 2.56s/it] {'loss': 1.0853, 'grad_norm': 28.602947235107422, 'learning_rate': 4.83118057351089e-07, 'fcm_dpo/beta': 0.007280835881829262, 'fcm_dpo/q_t': 0.39226555824279785, 'fcm_dpo/delta': -0.10598242282867432, 'fcm_dpo/margin': 68.77471923828125, 'margin_dpo/margin_mean': 68.77471923828125, 'margin_dpo/margin_std': 106.35022735595703, 'logps/chosen': -128.18289184570312, 'logps/rejected': -233.51190185546875, 'logps/ref_chosen': -58.027931213378906, 'logps/ref_rejected': -94.58222961425781, 'KL/chosen_KL_mean': -70.15496826171875, 'KL/rejected_KL_mean': -138.9296875, 'KL/mean': -104.54232025146484, 'KL/std': 90.10287475585938, 'logits/chosen': -0.4074459671974182, 'logits/rejected': -0.4063273072242737, 'epoch': 0.21} + 21%|██ | 142/681 [06:01<22:58, 2.56s/it] 21%|██ | 143/681 [06:04<23:27, 2.62s/it] {'loss': 1.1982, 'grad_norm': 24.429250717163086, 'learning_rate': 4.826513955607734e-07, 'fcm_dpo/beta': 0.007270464673638344, 'fcm_dpo/q_t': 0.43221598863601685, 'fcm_dpo/delta': 0.08424904197454453, 'fcm_dpo/margin': 43.79881286621094, 'margin_dpo/margin_mean': 43.79881286621094, 'margin_dpo/margin_std': 92.73600769042969, 'logps/chosen': -131.46389770507812, 'logps/rejected': -196.6658172607422, 'logps/ref_chosen': -57.59645080566406, 'logps/ref_rejected': -78.99957275390625, 'KL/chosen_KL_mean': -73.86743927001953, 'KL/rejected_KL_mean': -117.66624450683594, 'KL/mean': -95.766845703125, 'KL/std': 79.29611206054688, 'logits/chosen': -0.41874316334724426, 'logits/rejected': -0.41285938024520874, 'epoch': 0.21} + 21%|██ | 143/681 [06:04<23:27, 2.62s/it] 21%|██ | 144/681 [06:06<23:52, 2.67s/it] {'loss': 1.1061, 'grad_norm': 20.96710205078125, 'learning_rate': 4.821786031898176e-07, 'fcm_dpo/beta': 0.007330943364650011, 'fcm_dpo/q_t': 0.41074827313423157, 'fcm_dpo/delta': 0.005903269629925489, 'fcm_dpo/margin': 53.788116455078125, 'margin_dpo/margin_mean': 53.788116455078125, 'margin_dpo/margin_std': 77.68215942382812, 'logps/chosen': -124.40365600585938, 'logps/rejected': -200.28567504882812, 'logps/ref_chosen': -59.90636444091797, 'logps/ref_rejected': -82.00025939941406, 'KL/chosen_KL_mean': -64.49729919433594, 'KL/rejected_KL_mean': -118.28541564941406, 'KL/mean': -91.391357421875, 'KL/std': 66.07506561279297, 'logits/chosen': -0.42471039295196533, 'logits/rejected': -0.41297537088394165, 'epoch': 0.21} + 21%|██ | 144/681 [06:06<23:52, 2.67s/it] 21%|██▏ | 145/681 [06:09<23:22, 2.62s/it] {'loss': 1.0875, 'grad_norm': 24.383813858032227, 'learning_rate': 4.816996926967401e-07, 'fcm_dpo/beta': 0.007309791631996632, 'fcm_dpo/q_t': 0.40421411395072937, 'fcm_dpo/delta': -0.02330685406923294, 'fcm_dpo/margin': 57.771766662597656, 'margin_dpo/margin_mean': 57.771766662597656, 'margin_dpo/margin_std': 80.48458862304688, 'logps/chosen': -118.1037826538086, 'logps/rejected': -197.14120483398438, 'logps/ref_chosen': -56.60066604614258, 'logps/ref_rejected': -77.86631774902344, 'KL/chosen_KL_mean': -61.503116607666016, 'KL/rejected_KL_mean': -119.27488708496094, 'KL/mean': -90.38899993896484, 'KL/std': 66.21023559570312, 'logits/chosen': -0.4303405284881592, 'logits/rejected': -0.41304582357406616, 'epoch': 0.21} + 21%|██▏ | 145/681 [06:09<23:22, 2.62s/it] 21%|██▏ | 146/681 [06:11<23:08, 2.60s/it] {'loss': 1.1839, 'grad_norm': 27.966459274291992, 'learning_rate': 4.812146767012779e-07, 'fcm_dpo/beta': 0.007366587873548269, 'fcm_dpo/q_t': 0.4255162477493286, 'fcm_dpo/delta': 0.07394760102033615, 'fcm_dpo/margin': 44.597129821777344, 'margin_dpo/margin_mean': 44.597129821777344, 'margin_dpo/margin_std': 87.12408447265625, 'logps/chosen': -150.2462158203125, 'logps/rejected': -210.54568481445312, 'logps/ref_chosen': -66.00045013427734, 'logps/ref_rejected': -81.70278930664062, 'KL/chosen_KL_mean': -84.24576568603516, 'KL/rejected_KL_mean': -128.8428955078125, 'KL/mean': -106.54432678222656, 'KL/std': 72.23680877685547, 'logits/chosen': -0.4398476481437683, 'logits/rejected': -0.4156040847301483, 'epoch': 0.21} + 21%|██▏ | 146/681 [06:12<23:08, 2.60s/it] 22%|██▏ | 147/681 [06:14<23:08, 2.60s/it] {'loss': 1.09, 'grad_norm': 19.574729919433594, 'learning_rate': 4.807235679840536e-07, 'fcm_dpo/beta': 0.007349137216806412, 'fcm_dpo/q_t': 0.40209323167800903, 'fcm_dpo/delta': -0.045649539679288864, 'fcm_dpo/margin': 60.3402214050293, 'margin_dpo/margin_mean': 60.34022521972656, 'margin_dpo/margin_std': 90.08858489990234, 'logps/chosen': -114.66069793701172, 'logps/rejected': -192.98605346679688, 'logps/ref_chosen': -53.405487060546875, 'logps/ref_rejected': -71.39060974121094, 'KL/chosen_KL_mean': -61.255210876464844, 'KL/rejected_KL_mean': -121.59544372558594, 'KL/mean': -91.42532348632812, 'KL/std': 72.65022277832031, 'logits/chosen': -0.4582536816596985, 'logits/rejected': -0.43836987018585205, 'epoch': 0.22} + 22%|██▏ | 147/681 [06:14<23:08, 2.60s/it] 22%|██▏ | 148/681 [06:17<23:01, 2.59s/it] {'loss': 1.1251, 'grad_norm': 19.111360549926758, 'learning_rate': 4.802263794862384e-07, 'fcm_dpo/beta': 0.007287460379302502, 'fcm_dpo/q_t': 0.4167160391807556, 'fcm_dpo/delta': -0.0821787416934967, 'fcm_dpo/margin': 51.533233642578125, 'margin_dpo/margin_mean': 51.533233642578125, 'margin_dpo/margin_std': 75.76239776611328, 'logps/chosen': -124.65718078613281, 'logps/rejected': -214.34716796875, 'logps/ref_chosen': -64.93708038330078, 'logps/ref_rejected': -103.09384155273438, 'KL/chosen_KL_mean': -59.720096588134766, 'KL/rejected_KL_mean': -111.25332641601562, 'KL/mean': -85.48670959472656, 'KL/std': 71.65913391113281, 'logits/chosen': -0.5154159665107727, 'logits/rejected': -0.5085688829421997, 'epoch': 0.22} + 22%|██▏ | 148/681 [06:17<23:01, 2.59s/it] 22%|██▏ | 149/681 [06:19<23:14, 2.62s/it] {'loss': 1.0534, 'grad_norm': 17.82282257080078, 'learning_rate': 4.797231243092118e-07, 'fcm_dpo/beta': 0.007102725096046925, 'fcm_dpo/q_t': 0.39505213499069214, 'fcm_dpo/delta': -0.0634830892086029, 'fcm_dpo/margin': 64.60050964355469, 'margin_dpo/margin_mean': 64.60050964355469, 'margin_dpo/margin_std': 77.06610870361328, 'logps/chosen': -115.34933471679688, 'logps/rejected': -220.79083251953125, 'logps/ref_chosen': -58.47376251220703, 'logps/ref_rejected': -99.31474304199219, 'KL/chosen_KL_mean': -56.87557601928711, 'KL/rejected_KL_mean': -121.4760971069336, 'KL/mean': -89.17583465576172, 'KL/std': 65.12533569335938, 'logits/chosen': -0.49969860911369324, 'logits/rejected': -0.4846518933773041, 'epoch': 0.22} + 22%|██▏ | 149/681 [06:19<23:14, 2.62s/it] 22%|██▏ | 150/681 [06:22<23:07, 2.61s/it] {'loss': 1.0814, 'grad_norm': 18.326509475708008, 'learning_rate': 4.792138157142157e-07, 'fcm_dpo/beta': 0.007035818882286549, 'fcm_dpo/q_t': 0.40432196855545044, 'fcm_dpo/delta': -0.04736195132136345, 'fcm_dpo/margin': 63.096431732177734, 'margin_dpo/margin_mean': 63.096431732177734, 'margin_dpo/margin_std': 91.71922302246094, 'logps/chosen': -96.11625671386719, 'logps/rejected': -196.85446166992188, 'logps/ref_chosen': -45.705810546875, 'logps/ref_rejected': -83.34759521484375, 'KL/chosen_KL_mean': -50.41044235229492, 'KL/rejected_KL_mean': -113.50686645507812, 'KL/mean': -81.95865631103516, 'KL/std': 76.46592712402344, 'logits/chosen': -0.45320773124694824, 'logits/rejected': -0.456167995929718, 'epoch': 0.22} + 22%|██▏ | 150/681 [06:22<23:07, 2.61s/it] 22%|██▏ | 151/681 [06:24<22:31, 2.55s/it] {'loss': 1.0626, 'grad_norm': 21.404926300048828, 'learning_rate': 4.786984671220053e-07, 'fcm_dpo/beta': 0.007020828314125538, 'fcm_dpo/q_t': 0.3986474275588989, 'fcm_dpo/delta': -0.04382166266441345, 'fcm_dpo/margin': 62.936920166015625, 'margin_dpo/margin_mean': 62.936920166015625, 'margin_dpo/margin_std': 78.73812866210938, 'logps/chosen': -133.166015625, 'logps/rejected': -225.99591064453125, 'logps/ref_chosen': -70.57083129882812, 'logps/ref_rejected': -100.46382141113281, 'KL/chosen_KL_mean': -62.595176696777344, 'KL/rejected_KL_mean': -125.53208923339844, 'KL/mean': -94.06363677978516, 'KL/std': 71.80149841308594, 'logits/chosen': -0.5180387496948242, 'logits/rejected': -0.4879586100578308, 'epoch': 0.22} + 22%|██▏ | 151/681 [06:24<22:31, 2.55s/it] 22%|██▏ | 152/681 [06:27<22:45, 2.58s/it] {'loss': 1.017, 'grad_norm': 20.40810203552246, 'learning_rate': 4.78177092112495e-07, 'fcm_dpo/beta': 0.006885044276714325, 'fcm_dpo/q_t': 0.3824828267097473, 'fcm_dpo/delta': -0.13378563523292542, 'fcm_dpo/margin': 76.53138732910156, 'margin_dpo/margin_mean': 76.53138732910156, 'margin_dpo/margin_std': 90.15208435058594, 'logps/chosen': -114.58157348632812, 'logps/rejected': -237.08901977539062, 'logps/ref_chosen': -60.16438674926758, 'logps/ref_rejected': -106.14045715332031, 'KL/chosen_KL_mean': -54.41718292236328, 'KL/rejected_KL_mean': -130.9485626220703, 'KL/mean': -92.68287658691406, 'KL/std': 73.81169128417969, 'logits/chosen': -0.4824361205101013, 'logits/rejected': -0.47918662428855896, 'epoch': 0.22} + 22%|██▏ | 152/681 [06:27<22:45, 2.58s/it] 22%|██▏ | 153/681 [06:30<22:44, 2.58s/it] {'loss': 1.0909, 'grad_norm': 15.532352447509766, 'learning_rate': 4.776497044244016e-07, 'fcm_dpo/beta': 0.006790122948586941, 'fcm_dpo/q_t': 0.40387213230133057, 'fcm_dpo/delta': -0.044522788375616074, 'fcm_dpo/margin': 65.17752075195312, 'margin_dpo/margin_mean': 65.17752075195312, 'margin_dpo/margin_std': 99.14810180664062, 'logps/chosen': -112.28076934814453, 'logps/rejected': -206.79884338378906, 'logps/ref_chosen': -56.315277099609375, 'logps/ref_rejected': -85.65583801269531, 'KL/chosen_KL_mean': -55.965492248535156, 'KL/rejected_KL_mean': -121.14301300048828, 'KL/mean': -88.55425262451172, 'KL/std': 80.69013977050781, 'logits/chosen': -0.49872875213623047, 'logits/rejected': -0.4938894510269165, 'epoch': 0.22} + 22%|██▏ | 153/681 [06:30<22:44, 2.58s/it] 23%|██▎ | 154/681 [06:32<22:57, 2.61s/it] {'loss': 1.1206, 'grad_norm': 18.9123592376709, 'learning_rate': 4.771163179548808e-07, 'fcm_dpo/beta': 0.006765860132873058, 'fcm_dpo/q_t': 0.4070885479450226, 'fcm_dpo/delta': -0.023557795211672783, 'fcm_dpo/margin': 62.42070007324219, 'margin_dpo/margin_mean': 62.42070388793945, 'margin_dpo/margin_std': 102.98345947265625, 'logps/chosen': -129.31759643554688, 'logps/rejected': -233.2399139404297, 'logps/ref_chosen': -62.74256896972656, 'logps/ref_rejected': -104.24420166015625, 'KL/chosen_KL_mean': -66.57502746582031, 'KL/rejected_KL_mean': -128.99571228027344, 'KL/mean': -97.78536987304688, 'KL/std': 80.61070251464844, 'logits/chosen': -0.4788493514060974, 'logits/rejected': -0.48109960556030273, 'epoch': 0.23} + 23%|██▎ | 154/681 [06:32<22:57, 2.61s/it] 23%|██▎ | 155/681 [06:35<22:55, 2.62s/it] {'loss': 1.0946, 'grad_norm': 19.345184326171875, 'learning_rate': 4.7657694675916247e-07, 'fcm_dpo/beta': 0.006722897756844759, 'fcm_dpo/q_t': 0.4044472575187683, 'fcm_dpo/delta': -0.026448355987668037, 'fcm_dpo/margin': 63.251991271972656, 'margin_dpo/margin_mean': 63.25199890136719, 'margin_dpo/margin_std': 93.20378112792969, 'logps/chosen': -121.7184066772461, 'logps/rejected': -201.80941772460938, 'logps/ref_chosen': -60.65318298339844, 'logps/ref_rejected': -77.49220275878906, 'KL/chosen_KL_mean': -61.065223693847656, 'KL/rejected_KL_mean': -124.31721496582031, 'KL/mean': -92.69122314453125, 'KL/std': 75.25825500488281, 'logits/chosen': -0.4656848907470703, 'logits/rejected': -0.44397997856140137, 'epoch': 0.23} + 23%|██▎ | 155/681 [06:35<22:55, 2.62s/it] 23%|██▎ | 156/681 [06:38<23:09, 2.65s/it] {'loss': 1.2725, 'grad_norm': 29.024635314941406, 'learning_rate': 4.7603160505017893e-07, 'fcm_dpo/beta': 0.0067663900554180145, 'fcm_dpo/q_t': 0.44376257061958313, 'fcm_dpo/delta': 0.05330243334174156, 'fcm_dpo/margin': 36.699989318847656, 'margin_dpo/margin_mean': 36.699989318847656, 'margin_dpo/margin_std': 105.47511291503906, 'logps/chosen': -155.34820556640625, 'logps/rejected': -199.72561645507812, 'logps/ref_chosen': -69.49188232421875, 'logps/ref_rejected': -77.16929626464844, 'KL/chosen_KL_mean': -85.85633850097656, 'KL/rejected_KL_mean': -122.55632019042969, 'KL/mean': -104.20632934570312, 'KL/std': 80.0552978515625, 'logits/chosen': -0.41135138273239136, 'logits/rejected': -0.40221792459487915, 'epoch': 0.23} + 23%|██▎ | 156/681 [06:38<23:09, 2.65s/it] 23%|██▎ | 157/681 [06:40<22:25, 2.57s/it] {'loss': 1.0299, 'grad_norm': 23.550922393798828, 'learning_rate': 4.7548030719819154e-07, 'fcm_dpo/beta': 0.0065932744182646275, 'fcm_dpo/q_t': 0.37946271896362305, 'fcm_dpo/delta': -0.13586004078388214, 'fcm_dpo/margin': 79.97874450683594, 'margin_dpo/margin_mean': 79.97874450683594, 'margin_dpo/margin_std': 98.83998107910156, 'logps/chosen': -139.48214721679688, 'logps/rejected': -265.73883056640625, 'logps/ref_chosen': -61.368438720703125, 'logps/ref_rejected': -107.64636993408203, 'KL/chosen_KL_mean': -78.11371612548828, 'KL/rejected_KL_mean': -158.09246826171875, 'KL/mean': -118.10308837890625, 'KL/std': 87.00740051269531, 'logits/chosen': -0.40428751707077026, 'logits/rejected': -0.4115862250328064, 'epoch': 0.23} + 23%|██▎ | 157/681 [06:40<22:25, 2.57s/it] 23%|██▎ | 158/681 [06:43<22:29, 2.58s/it] {'loss': 1.0558, 'grad_norm': 20.167179107666016, 'learning_rate': 4.7492306773041136e-07, 'fcm_dpo/beta': 0.006434428971260786, 'fcm_dpo/q_t': 0.38724130392074585, 'fcm_dpo/delta': -0.15647649765014648, 'fcm_dpo/margin': 85.15372467041016, 'margin_dpo/margin_mean': 85.15372467041016, 'margin_dpo/margin_std': 129.78738403320312, 'logps/chosen': -135.62876892089844, 'logps/rejected': -276.8642578125, 'logps/ref_chosen': -57.612918853759766, 'logps/ref_rejected': -113.6946792602539, 'KL/chosen_KL_mean': -78.0158462524414, 'KL/rejected_KL_mean': -163.16958618164062, 'KL/mean': -120.59271240234375, 'KL/std': 107.82058715820312, 'logits/chosen': -0.3908860683441162, 'logits/rejected': -0.40809518098831177, 'epoch': 0.23} + 23%|██▎ | 158/681 [06:43<22:29, 2.58s/it] 23%|██▎ | 159/681 [06:45<22:35, 2.60s/it] {'loss': 1.1488, 'grad_norm': 22.233327865600586, 'learning_rate': 4.743599013306165e-07, 'fcm_dpo/beta': 0.006435505114495754, 'fcm_dpo/q_t': 0.4166509807109833, 'fcm_dpo/delta': 0.026020796969532967, 'fcm_dpo/margin': 58.185665130615234, 'margin_dpo/margin_mean': 58.1856689453125, 'margin_dpo/margin_std': 102.5018310546875, 'logps/chosen': -169.90928649902344, 'logps/rejected': -235.43331909179688, 'logps/ref_chosen': -81.56034851074219, 'logps/ref_rejected': -88.89871215820312, 'KL/chosen_KL_mean': -88.34893798828125, 'KL/rejected_KL_mean': -146.5346221923828, 'KL/mean': -117.44178771972656, 'KL/std': 95.12521362304688, 'logits/chosen': -0.3991559147834778, 'logits/rejected': -0.36633995175361633, 'epoch': 0.23} + 23%|██▎ | 159/681 [06:45<22:35, 2.60s/it] 23%|██▎ | 160/681 [06:48<22:19, 2.57s/it] {'loss': 1.0906, 'grad_norm': 23.24005126953125, 'learning_rate': 4.737908228387656e-07, 'fcm_dpo/beta': 0.00629377830773592, 'fcm_dpo/q_t': 0.3964502215385437, 'fcm_dpo/delta': -0.09926701337099075, 'fcm_dpo/margin': 78.42352294921875, 'margin_dpo/margin_mean': 78.42352294921875, 'margin_dpo/margin_std': 126.56271362304688, 'logps/chosen': -155.90780639648438, 'logps/rejected': -265.8182678222656, 'logps/ref_chosen': -65.73088073730469, 'logps/ref_rejected': -97.21781921386719, 'KL/chosen_KL_mean': -90.17693328857422, 'KL/rejected_KL_mean': -168.6004638671875, 'KL/mean': -129.38868713378906, 'KL/std': 101.24613952636719, 'logits/chosen': -0.3764195144176483, 'logits/rejected': -0.3670395612716675, 'epoch': 0.23} + 23%|██▎ | 160/681 [06:48<22:19, 2.57s/it] 24%|██▎ | 161/681 [06:50<21:25, 2.47s/it] {'loss': 1.0934, 'grad_norm': 21.411426544189453, 'learning_rate': 4.7321584725060594e-07, 'fcm_dpo/beta': 0.006256973836570978, 'fcm_dpo/q_t': 0.40471774339675903, 'fcm_dpo/delta': -0.03292801231145859, 'fcm_dpo/margin': 68.96272277832031, 'margin_dpo/margin_mean': 68.96272277832031, 'margin_dpo/margin_std': 102.0528564453125, 'logps/chosen': -129.27838134765625, 'logps/rejected': -229.23558044433594, 'logps/ref_chosen': -52.43647003173828, 'logps/ref_rejected': -83.43095397949219, 'KL/chosen_KL_mean': -76.84190368652344, 'KL/rejected_KL_mean': -145.80462646484375, 'KL/mean': -111.3232650756836, 'KL/std': 80.283203125, 'logits/chosen': -0.34909725189208984, 'logits/rejected': -0.34455615282058716, 'epoch': 0.24} + 24%|██▎ | 161/681 [06:50<21:25, 2.47s/it] 24%|██▍ | 162/681 [06:53<22:05, 2.55s/it] {'loss': 1.1078, 'grad_norm': 22.422332763671875, 'learning_rate': 4.7263498971727905e-07, 'fcm_dpo/beta': 0.006169519387185574, 'fcm_dpo/q_t': 0.4066374897956848, 'fcm_dpo/delta': -0.02764543890953064, 'fcm_dpo/margin': 68.85668182373047, 'margin_dpo/margin_mean': 68.85668182373047, 'margin_dpo/margin_std': 106.42919158935547, 'logps/chosen': -137.02633666992188, 'logps/rejected': -232.6630096435547, 'logps/ref_chosen': -62.6105842590332, 'logps/ref_rejected': -89.39057922363281, 'KL/chosen_KL_mean': -74.41575622558594, 'KL/rejected_KL_mean': -143.27243041992188, 'KL/mean': -108.84408569335938, 'KL/std': 89.80424499511719, 'logits/chosen': -0.44204244017601013, 'logits/rejected': -0.4256909489631653, 'epoch': 0.24} + 24%|██▍ | 162/681 [06:53<22:05, 2.55s/it] 24%|██▍ | 163/681 [06:55<22:01, 2.55s/it] {'loss': 1.1151, 'grad_norm': 20.55537223815918, 'learning_rate': 4.720482655449212e-07, 'fcm_dpo/beta': 0.006201374344527721, 'fcm_dpo/q_t': 0.40968990325927734, 'fcm_dpo/delta': -0.013848692178726196, 'fcm_dpo/margin': 66.64288330078125, 'margin_dpo/margin_mean': 66.64288330078125, 'margin_dpo/margin_std': 107.60564422607422, 'logps/chosen': -138.04464721679688, 'logps/rejected': -225.0841064453125, 'logps/ref_chosen': -55.021629333496094, 'logps/ref_rejected': -75.418212890625, 'KL/chosen_KL_mean': -83.02301025390625, 'KL/rejected_KL_mean': -149.6658935546875, 'KL/mean': -116.34445190429688, 'KL/std': 89.37168884277344, 'logits/chosen': -0.37833207845687866, 'logits/rejected': -0.3598299026489258, 'epoch': 0.24} + 24%|██▍ | 163/681 [06:55<22:01, 2.55s/it] 24%|██▍ | 164/681 [06:58<21:53, 2.54s/it] {'loss': 1.0355, 'grad_norm': 23.66642189025879, 'learning_rate': 4.714556901942599e-07, 'fcm_dpo/beta': 0.006058148108422756, 'fcm_dpo/q_t': 0.38800323009490967, 'fcm_dpo/delta': -0.10455699265003204, 'fcm_dpo/margin': 82.15338134765625, 'margin_dpo/margin_mean': 82.15338134765625, 'margin_dpo/margin_std': 101.20204162597656, 'logps/chosen': -131.18165588378906, 'logps/rejected': -237.3590087890625, 'logps/ref_chosen': -55.64066696166992, 'logps/ref_rejected': -79.66463470458984, 'KL/chosen_KL_mean': -75.54098510742188, 'KL/rejected_KL_mean': -157.69436645507812, 'KL/mean': -116.61768341064453, 'KL/std': 87.67422485351562, 'logits/chosen': -0.38808923959732056, 'logits/rejected': -0.3748926520347595, 'epoch': 0.24} + 24%|██▍ | 164/681 [06:58<21:53, 2.54s/it] 24%|██▍ | 165/681 [07:00<21:51, 2.54s/it] {'loss': 1.1772, 'grad_norm': 24.392139434814453, 'learning_rate': 4.708572792802069e-07, 'fcm_dpo/beta': 0.006125118583440781, 'fcm_dpo/q_t': 0.42784789204597473, 'fcm_dpo/delta': 0.08051982522010803, 'fcm_dpo/margin': 52.59346008300781, 'margin_dpo/margin_mean': 52.59346008300781, 'margin_dpo/margin_std': 99.10749053955078, 'logps/chosen': -143.25941467285156, 'logps/rejected': -208.21279907226562, 'logps/ref_chosen': -61.310691833496094, 'logps/ref_rejected': -73.67060852050781, 'KL/chosen_KL_mean': -81.94873046875, 'KL/rejected_KL_mean': -134.54220581054688, 'KL/mean': -108.24546813964844, 'KL/std': 74.16279602050781, 'logits/chosen': -0.3788298964500427, 'logits/rejected': -0.3494594097137451, 'epoch': 0.24} + 24%|██▍ | 165/681 [07:00<21:51, 2.54s/it] 24%|██▍ | 166/681 [07:03<21:13, 2.47s/it] {'loss': 1.0179, 'grad_norm': 17.77814292907715, 'learning_rate': 4.702530485714461e-07, 'fcm_dpo/beta': 0.00596030056476593, 'fcm_dpo/q_t': 0.3804228901863098, 'fcm_dpo/delta': -0.19864240288734436, 'fcm_dpo/margin': 98.27557373046875, 'margin_dpo/margin_mean': 98.27557373046875, 'margin_dpo/margin_std': 137.25851440429688, 'logps/chosen': -123.1624755859375, 'logps/rejected': -268.5495910644531, 'logps/ref_chosen': -50.98360061645508, 'logps/ref_rejected': -98.09512329101562, 'KL/chosen_KL_mean': -72.17887115478516, 'KL/rejected_KL_mean': -170.4544677734375, 'KL/mean': -121.31666564941406, 'KL/std': 108.35174560546875, 'logits/chosen': -0.3884061574935913, 'logits/rejected': -0.39925825595855713, 'epoch': 0.24} + 24%|██▍ | 166/681 [07:03<21:13, 2.47s/it] 25%|██▍ | 167/681 [07:05<21:31, 2.51s/it] {'loss': 0.9736, 'grad_norm': 18.200578689575195, 'learning_rate': 4.6964301399001877e-07, 'fcm_dpo/beta': 0.0057468172162771225, 'fcm_dpo/q_t': 0.36781153082847595, 'fcm_dpo/delta': -0.20526599884033203, 'fcm_dpo/margin': 103.19131469726562, 'margin_dpo/margin_mean': 103.19131469726562, 'margin_dpo/margin_std': 114.00679016113281, 'logps/chosen': -123.55524444580078, 'logps/rejected': -272.3529052734375, 'logps/ref_chosen': -50.424095153808594, 'logps/ref_rejected': -96.03042602539062, 'KL/chosen_KL_mean': -73.13114929199219, 'KL/rejected_KL_mean': -176.3224639892578, 'KL/mean': -124.726806640625, 'KL/std': 98.56492614746094, 'logits/chosen': -0.36232346296310425, 'logits/rejected': -0.36501890420913696, 'epoch': 0.25} + 25%|██▍ | 167/681 [07:05<21:31, 2.51s/it] 25%|██▍ | 168/681 [07:08<21:40, 2.54s/it] {'loss': 1.0797, 'grad_norm': 19.609725952148438, 'learning_rate': 4.690271916109034e-07, 'fcm_dpo/beta': 0.005651239771395922, 'fcm_dpo/q_t': 0.40402811765670776, 'fcm_dpo/delta': -0.029224606230854988, 'fcm_dpo/margin': 75.70437622070312, 'margin_dpo/margin_mean': 75.70437622070312, 'margin_dpo/margin_std': 102.8135986328125, 'logps/chosen': -129.17868041992188, 'logps/rejected': -230.72879028320312, 'logps/ref_chosen': -49.462825775146484, 'logps/ref_rejected': -75.30855560302734, 'KL/chosen_KL_mean': -79.71585083007812, 'KL/rejected_KL_mean': -155.42022705078125, 'KL/mean': -117.56804656982422, 'KL/std': 91.46189880371094, 'logits/chosen': -0.3544921278953552, 'logits/rejected': -0.3437988758087158, 'epoch': 0.25} + 25%|██▍ | 168/681 [07:08<21:40, 2.54s/it] 25%|██▍ | 169/681 [07:11<22:08, 2.59s/it] {'loss': 1.1611, 'grad_norm': 19.77760124206543, 'learning_rate': 4.6840559766159235e-07, 'fcm_dpo/beta': 0.005572349298745394, 'fcm_dpo/q_t': 0.42019665241241455, 'fcm_dpo/delta': -0.07357925921678543, 'fcm_dpo/margin': 66.46955108642578, 'margin_dpo/margin_mean': 66.46955108642578, 'margin_dpo/margin_std': 125.47772216796875, 'logps/chosen': -141.92974853515625, 'logps/rejected': -231.94158935546875, 'logps/ref_chosen': -59.803443908691406, 'logps/ref_rejected': -83.34574890136719, 'KL/chosen_KL_mean': -82.12629699707031, 'KL/rejected_KL_mean': -148.59585571289062, 'KL/mean': -115.36107635498047, 'KL/std': 91.35723876953125, 'logits/chosen': -0.3844687044620514, 'logits/rejected': -0.36855146288871765, 'epoch': 0.25} + 25%|██▍ | 169/681 [07:11<22:08, 2.59s/it] 25%|██▍ | 170/681 [07:13<22:30, 2.64s/it] {'loss': 1.077, 'grad_norm': 18.546783447265625, 'learning_rate': 4.6777824852166437e-07, 'fcm_dpo/beta': 0.005510912276804447, 'fcm_dpo/q_t': 0.4006960988044739, 'fcm_dpo/delta': -0.03551424294710159, 'fcm_dpo/margin': 78.5929946899414, 'margin_dpo/margin_mean': 78.5929946899414, 'margin_dpo/margin_std': 103.26647186279297, 'logps/chosen': -122.4301986694336, 'logps/rejected': -227.46875, 'logps/ref_chosen': -49.471771240234375, 'logps/ref_rejected': -75.91734313964844, 'KL/chosen_KL_mean': -72.95841979980469, 'KL/rejected_KL_mean': -151.55142211914062, 'KL/mean': -112.25492095947266, 'KL/std': 86.3768310546875, 'logits/chosen': -0.3328009247779846, 'logits/rejected': -0.321723610162735, 'epoch': 0.25} + 25%|██▍ | 170/681 [07:13<22:30, 2.64s/it] 25%|██▌ | 171/681 [07:16<21:38, 2.55s/it] {'loss': 1.1782, 'grad_norm': 27.314945220947266, 'learning_rate': 4.6714516072235273e-07, 'fcm_dpo/beta': 0.005571361631155014, 'fcm_dpo/q_t': 0.425686776638031, 'fcm_dpo/delta': 0.04921392351388931, 'fcm_dpo/margin': 63.276710510253906, 'margin_dpo/margin_mean': 63.276710510253906, 'margin_dpo/margin_std': 130.07742309570312, 'logps/chosen': -191.21240234375, 'logps/rejected': -279.37188720703125, 'logps/ref_chosen': -84.49931335449219, 'logps/ref_rejected': -109.38209533691406, 'KL/chosen_KL_mean': -106.71308135986328, 'KL/rejected_KL_mean': -169.98980712890625, 'KL/mean': -138.3514404296875, 'KL/std': 100.98884582519531, 'logits/chosen': -0.33173030614852905, 'logits/rejected': -0.3102639317512512, 'epoch': 0.25} + 25%|██▌ | 171/681 [07:16<21:38, 2.55s/it] 25%|██▌ | 172/681 [07:18<21:23, 2.52s/it] {'loss': 1.1342, 'grad_norm': 19.571752548217773, 'learning_rate': 4.6650635094610966e-07, 'fcm_dpo/beta': 0.005604305304586887, 'fcm_dpo/q_t': 0.4164371192455292, 'fcm_dpo/delta': 0.027874935418367386, 'fcm_dpo/margin': 66.58624267578125, 'margin_dpo/margin_mean': 66.58624267578125, 'margin_dpo/margin_std': 109.93568420410156, 'logps/chosen': -162.6361083984375, 'logps/rejected': -246.00509643554688, 'logps/ref_chosen': -68.65391540527344, 'logps/ref_rejected': -85.43667602539062, 'KL/chosen_KL_mean': -93.982177734375, 'KL/rejected_KL_mean': -160.56842041015625, 'KL/mean': -127.27529907226562, 'KL/std': 97.7683334350586, 'logits/chosen': -0.3909180760383606, 'logits/rejected': -0.3718334138393402, 'epoch': 0.25} + 25%|██▌ | 172/681 [07:18<21:23, 2.52s/it] 25%|██▌ | 173/681 [07:21<21:13, 2.51s/it] {'loss': 1.1093, 'grad_norm': 19.52805519104004, 'learning_rate': 4.6586183602616687e-07, 'fcm_dpo/beta': 0.00565545866265893, 'fcm_dpo/q_t': 0.4138815701007843, 'fcm_dpo/delta': 0.021153416484594345, 'fcm_dpo/margin': 67.0791015625, 'margin_dpo/margin_mean': 67.0791015625, 'margin_dpo/margin_std': 95.22209167480469, 'logps/chosen': -148.8457794189453, 'logps/rejected': -231.55792236328125, 'logps/ref_chosen': -63.050880432128906, 'logps/ref_rejected': -78.68392181396484, 'KL/chosen_KL_mean': -85.7948989868164, 'KL/rejected_KL_mean': -152.87399291992188, 'KL/mean': -119.3344497680664, 'KL/std': 91.15321350097656, 'logits/chosen': -0.38386523723602295, 'logits/rejected': -0.353518009185791, 'epoch': 0.25} + 25%|██▌ | 173/681 [07:21<21:13, 2.51s/it] 26%|██▌ | 174/681 [07:23<21:14, 2.51s/it] {'loss': 1.0871, 'grad_norm': 26.33484649658203, 'learning_rate': 4.652116329460919e-07, 'fcm_dpo/beta': 0.005634433589875698, 'fcm_dpo/q_t': 0.4018397331237793, 'fcm_dpo/delta': -0.04934954643249512, 'fcm_dpo/margin': 79.28968048095703, 'margin_dpo/margin_mean': 79.28968048095703, 'margin_dpo/margin_std': 115.50275421142578, 'logps/chosen': -135.46676635742188, 'logps/rejected': -263.3046875, 'logps/ref_chosen': -53.36296844482422, 'logps/ref_rejected': -101.91120910644531, 'KL/chosen_KL_mean': -82.10379791259766, 'KL/rejected_KL_mean': -161.3934783935547, 'KL/mean': -121.74862670898438, 'KL/std': 96.38128662109375, 'logits/chosen': -0.32500776648521423, 'logits/rejected': -0.3427908718585968, 'epoch': 0.26} + 26%|██▌ | 174/681 [07:23<21:14, 2.51s/it] 26%|██▌ | 175/681 [07:26<21:26, 2.54s/it] {'loss': 0.9587, 'grad_norm': 27.85973358154297, 'learning_rate': 4.645557588393406e-07, 'fcm_dpo/beta': 0.00541552621871233, 'fcm_dpo/q_t': 0.3663579821586609, 'fcm_dpo/delta': -0.19940567016601562, 'fcm_dpo/margin': 108.45330810546875, 'margin_dpo/margin_mean': 108.45330810546875, 'margin_dpo/margin_std': 108.14402770996094, 'logps/chosen': -120.93342590332031, 'logps/rejected': -273.47479248046875, 'logps/ref_chosen': -45.417762756347656, 'logps/ref_rejected': -89.50579833984375, 'KL/chosen_KL_mean': -75.51565551757812, 'KL/rejected_KL_mean': -183.96896362304688, 'KL/mean': -129.7423095703125, 'KL/std': 102.85943603515625, 'logits/chosen': -0.32464098930358887, 'logits/rejected': -0.31119775772094727, 'epoch': 0.26} + 26%|██▌ | 175/681 [07:26<21:26, 2.54s/it] 26%|██▌ | 176/681 [07:28<20:44, 2.46s/it] {'loss': 1.0459, 'grad_norm': 21.088232040405273, 'learning_rate': 4.638942309888058e-07, 'fcm_dpo/beta': 0.00530798826366663, 'fcm_dpo/q_t': 0.3935086727142334, 'fcm_dpo/delta': -0.08994344621896744, 'fcm_dpo/margin': 91.49755859375, 'margin_dpo/margin_mean': 91.49755859375, 'margin_dpo/margin_std': 118.77714538574219, 'logps/chosen': -131.6407470703125, 'logps/rejected': -268.24444580078125, 'logps/ref_chosen': -50.452842712402344, 'logps/ref_rejected': -95.5589599609375, 'KL/chosen_KL_mean': -81.18791198730469, 'KL/rejected_KL_mean': -172.68548583984375, 'KL/mean': -126.93669891357422, 'KL/std': 102.40462493896484, 'logits/chosen': -0.3157244324684143, 'logits/rejected': -0.3328602910041809, 'epoch': 0.26} + 26%|██▌ | 176/681 [07:28<20:44, 2.46s/it] 26%|██▌ | 177/681 [07:31<21:17, 2.53s/it] {'loss': 1.0546, 'grad_norm': 26.937484741210938, 'learning_rate': 4.6322706682636137e-07, 'fcm_dpo/beta': 0.005232410505414009, 'fcm_dpo/q_t': 0.3961235284805298, 'fcm_dpo/delta': -0.07259676605463028, 'fcm_dpo/margin': 89.67583465576172, 'margin_dpo/margin_mean': 89.67582702636719, 'margin_dpo/margin_std': 117.8981704711914, 'logps/chosen': -154.6217498779297, 'logps/rejected': -278.97491455078125, 'logps/ref_chosen': -61.216468811035156, 'logps/ref_rejected': -95.89378356933594, 'KL/chosen_KL_mean': -93.40528869628906, 'KL/rejected_KL_mean': -183.0811309814453, 'KL/mean': -138.24319458007812, 'KL/std': 109.86962127685547, 'logits/chosen': -0.3570261597633362, 'logits/rejected': -0.3483562469482422, 'epoch': 0.26} + 26%|██▌ | 177/681 [07:31<21:17, 2.53s/it] 26%|██▌ | 178/681 [07:33<21:04, 2.51s/it] {'loss': 0.9997, 'grad_norm': 24.992889404296875, 'learning_rate': 4.6255428393240354e-07, 'fcm_dpo/beta': 0.005035985726863146, 'fcm_dpo/q_t': 0.37551695108413696, 'fcm_dpo/delta': -0.18584051728248596, 'fcm_dpo/margin': 114.01716613769531, 'margin_dpo/margin_mean': 114.01716613769531, 'margin_dpo/margin_std': 141.47775268554688, 'logps/chosen': -160.82928466796875, 'logps/rejected': -321.947021484375, 'logps/ref_chosen': -58.26478958129883, 'logps/ref_rejected': -105.3653335571289, 'KL/chosen_KL_mean': -102.56449890136719, 'KL/rejected_KL_mean': -216.5816650390625, 'KL/mean': -159.57308959960938, 'KL/std': 129.33131408691406, 'logits/chosen': -0.2407420575618744, 'logits/rejected': -0.23280589282512665, 'epoch': 0.26} + 26%|██▌ | 178/681 [07:33<21:04, 2.51s/it] 26%|██▋ | 179/681 [07:36<21:33, 2.58s/it] {'loss': 1.1302, 'grad_norm': 26.21021270751953, 'learning_rate': 4.6187590003538724e-07, 'fcm_dpo/beta': 0.004978477954864502, 'fcm_dpo/q_t': 0.4102519154548645, 'fcm_dpo/delta': -0.007790856063365936, 'fcm_dpo/margin': 81.7546157836914, 'margin_dpo/margin_mean': 81.75462341308594, 'margin_dpo/margin_std': 138.115966796875, 'logps/chosen': -165.05535888671875, 'logps/rejected': -276.27947998046875, 'logps/ref_chosen': -61.05832290649414, 'logps/ref_rejected': -90.52782440185547, 'KL/chosen_KL_mean': -103.99703979492188, 'KL/rejected_KL_mean': -185.75167846679688, 'KL/mean': -144.8743438720703, 'KL/std': 110.77649688720703, 'logits/chosen': -0.2729560434818268, 'logits/rejected': -0.2793646454811096, 'epoch': 0.26} + 26%|██▋ | 179/681 [07:36<21:33, 2.58s/it] 26%|██▋ | 180/681 [07:38<21:06, 2.53s/it] {'loss': 1.0234, 'grad_norm': 17.1671142578125, 'learning_rate': 4.611919330113591e-07, 'fcm_dpo/beta': 0.004909820854663849, 'fcm_dpo/q_t': 0.38421761989593506, 'fcm_dpo/delta': -0.1142577975988388, 'fcm_dpo/margin': 103.52678680419922, 'margin_dpo/margin_mean': 103.52677917480469, 'margin_dpo/margin_std': 122.91793060302734, 'logps/chosen': -143.68246459960938, 'logps/rejected': -291.078369140625, 'logps/ref_chosen': -54.34272003173828, 'logps/ref_rejected': -98.21183776855469, 'KL/chosen_KL_mean': -89.33973693847656, 'KL/rejected_KL_mean': -192.86651611328125, 'KL/mean': -141.10313415527344, 'KL/std': 98.31314086914062, 'logits/chosen': -0.3321910500526428, 'logits/rejected': -0.32747143507003784, 'epoch': 0.26} + 26%|██▋ | 180/681 [07:38<21:06, 2.53s/it] 27%|██▋ | 181/681 [07:41<21:16, 2.55s/it] {'loss': 1.1666, 'grad_norm': 17.40216064453125, 'learning_rate': 4.605024008834863e-07, 'fcm_dpo/beta': 0.004953712224960327, 'fcm_dpo/q_t': 0.4273186922073364, 'fcm_dpo/delta': 0.08287452906370163, 'fcm_dpo/margin': 64.52621459960938, 'margin_dpo/margin_mean': 64.52621459960938, 'margin_dpo/margin_std': 114.01302337646484, 'logps/chosen': -132.30836486816406, 'logps/rejected': -203.49029541015625, 'logps/ref_chosen': -55.000457763671875, 'logps/ref_rejected': -61.656166076660156, 'KL/chosen_KL_mean': -77.30790710449219, 'KL/rejected_KL_mean': -141.83412170410156, 'KL/mean': -109.57101440429688, 'KL/std': 90.15087890625, 'logits/chosen': -0.3179657459259033, 'logits/rejected': -0.290554940700531, 'epoch': 0.27} + 27%|██▋ | 181/681 [07:41<21:16, 2.55s/it] 27%|██▋ | 182/681 [07:43<21:12, 2.55s/it] {'loss': 1.0114, 'grad_norm': 17.18657112121582, 'learning_rate': 4.598073218215817e-07, 'fcm_dpo/beta': 0.004840575158596039, 'fcm_dpo/q_t': 0.37746167182922363, 'fcm_dpo/delta': -0.1485544741153717, 'fcm_dpo/margin': 111.48786926269531, 'margin_dpo/margin_mean': 111.48786926269531, 'margin_dpo/margin_std': 131.83238220214844, 'logps/chosen': -114.70294952392578, 'logps/rejected': -274.60455322265625, 'logps/ref_chosen': -41.107852935791016, 'logps/ref_rejected': -89.5215835571289, 'KL/chosen_KL_mean': -73.5950927734375, 'KL/rejected_KL_mean': -185.08297729492188, 'KL/mean': -129.3390350341797, 'KL/std': 109.08509063720703, 'logits/chosen': -0.30293479561805725, 'logits/rejected': -0.3121221959590912, 'epoch': 0.27} + 27%|██▋ | 182/681 [07:44<21:12, 2.55s/it] 27%|██▋ | 183/681 [07:46<20:38, 2.49s/it] {'loss': 1.1804, 'grad_norm': 18.724519729614258, 'learning_rate': 4.5910671414162484e-07, 'fcm_dpo/beta': 0.004783437587320805, 'fcm_dpo/q_t': 0.43268686532974243, 'fcm_dpo/delta': -0.041149888187646866, 'fcm_dpo/margin': 60.38490295410156, 'margin_dpo/margin_mean': 60.38490295410156, 'margin_dpo/margin_std': 99.7965087890625, 'logps/chosen': -167.5782470703125, 'logps/rejected': -246.414306640625, 'logps/ref_chosen': -57.52456283569336, 'logps/ref_rejected': -75.97572326660156, 'KL/chosen_KL_mean': -110.05368041992188, 'KL/rejected_KL_mean': -170.43858337402344, 'KL/mean': -140.24612426757812, 'KL/std': 90.4064712524414, 'logits/chosen': -0.31450870633125305, 'logits/rejected': -0.30454862117767334, 'epoch': 0.27} + 27%|██▋ | 183/681 [07:46<20:38, 2.49s/it] 27%|██▋ | 184/681 [07:49<21:10, 2.56s/it] {'loss': 1.1707, 'grad_norm': 19.71858024597168, 'learning_rate': 4.5840059630527985e-07, 'fcm_dpo/beta': 0.00478300591930747, 'fcm_dpo/q_t': 0.43101605772972107, 'fcm_dpo/delta': -0.0009023167076520622, 'fcm_dpo/margin': 63.327796936035156, 'margin_dpo/margin_mean': 63.32780075073242, 'margin_dpo/margin_std': 107.8141098022461, 'logps/chosen': -151.58563232421875, 'logps/rejected': -233.0025634765625, 'logps/ref_chosen': -58.544952392578125, 'logps/ref_rejected': -76.63406372070312, 'KL/chosen_KL_mean': -93.04067993164062, 'KL/rejected_KL_mean': -156.3684844970703, 'KL/mean': -124.70458984375, 'KL/std': 85.67855834960938, 'logits/chosen': -0.34772413969039917, 'logits/rejected': -0.337999165058136, 'epoch': 0.27} + 27%|██▋ | 184/681 [07:49<21:10, 2.56s/it] 27%|██▋ | 185/681 [07:51<20:51, 2.52s/it] {'loss': 1.2331, 'grad_norm': 19.57623291015625, 'learning_rate': 4.5768898691940836e-07, 'fcm_dpo/beta': 0.004893806297332048, 'fcm_dpo/q_t': 0.446666955947876, 'fcm_dpo/delta': 0.1559191346168518, 'fcm_dpo/margin': 50.67546081542969, 'margin_dpo/margin_mean': 50.67546081542969, 'margin_dpo/margin_std': 120.71915435791016, 'logps/chosen': -163.92337036132812, 'logps/rejected': -226.3355712890625, 'logps/ref_chosen': -62.025848388671875, 'logps/ref_rejected': -73.7625961303711, 'KL/chosen_KL_mean': -101.89752960205078, 'KL/rejected_KL_mean': -152.57298278808594, 'KL/mean': -127.2352523803711, 'KL/std': 100.57600402832031, 'logits/chosen': -0.31452715396881104, 'logits/rejected': -0.29128819704055786, 'epoch': 0.27} + 27%|██▋ | 185/681 [07:51<20:51, 2.52s/it] 27%|██▋ | 186/681 [07:53<20:46, 2.52s/it] {'loss': 1.043, 'grad_norm': 28.687524795532227, 'learning_rate': 4.5697190473557947e-07, 'fcm_dpo/beta': 0.004873909987509251, 'fcm_dpo/q_t': 0.3932849168777466, 'fcm_dpo/delta': -0.0744955912232399, 'fcm_dpo/margin': 96.61160278320312, 'margin_dpo/margin_mean': 96.6115951538086, 'margin_dpo/margin_std': 116.60765075683594, 'logps/chosen': -162.64187622070312, 'logps/rejected': -277.97247314453125, 'logps/ref_chosen': -69.35346984863281, 'logps/ref_rejected': -88.07244873046875, 'KL/chosen_KL_mean': -93.28840637207031, 'KL/rejected_KL_mean': -189.89999389648438, 'KL/mean': -141.59420776367188, 'KL/std': 101.8707046508789, 'logits/chosen': -0.36662542819976807, 'logits/rejected': -0.3481537103652954, 'epoch': 0.27} + 27%|██▋ | 186/681 [07:54<20:46, 2.52s/it] 27%|██▋ | 187/681 [07:56<20:10, 2.45s/it] {'loss': 1.0963, 'grad_norm': 22.329120635986328, 'learning_rate': 4.5624936864957555e-07, 'fcm_dpo/beta': 0.004888523370027542, 'fcm_dpo/q_t': 0.41026172041893005, 'fcm_dpo/delta': 0.008520994335412979, 'fcm_dpo/margin': 80.06629943847656, 'margin_dpo/margin_mean': 80.06629943847656, 'margin_dpo/margin_std': 105.62159729003906, 'logps/chosen': -140.65951538085938, 'logps/rejected': -249.93846130371094, 'logps/ref_chosen': -52.7564582824707, 'logps/ref_rejected': -81.96910095214844, 'KL/chosen_KL_mean': -87.90306091308594, 'KL/rejected_KL_mean': -167.9693603515625, 'KL/mean': -127.93620300292969, 'KL/std': 96.8105697631836, 'logits/chosen': -0.3108750581741333, 'logits/rejected': -0.3043569326400757, 'epoch': 0.27} + 27%|██▋ | 187/681 [07:56<20:10, 2.45s/it] 28%|██▊ | 188/681 [07:58<20:25, 2.49s/it] {'loss': 1.0432, 'grad_norm': 30.24648094177246, 'learning_rate': 4.5552139770089454e-07, 'fcm_dpo/beta': 0.004818486049771309, 'fcm_dpo/q_t': 0.3935472369194031, 'fcm_dpo/delta': -0.07663469016551971, 'fcm_dpo/margin': 98.17579650878906, 'margin_dpo/margin_mean': 98.17579650878906, 'margin_dpo/margin_std': 119.95960998535156, 'logps/chosen': -132.5113525390625, 'logps/rejected': -270.81207275390625, 'logps/ref_chosen': -49.415489196777344, 'logps/ref_rejected': -89.54043579101562, 'KL/chosen_KL_mean': -83.09587097167969, 'KL/rejected_KL_mean': -181.27166748046875, 'KL/mean': -132.18377685546875, 'KL/std': 108.80839538574219, 'logits/chosen': -0.29885581135749817, 'logits/rejected': -0.3061617612838745, 'epoch': 0.28} + 28%|██▊ | 188/681 [07:58<20:25, 2.49s/it] 28%|██▊ | 189/681 [08:01<20:19, 2.48s/it] {'loss': 1.1307, 'grad_norm': 26.0447998046875, 'learning_rate': 4.5478801107224794e-07, 'fcm_dpo/beta': 0.004812294617295265, 'fcm_dpo/q_t': 0.41616469621658325, 'fcm_dpo/delta': 0.017675260081887245, 'fcm_dpo/margin': 79.58168029785156, 'margin_dpo/margin_mean': 79.58168029785156, 'margin_dpo/margin_std': 134.16441345214844, 'logps/chosen': -151.27183532714844, 'logps/rejected': -250.62191772460938, 'logps/ref_chosen': -52.39896011352539, 'logps/ref_rejected': -72.16735076904297, 'KL/chosen_KL_mean': -98.87287902832031, 'KL/rejected_KL_mean': -178.45455932617188, 'KL/mean': -138.66372680664062, 'KL/std': 109.66006469726562, 'logits/chosen': -0.3316497206687927, 'logits/rejected': -0.3185557723045349, 'epoch': 0.28} + 28%|██▊ | 189/681 [08:01<20:19, 2.48s/it] 28%|██▊ | 190/681 [08:03<19:46, 2.42s/it] {'loss': 1.0853, 'grad_norm': 18.914052963256836, 'learning_rate': 4.5404922808905543e-07, 'fcm_dpo/beta': 0.004810405895113945, 'fcm_dpo/q_t': 0.39865702390670776, 'fcm_dpo/delta': -0.0634998232126236, 'fcm_dpo/margin': 95.53245544433594, 'margin_dpo/margin_mean': 95.53245544433594, 'margin_dpo/margin_std': 137.739990234375, 'logps/chosen': -171.46310424804688, 'logps/rejected': -304.863037109375, 'logps/ref_chosen': -64.68305969238281, 'logps/ref_rejected': -102.55052185058594, 'KL/chosen_KL_mean': -106.78004455566406, 'KL/rejected_KL_mean': -202.3125, 'KL/mean': -154.5462646484375, 'KL/std': 118.31991577148438, 'logits/chosen': -0.3244064450263977, 'logits/rejected': -0.31318405270576477, 'epoch': 0.28} + 28%|██▊ | 190/681 [08:03<19:46, 2.42s/it] 28%|██▊ | 191/681 [08:06<20:33, 2.52s/it] {'loss': 0.955, 'grad_norm': 21.69601058959961, 'learning_rate': 4.5330506821893565e-07, 'fcm_dpo/beta': 0.004560886882245541, 'fcm_dpo/q_t': 0.3622073531150818, 'fcm_dpo/delta': -0.24714502692222595, 'fcm_dpo/margin': 138.17645263671875, 'margin_dpo/margin_mean': 138.17645263671875, 'margin_dpo/margin_std': 153.68263244628906, 'logps/chosen': -167.88194274902344, 'logps/rejected': -347.5391845703125, 'logps/ref_chosen': -68.65887451171875, 'logps/ref_rejected': -110.1396713256836, 'KL/chosen_KL_mean': -99.22306823730469, 'KL/rejected_KL_mean': -237.39950561523438, 'KL/mean': -168.31130981445312, 'KL/std': 138.23770141601562, 'logits/chosen': -0.3293009102344513, 'logits/rejected': -0.30673933029174805, 'epoch': 0.28} + 28%|██▊ | 191/681 [08:06<20:33, 2.52s/it] 28%|██▊ | 192/681 [08:08<20:25, 2.51s/it] {'loss': 1.1165, 'grad_norm': 25.06287956237793, 'learning_rate': 4.5255555107119336e-07, 'fcm_dpo/beta': 0.0045256055891513824, 'fcm_dpo/q_t': 0.4095039367675781, 'fcm_dpo/delta': -0.013612883165478706, 'fcm_dpo/margin': 91.24540710449219, 'margin_dpo/margin_mean': 91.24540710449219, 'margin_dpo/margin_std': 148.260009765625, 'logps/chosen': -197.106201171875, 'logps/rejected': -321.946044921875, 'logps/ref_chosen': -69.72691345214844, 'logps/ref_rejected': -103.32135009765625, 'KL/chosen_KL_mean': -127.37928009033203, 'KL/rejected_KL_mean': -218.62469482421875, 'KL/mean': -173.00198364257812, 'KL/std': 119.46993255615234, 'logits/chosen': -0.29608154296875, 'logits/rejected': -0.2969720959663391, 'epoch': 0.28} + 28%|██▊ | 192/681 [08:08<20:25, 2.51s/it] 28%|██▊ | 193/681 [08:11<20:06, 2.47s/it] {'loss': 1.2557, 'grad_norm': 29.155603408813477, 'learning_rate': 4.5180069639630236e-07, 'fcm_dpo/beta': 0.0045287711545825005, 'fcm_dpo/q_t': 0.44289711117744446, 'fcm_dpo/delta': 0.041177622973918915, 'fcm_dpo/margin': 53.04465103149414, 'margin_dpo/margin_mean': 53.04465103149414, 'margin_dpo/margin_std': 140.07958984375, 'logps/chosen': -185.99163818359375, 'logps/rejected': -255.25335693359375, 'logps/ref_chosen': -60.19049835205078, 'logps/ref_rejected': -76.40755462646484, 'KL/chosen_KL_mean': -125.8011474609375, 'KL/rejected_KL_mean': -178.84580993652344, 'KL/mean': -152.323486328125, 'KL/std': 109.30430603027344, 'logits/chosen': -0.3240417540073395, 'logits/rejected': -0.319375216960907, 'epoch': 0.28} + 28%|██▊ | 193/681 [08:11<20:06, 2.47s/it] 28%|██▊ | 194/681 [08:13<19:41, 2.43s/it] {'loss': 1.0819, 'grad_norm': 16.880617141723633, 'learning_rate': 4.510405240853854e-07, 'fcm_dpo/beta': 0.004540526773780584, 'fcm_dpo/q_t': 0.40839725732803345, 'fcm_dpo/delta': 0.00930863805115223, 'fcm_dpo/margin': 86.09469604492188, 'margin_dpo/margin_mean': 86.09468841552734, 'margin_dpo/margin_std': 99.02046203613281, 'logps/chosen': -115.75386047363281, 'logps/rejected': -224.69296264648438, 'logps/ref_chosen': -37.84037399291992, 'logps/ref_rejected': -60.684783935546875, 'KL/chosen_KL_mean': -77.91348266601562, 'KL/rejected_KL_mean': -164.00819396972656, 'KL/mean': -120.96083068847656, 'KL/std': 90.18110656738281, 'logits/chosen': -0.2117328941822052, 'logits/rejected': -0.19627614319324493, 'epoch': 0.28} + 28%|██▊ | 194/681 [08:13<19:41, 2.43s/it] 29%|██▊ | 195/681 [08:16<20:08, 2.49s/it] {'loss': 1.0732, 'grad_norm': 23.332624435424805, 'learning_rate': 4.5027505416968985e-07, 'fcm_dpo/beta': 0.004536244552582502, 'fcm_dpo/q_t': 0.40300172567367554, 'fcm_dpo/delta': -0.02507840283215046, 'fcm_dpo/margin': 93.46810913085938, 'margin_dpo/margin_mean': 93.46810913085938, 'margin_dpo/margin_std': 118.67242431640625, 'logps/chosen': -178.29763793945312, 'logps/rejected': -313.6451416015625, 'logps/ref_chosen': -54.891571044921875, 'logps/ref_rejected': -96.77095794677734, 'KL/chosen_KL_mean': -123.40607452392578, 'KL/rejected_KL_mean': -216.87420654296875, 'KL/mean': -170.14013671875, 'KL/std': 114.27099609375, 'logits/chosen': -0.2611733078956604, 'logits/rejected': -0.2804575562477112, 'epoch': 0.29} + 29%|██▊ | 195/681 [08:16<20:08, 2.49s/it] 29%|██▉ | 196/681 [08:18<20:14, 2.50s/it] {'loss': 1.0618, 'grad_norm': 17.2235107421875, 'learning_rate': 4.495043068200599e-07, 'fcm_dpo/beta': 0.004456968978047371, 'fcm_dpo/q_t': 0.3952023983001709, 'fcm_dpo/delta': -0.07591746747493744, 'fcm_dpo/margin': 105.74667358398438, 'margin_dpo/margin_mean': 105.74667358398438, 'margin_dpo/margin_std': 138.9565887451172, 'logps/chosen': -147.8690185546875, 'logps/rejected': -276.42340087890625, 'logps/ref_chosen': -53.245243072509766, 'logps/ref_rejected': -76.05294799804688, 'KL/chosen_KL_mean': -94.62376403808594, 'KL/rejected_KL_mean': -200.3704376220703, 'KL/mean': -147.49710083007812, 'KL/std': 114.38645935058594, 'logits/chosen': -0.2857983708381653, 'logits/rejected': -0.2708747684955597, 'epoch': 0.29} + 29%|██▉ | 196/681 [08:18<20:14, 2.50s/it] 29%|██▉ | 197/681 [08:21<20:25, 2.53s/it] {'loss': 1.1147, 'grad_norm': 18.91534996032715, 'learning_rate': 4.4872830234640493e-07, 'fcm_dpo/beta': 0.0045026084408164024, 'fcm_dpo/q_t': 0.4166555404663086, 'fcm_dpo/delta': 0.03682290017604828, 'fcm_dpo/margin': 80.89613342285156, 'margin_dpo/margin_mean': 80.89613342285156, 'margin_dpo/margin_std': 111.98675537109375, 'logps/chosen': -158.08641052246094, 'logps/rejected': -255.7711181640625, 'logps/ref_chosen': -60.42033386230469, 'logps/ref_rejected': -77.20890808105469, 'KL/chosen_KL_mean': -97.66607666015625, 'KL/rejected_KL_mean': -178.56219482421875, 'KL/mean': -138.1141357421875, 'KL/std': 99.23837280273438, 'logits/chosen': -0.28415030241012573, 'logits/rejected': -0.2793928384780884, 'epoch': 0.29} + 29%|██▉ | 197/681 [08:21<20:25, 2.53s/it] 29%|██▉ | 198/681 [08:23<20:29, 2.55s/it] {'loss': 1.0625, 'grad_norm': 22.420948028564453, 'learning_rate': 4.479470611971645e-07, 'fcm_dpo/beta': 0.004457796923816204, 'fcm_dpo/q_t': 0.3982738256454468, 'fcm_dpo/delta': -0.06293203681707382, 'fcm_dpo/margin': 103.20319366455078, 'margin_dpo/margin_mean': 103.20319366455078, 'margin_dpo/margin_std': 139.06460571289062, 'logps/chosen': -165.71139526367188, 'logps/rejected': -311.12164306640625, 'logps/ref_chosen': -55.03618621826172, 'logps/ref_rejected': -97.24325561523438, 'KL/chosen_KL_mean': -110.67520141601562, 'KL/rejected_KL_mean': -213.87840270996094, 'KL/mean': -162.27679443359375, 'KL/std': 122.85459899902344, 'logits/chosen': -0.31583186984062195, 'logits/rejected': -0.31620723009109497, 'epoch': 0.29} + 29%|██▉ | 198/681 [08:23<20:29, 2.55s/it] 29%|██▉ | 199/681 [08:26<20:49, 2.59s/it] {'loss': 1.0706, 'grad_norm': 23.365877151489258, 'learning_rate': 4.471606039587695e-07, 'fcm_dpo/beta': 0.004380636848509312, 'fcm_dpo/q_t': 0.39817678928375244, 'fcm_dpo/delta': -0.054408542811870575, 'fcm_dpo/margin': 102.95765686035156, 'margin_dpo/margin_mean': 102.95765686035156, 'margin_dpo/margin_std': 137.37387084960938, 'logps/chosen': -161.298828125, 'logps/rejected': -292.07586669921875, 'logps/ref_chosen': -56.828826904296875, 'logps/ref_rejected': -84.64820861816406, 'KL/chosen_KL_mean': -104.47000885009766, 'KL/rejected_KL_mean': -207.42767333984375, 'KL/mean': -155.94883728027344, 'KL/std': 111.98977661132812, 'logits/chosen': -0.3535653352737427, 'logits/rejected': -0.3379266858100891, 'epoch': 0.29} + 29%|██▉ | 199/681 [08:26<20:49, 2.59s/it] 29%|██▉ | 200/681 [08:29<20:48, 2.60s/it] {'loss': 1.0923, 'grad_norm': 20.43732452392578, 'learning_rate': 4.4636895135509966e-07, 'fcm_dpo/beta': 0.004349041730165482, 'fcm_dpo/q_t': 0.4016646146774292, 'fcm_dpo/delta': -0.046485088765621185, 'fcm_dpo/margin': 102.1077880859375, 'margin_dpo/margin_mean': 102.1077880859375, 'margin_dpo/margin_std': 154.43467712402344, 'logps/chosen': -156.04405212402344, 'logps/rejected': -285.6932067871094, 'logps/ref_chosen': -53.06706237792969, 'logps/ref_rejected': -80.60843658447266, 'KL/chosen_KL_mean': -102.97698974609375, 'KL/rejected_KL_mean': -205.0847625732422, 'KL/mean': -154.0308837890625, 'KL/std': 120.30370330810547, 'logits/chosen': -0.28155362606048584, 'logits/rejected': -0.2662222385406494, 'epoch': 0.29} + 29%|██▉ | 200/681 [08:29<20:48, 2.60s/it] 30%|██▉ | 201/681 [08:31<20:56, 2.62s/it] {'loss': 1.0816, 'grad_norm': 20.02793312072754, 'learning_rate': 4.455721242469372e-07, 'fcm_dpo/beta': 0.004335206001996994, 'fcm_dpo/q_t': 0.4008065462112427, 'fcm_dpo/delta': -0.04319122061133385, 'fcm_dpo/margin': 101.78836059570312, 'margin_dpo/margin_mean': 101.78836059570312, 'margin_dpo/margin_std': 143.44985961914062, 'logps/chosen': -182.7252197265625, 'logps/rejected': -323.9195861816406, 'logps/ref_chosen': -75.4022216796875, 'logps/ref_rejected': -114.80821990966797, 'KL/chosen_KL_mean': -107.32299041748047, 'KL/rejected_KL_mean': -209.11135864257812, 'KL/mean': -158.21717834472656, 'KL/std': 126.88838195800781, 'logits/chosen': -0.3679494261741638, 'logits/rejected': -0.36475256085395813, 'epoch': 0.3} + 30%|██▉ | 201/681 [08:31<20:56, 2.62s/it] 30%|██▉ | 202/681 [08:34<20:58, 2.63s/it] {'loss': 1.1858, 'grad_norm': 20.026168823242188, 'learning_rate': 4.4477014363141755e-07, 'fcm_dpo/beta': 0.004375634714961052, 'fcm_dpo/q_t': 0.43042024970054626, 'fcm_dpo/delta': 0.08873856067657471, 'fcm_dpo/margin': 71.75799560546875, 'margin_dpo/margin_mean': 71.75798797607422, 'margin_dpo/margin_std': 141.61204528808594, 'logps/chosen': -161.15150451660156, 'logps/rejected': -269.793212890625, 'logps/ref_chosen': -50.101318359375, 'logps/ref_rejected': -86.98503112792969, 'KL/chosen_KL_mean': -111.05018615722656, 'KL/rejected_KL_mean': -182.8081817626953, 'KL/mean': -146.92918395996094, 'KL/std': 109.39289855957031, 'logits/chosen': -0.2920665740966797, 'logits/rejected': -0.30537718534469604, 'epoch': 0.3} + 30%|██▉ | 202/681 [08:34<20:58, 2.63s/it] 30%|██▉ | 203/681 [08:37<21:08, 2.65s/it] {'loss': 1.1052, 'grad_norm': 20.92191505432129, 'learning_rate': 4.439630306414758e-07, 'fcm_dpo/beta': 0.004399011377245188, 'fcm_dpo/q_t': 0.41227254271507263, 'fcm_dpo/delta': 0.016021015122532845, 'fcm_dpo/margin': 87.4269790649414, 'margin_dpo/margin_mean': 87.4269790649414, 'margin_dpo/margin_std': 122.52427673339844, 'logps/chosen': -169.751953125, 'logps/rejected': -282.4652099609375, 'logps/ref_chosen': -60.60969543457031, 'logps/ref_rejected': -85.89596557617188, 'KL/chosen_KL_mean': -109.14225769042969, 'KL/rejected_KL_mean': -196.5692138671875, 'KL/mean': -152.8557586669922, 'KL/std': 111.0545654296875, 'logits/chosen': -0.3324393033981323, 'logits/rejected': -0.321586549282074, 'epoch': 0.3} + 30%|██▉ | 203/681 [08:37<21:08, 2.65s/it] 30%|██▉ | 204/681 [08:39<21:16, 2.68s/it] {'loss': 1.1504, 'grad_norm': 26.054628372192383, 'learning_rate': 4.431508065452897e-07, 'fcm_dpo/beta': 0.004431103356182575, 'fcm_dpo/q_t': 0.4206123650074005, 'fcm_dpo/delta': 0.04046226292848587, 'fcm_dpo/margin': 81.47259521484375, 'margin_dpo/margin_mean': 81.47258758544922, 'margin_dpo/margin_std': 142.71099853515625, 'logps/chosen': -202.4814453125, 'logps/rejected': -291.4849853515625, 'logps/ref_chosen': -80.16496276855469, 'logps/ref_rejected': -87.69590759277344, 'KL/chosen_KL_mean': -122.31649780273438, 'KL/rejected_KL_mean': -203.78907775878906, 'KL/mean': -163.0527801513672, 'KL/std': 121.6878662109375, 'logits/chosen': -0.4261574149131775, 'logits/rejected': -0.38839346170425415, 'epoch': 0.3} + 30%|██▉ | 204/681 [08:39<21:16, 2.68s/it] 30%|███ | 205/681 [08:42<21:05, 2.66s/it] {'loss': 1.0606, 'grad_norm': 19.602901458740234, 'learning_rate': 4.4233349274571974e-07, 'fcm_dpo/beta': 0.004357962869107723, 'fcm_dpo/q_t': 0.3927791714668274, 'fcm_dpo/delta': -0.07328492403030396, 'fcm_dpo/margin': 107.37113189697266, 'margin_dpo/margin_mean': 107.37113952636719, 'margin_dpo/margin_std': 136.42298889160156, 'logps/chosen': -176.95726013183594, 'logps/rejected': -310.0687255859375, 'logps/ref_chosen': -59.384735107421875, 'logps/ref_rejected': -85.12505340576172, 'KL/chosen_KL_mean': -117.57252502441406, 'KL/rejected_KL_mean': -224.94366455078125, 'KL/mean': -171.2581024169922, 'KL/std': 124.95415496826172, 'logits/chosen': -0.34010183811187744, 'logits/rejected': -0.3110367953777313, 'epoch': 0.3} + 30%|███ | 205/681 [08:42<21:05, 2.66s/it] 30%|███ | 206/681 [08:44<20:11, 2.55s/it] {'loss': 1.0155, 'grad_norm': 25.08537483215332, 'learning_rate': 4.415111107797445e-07, 'fcm_dpo/beta': 0.004292918369174004, 'fcm_dpo/q_t': 0.38319119811058044, 'fcm_dpo/delta': -0.10862280428409576, 'fcm_dpo/margin': 117.07073974609375, 'margin_dpo/margin_mean': 117.07073974609375, 'margin_dpo/margin_std': 127.296875, 'logps/chosen': -155.09474182128906, 'logps/rejected': -324.15447998046875, 'logps/ref_chosen': -46.964500427246094, 'logps/ref_rejected': -98.9534912109375, 'KL/chosen_KL_mean': -108.13024139404297, 'KL/rejected_KL_mean': -225.20098876953125, 'KL/mean': -166.66561889648438, 'KL/std': 116.73199462890625, 'logits/chosen': -0.25357377529144287, 'logits/rejected': -0.25648266077041626, 'epoch': 0.3} + 30%|███ | 206/681 [08:44<20:11, 2.55s/it] 30%|███ | 207/681 [08:47<19:58, 2.53s/it] {'loss': 0.9899, 'grad_norm': 22.83678436279297, 'learning_rate': 4.4068368231789365e-07, 'fcm_dpo/beta': 0.004175534471869469, 'fcm_dpo/q_t': 0.37481075525283813, 'fcm_dpo/delta': -0.1783892959356308, 'fcm_dpo/margin': 136.12110900878906, 'margin_dpo/margin_mean': 136.12110900878906, 'margin_dpo/margin_std': 157.92230224609375, 'logps/chosen': -155.42156982421875, 'logps/rejected': -319.9342346191406, 'logps/ref_chosen': -56.05625915527344, 'logps/ref_rejected': -84.44779968261719, 'KL/chosen_KL_mean': -99.36531829833984, 'KL/rejected_KL_mean': -235.48643493652344, 'KL/mean': -167.42587280273438, 'KL/std': 135.0901641845703, 'logits/chosen': -0.3456140458583832, 'logits/rejected': -0.3190155029296875, 'epoch': 0.3} + 30%|███ | 207/681 [08:47<19:58, 2.53s/it] 31%|███ | 208/681 [08:49<20:11, 2.56s/it] {'loss': 1.0928, 'grad_norm': 25.079753875732422, 'learning_rate': 4.398512291636768e-07, 'fcm_dpo/beta': 0.004102812148630619, 'fcm_dpo/q_t': 0.40152066946029663, 'fcm_dpo/delta': -0.03629569336771965, 'fcm_dpo/margin': 105.91764068603516, 'margin_dpo/margin_mean': 105.91764831542969, 'margin_dpo/margin_std': 156.15579223632812, 'logps/chosen': -219.79457092285156, 'logps/rejected': -352.9315185546875, 'logps/ref_chosen': -67.06761169433594, 'logps/ref_rejected': -94.28689575195312, 'KL/chosen_KL_mean': -152.72695922851562, 'KL/rejected_KL_mean': -258.64459228515625, 'KL/mean': -205.685791015625, 'KL/std': 128.31884765625, 'logits/chosen': -0.3910176157951355, 'logits/rejected': -0.3746778964996338, 'epoch': 0.31} + 31%|███ | 208/681 [08:50<20:11, 2.56s/it] 31%|███ | 209/681 [08:52<19:38, 2.50s/it] {'loss': 1.1281, 'grad_norm': 30.509929656982422, 'learning_rate': 4.3901377325300857e-07, 'fcm_dpo/beta': 0.004114994779229164, 'fcm_dpo/q_t': 0.4131506383419037, 'fcm_dpo/delta': 0.015758566558361053, 'fcm_dpo/margin': 93.5212173461914, 'margin_dpo/margin_mean': 93.52120971679688, 'margin_dpo/margin_std': 149.02239990234375, 'logps/chosen': -186.01309204101562, 'logps/rejected': -304.29412841796875, 'logps/ref_chosen': -56.18169403076172, 'logps/ref_rejected': -80.94152069091797, 'KL/chosen_KL_mean': -129.83139038085938, 'KL/rejected_KL_mean': -223.35260009765625, 'KL/mean': -176.5919952392578, 'KL/std': 117.19390869140625, 'logits/chosen': -0.26792603731155396, 'logits/rejected': -0.2554609179496765, 'epoch': 0.31} + 31%|███ | 209/681 [08:52<19:38, 2.50s/it] 31%|███ | 210/681 [08:54<19:28, 2.48s/it] {'loss': 1.0767, 'grad_norm': 24.466625213623047, 'learning_rate': 4.381713366536311e-07, 'fcm_dpo/beta': 0.004107258282601833, 'fcm_dpo/q_t': 0.40103164315223694, 'fcm_dpo/delta': -0.04311756044626236, 'fcm_dpo/margin': 107.38471984863281, 'margin_dpo/margin_mean': 107.38471221923828, 'margin_dpo/margin_std': 146.1619873046875, 'logps/chosen': -162.81959533691406, 'logps/rejected': -300.51409912109375, 'logps/ref_chosen': -46.371822357177734, 'logps/ref_rejected': -76.68162536621094, 'KL/chosen_KL_mean': -116.4477767944336, 'KL/rejected_KL_mean': -223.83248901367188, 'KL/mean': -170.14013671875, 'KL/std': 119.5206069946289, 'logits/chosen': -0.30964159965515137, 'logits/rejected': -0.3021623492240906, 'epoch': 0.31} + 31%|███ | 210/681 [08:54<19:28, 2.48s/it] 31%|███ | 211/681 [08:57<19:02, 2.43s/it] {'loss': 1.1425, 'grad_norm': 33.73249816894531, 'learning_rate': 4.373239415645323e-07, 'fcm_dpo/beta': 0.0040979161858558655, 'fcm_dpo/q_t': 0.4184762239456177, 'fcm_dpo/delta': 0.02301332727074623, 'fcm_dpo/margin': 92.20182800292969, 'margin_dpo/margin_mean': 92.20182800292969, 'margin_dpo/margin_std': 158.70635986328125, 'logps/chosen': -246.72286987304688, 'logps/rejected': -346.8133544921875, 'logps/ref_chosen': -78.93235778808594, 'logps/ref_rejected': -86.82098388671875, 'KL/chosen_KL_mean': -167.79052734375, 'KL/rejected_KL_mean': -259.99237060546875, 'KL/mean': -213.89144897460938, 'KL/std': 136.38101196289062, 'logits/chosen': -0.3087081015110016, 'logits/rejected': -0.26862210035324097, 'epoch': 0.31} + 31%|███ | 211/681 [08:57<19:02, 2.43s/it] 31%|███ | 212/681 [08:59<19:14, 2.46s/it] {'loss': 1.0304, 'grad_norm': 24.812414169311523, 'learning_rate': 4.3647161031536086e-07, 'fcm_dpo/beta': 0.003989426419138908, 'fcm_dpo/q_t': 0.382382869720459, 'fcm_dpo/delta': -0.13511215150356293, 'fcm_dpo/margin': 131.85598754882812, 'margin_dpo/margin_mean': 131.85598754882812, 'margin_dpo/margin_std': 161.92364501953125, 'logps/chosen': -196.25643920898438, 'logps/rejected': -372.9732666015625, 'logps/ref_chosen': -58.19701385498047, 'logps/ref_rejected': -103.05785369873047, 'KL/chosen_KL_mean': -138.05943298339844, 'KL/rejected_KL_mean': -269.9154052734375, 'KL/mean': -203.9874267578125, 'KL/std': 145.75244140625, 'logits/chosen': -0.26592007279396057, 'logits/rejected': -0.25487691164016724, 'epoch': 0.31} + 31%|███ | 212/681 [08:59<19:14, 2.46s/it] 31%|███▏ | 213/681 [09:02<19:34, 2.51s/it] {'loss': 1.0348, 'grad_norm': 32.999141693115234, 'learning_rate': 4.3561436536583774e-07, 'fcm_dpo/beta': 0.003918571397662163, 'fcm_dpo/q_t': 0.3877994418144226, 'fcm_dpo/delta': -0.09918186068534851, 'fcm_dpo/margin': 125.9916000366211, 'margin_dpo/margin_mean': 125.9916000366211, 'margin_dpo/margin_std': 152.36477661132812, 'logps/chosen': -196.92742919921875, 'logps/rejected': -349.321044921875, 'logps/ref_chosen': -67.51271057128906, 'logps/ref_rejected': -93.91471862792969, 'KL/chosen_KL_mean': -129.4147186279297, 'KL/rejected_KL_mean': -255.40631103515625, 'KL/mean': -192.4105224609375, 'KL/std': 128.76019287109375, 'logits/chosen': -0.3247559368610382, 'logits/rejected': -0.30033838748931885, 'epoch': 0.31} + 31%|███▏ | 213/681 [09:02<19:34, 2.51s/it] 31%|███▏ | 214/681 [09:04<19:06, 2.46s/it] {'loss': 1.0691, 'grad_norm': 22.442670822143555, 'learning_rate': 4.3475222930516473e-07, 'fcm_dpo/beta': 0.0038879900239408016, 'fcm_dpo/q_t': 0.40068867802619934, 'fcm_dpo/delta': -0.04128566384315491, 'fcm_dpo/margin': 113.03146362304688, 'margin_dpo/margin_mean': 113.03147888183594, 'margin_dpo/margin_std': 146.84898376464844, 'logps/chosen': -153.3330535888672, 'logps/rejected': -302.27703857421875, 'logps/ref_chosen': -41.604888916015625, 'logps/ref_rejected': -77.51741027832031, 'KL/chosen_KL_mean': -111.72816467285156, 'KL/rejected_KL_mean': -224.75961303710938, 'KL/mean': -168.243896484375, 'KL/std': 126.36701965332031, 'logits/chosen': -0.2424134612083435, 'logits/rejected': -0.24728670716285706, 'epoch': 0.31} + 31%|███▏ | 214/681 [09:04<19:06, 2.46s/it] 32%|███▏ | 215/681 [09:07<19:29, 2.51s/it] {'loss': 1.044, 'grad_norm': 26.03775405883789, 'learning_rate': 4.3388522485142885e-07, 'fcm_dpo/beta': 0.0038366110529750586, 'fcm_dpo/q_t': 0.3948417901992798, 'fcm_dpo/delta': -0.05947209149599075, 'fcm_dpo/margin': 118.9841079711914, 'margin_dpo/margin_mean': 118.9841079711914, 'margin_dpo/margin_std': 135.40081787109375, 'logps/chosen': -183.4717254638672, 'logps/rejected': -339.1412353515625, 'logps/ref_chosen': -53.279266357421875, 'logps/ref_rejected': -89.96464538574219, 'KL/chosen_KL_mean': -130.1924591064453, 'KL/rejected_KL_mean': -249.17657470703125, 'KL/mean': -189.68450927734375, 'KL/std': 129.92074584960938, 'logits/chosen': -0.27569520473480225, 'logits/rejected': -0.2664262354373932, 'epoch': 0.32} + 32%|███▏ | 215/681 [09:07<19:29, 2.51s/it] 32%|███▏ | 216/681 [09:10<20:10, 2.60s/it] {'loss': 1.085, 'grad_norm': 25.67644691467285, 'learning_rate': 4.330133748510036e-07, 'fcm_dpo/beta': 0.0038237408734858036, 'fcm_dpo/q_t': 0.4005330502986908, 'fcm_dpo/delta': -0.046559788286685944, 'fcm_dpo/margin': 116.20650482177734, 'margin_dpo/margin_mean': 116.20650482177734, 'margin_dpo/margin_std': 166.7809295654297, 'logps/chosen': -183.54251098632812, 'logps/rejected': -328.06011962890625, 'logps/ref_chosen': -48.887794494628906, 'logps/ref_rejected': -77.19892883300781, 'KL/chosen_KL_mean': -134.6547088623047, 'KL/rejected_KL_mean': -250.8612060546875, 'KL/mean': -192.75794982910156, 'KL/std': 135.34701538085938, 'logits/chosen': -0.2784517705440521, 'logits/rejected': -0.26232653856277466, 'epoch': 0.32} + 32%|███▏ | 216/681 [09:10<20:10, 2.60s/it] 32%|███▏ | 217/681 [09:12<19:54, 2.57s/it] {'loss': 1.0138, 'grad_norm': 21.137720108032227, 'learning_rate': 4.3213670227794757e-07, 'fcm_dpo/beta': 0.0037270013708621264, 'fcm_dpo/q_t': 0.3843996822834015, 'fcm_dpo/delta': -0.11725132167339325, 'fcm_dpo/margin': 137.11187744140625, 'margin_dpo/margin_mean': 137.11187744140625, 'margin_dpo/margin_std': 154.92494201660156, 'logps/chosen': -183.45480346679688, 'logps/rejected': -370.7996826171875, 'logps/ref_chosen': -49.845306396484375, 'logps/ref_rejected': -100.07832336425781, 'KL/chosen_KL_mean': -133.6094970703125, 'KL/rejected_KL_mean': -270.72137451171875, 'KL/mean': -202.16543579101562, 'KL/std': 137.3214111328125, 'logits/chosen': -0.26740846037864685, 'logits/rejected': -0.2628672122955322, 'epoch': 0.32} + 32%|███▏ | 217/681 [09:12<19:54, 2.57s/it] 32%|███▏ | 218/681 [09:15<19:57, 2.59s/it] {'loss': 1.1123, 'grad_norm': 22.63395881652832, 'learning_rate': 4.3125523023339815e-07, 'fcm_dpo/beta': 0.0037083416245877743, 'fcm_dpo/q_t': 0.4114874601364136, 'fcm_dpo/delta': 0.0059033287689089775, 'fcm_dpo/margin': 106.32524108886719, 'margin_dpo/margin_mean': 106.32524108886719, 'margin_dpo/margin_std': 159.38711547851562, 'logps/chosen': -200.29495239257812, 'logps/rejected': -335.8899230957031, 'logps/ref_chosen': -58.576683044433594, 'logps/ref_rejected': -87.84639739990234, 'KL/chosen_KL_mean': -141.71827697753906, 'KL/rejected_KL_mean': -248.04351806640625, 'KL/mean': -194.88088989257812, 'KL/std': 136.5504150390625, 'logits/chosen': -0.2704193592071533, 'logits/rejected': -0.26462244987487793, 'epoch': 0.32} + 32%|███▏ | 218/681 [09:15<19:57, 2.59s/it] 32%|███▏ | 219/681 [09:17<19:59, 2.60s/it] {'loss': 1.1678, 'grad_norm': 27.498462677001953, 'learning_rate': 4.303689819449636e-07, 'fcm_dpo/beta': 0.0037643599789589643, 'fcm_dpo/q_t': 0.4227282404899597, 'fcm_dpo/delta': 0.05545644462108612, 'fcm_dpo/margin': 91.84481048583984, 'margin_dpo/margin_mean': 91.84481048583984, 'margin_dpo/margin_std': 171.24391174316406, 'logps/chosen': -211.04232788085938, 'logps/rejected': -327.6336975097656, 'logps/ref_chosen': -61.083858489990234, 'logps/ref_rejected': -85.83042907714844, 'KL/chosen_KL_mean': -149.95846557617188, 'KL/rejected_KL_mean': -241.8032684326172, 'KL/mean': -195.880859375, 'KL/std': 140.34046936035156, 'logits/chosen': -0.34284111857414246, 'logits/rejected': -0.33793115615844727, 'epoch': 0.32} + 32%|███▏ | 219/681 [09:17<19:59, 2.60s/it] 32%|███▏ | 220/681 [09:20<19:59, 2.60s/it] {'loss': 1.1685, 'grad_norm': 24.416948318481445, 'learning_rate': 4.2947798076611047e-07, 'fcm_dpo/beta': 0.00381092494353652, 'fcm_dpo/q_t': 0.4311019778251648, 'fcm_dpo/delta': 0.10894529521465302, 'fcm_dpo/margin': 77.24984741210938, 'margin_dpo/margin_mean': 77.24984741210938, 'margin_dpo/margin_std': 125.33375549316406, 'logps/chosen': -238.18557739257812, 'logps/rejected': -333.08966064453125, 'logps/ref_chosen': -70.03128051757812, 'logps/ref_rejected': -87.68551635742188, 'KL/chosen_KL_mean': -168.154296875, 'KL/rejected_KL_mean': -245.40414428710938, 'KL/mean': -206.7792205810547, 'KL/std': 120.42225646972656, 'logits/chosen': -0.2805694043636322, 'logits/rejected': -0.2573145031929016, 'epoch': 0.32} + 32%|███▏ | 220/681 [09:20<19:59, 2.60s/it] 32%|███▏ | 221/681 [09:22<19:41, 2.57s/it] {'loss': 0.9329, 'grad_norm': 25.376220703125, 'learning_rate': 4.285822501755485e-07, 'fcm_dpo/beta': 0.0037145623937249184, 'fcm_dpo/q_t': 0.3563760221004486, 'fcm_dpo/delta': -0.24419276416301727, 'fcm_dpo/margin': 169.42575073242188, 'margin_dpo/margin_mean': 169.42575073242188, 'margin_dpo/margin_std': 157.756103515625, 'logps/chosen': -196.8850555419922, 'logps/rejected': -420.623779296875, 'logps/ref_chosen': -52.15470886230469, 'logps/ref_rejected': -106.46768188476562, 'KL/chosen_KL_mean': -144.7303466796875, 'KL/rejected_KL_mean': -314.1560974121094, 'KL/mean': -229.44322204589844, 'KL/std': 154.45883178710938, 'logits/chosen': -0.28012099862098694, 'logits/rejected': -0.28704455494880676, 'epoch': 0.32} + 32%|███▏ | 221/681 [09:22<19:41, 2.57s/it] 33%|███▎ | 222/681 [09:25<19:40, 2.57s/it] {'loss': 1.0556, 'grad_norm': 21.781951904296875, 'learning_rate': 4.276818137766118e-07, 'fcm_dpo/beta': 0.0036375990603119135, 'fcm_dpo/q_t': 0.39524269104003906, 'fcm_dpo/delta': -0.06309865415096283, 'fcm_dpo/margin': 126.48562622070312, 'margin_dpo/margin_mean': 126.4856185913086, 'margin_dpo/margin_std': 158.17788696289062, 'logps/chosen': -215.4439697265625, 'logps/rejected': -380.95965576171875, 'logps/ref_chosen': -60.971099853515625, 'logps/ref_rejected': -100.00115203857422, 'KL/chosen_KL_mean': -154.47286987304688, 'KL/rejected_KL_mean': -280.95849609375, 'KL/mean': -217.7156982421875, 'KL/std': 141.71084594726562, 'logits/chosen': -0.3290114402770996, 'logits/rejected': -0.3324123024940491, 'epoch': 0.33} + 33%|███▎ | 222/681 [09:25<19:40, 2.57s/it] 33%|███▎ | 223/681 [09:27<18:44, 2.46s/it] {'loss': 1.1264, 'grad_norm': 25.37874412536621, 'learning_rate': 4.2677669529663686e-07, 'fcm_dpo/beta': 0.0036115439143031836, 'fcm_dpo/q_t': 0.4122130274772644, 'fcm_dpo/delta': 0.009921977296471596, 'fcm_dpo/margin': 108.11112213134766, 'margin_dpo/margin_mean': 108.11112213134766, 'margin_dpo/margin_std': 176.22784423828125, 'logps/chosen': -215.06320190429688, 'logps/rejected': -353.3587646484375, 'logps/ref_chosen': -52.64057540893555, 'logps/ref_rejected': -82.82502746582031, 'KL/chosen_KL_mean': -162.42262268066406, 'KL/rejected_KL_mean': -270.53375244140625, 'KL/mean': -216.47817993164062, 'KL/std': 139.66900634765625, 'logits/chosen': -0.24600395560264587, 'logits/rejected': -0.2434278130531311, 'epoch': 0.33} + 33%|███▎ | 223/681 [09:27<18:44, 2.46s/it] 33%|███▎ | 224/681 [09:29<17:56, 2.36s/it] {'loss': 1.0901, 'grad_norm': 23.84183120727539, 'learning_rate': 4.2586691858633747e-07, 'fcm_dpo/beta': 0.003568105399608612, 'fcm_dpo/q_t': 0.4034996032714844, 'fcm_dpo/delta': -0.04215101897716522, 'fcm_dpo/margin': 123.00054168701172, 'margin_dpo/margin_mean': 123.00054168701172, 'margin_dpo/margin_std': 179.27581787109375, 'logps/chosen': -189.64259338378906, 'logps/rejected': -341.1642150878906, 'logps/ref_chosen': -48.59541320800781, 'logps/ref_rejected': -77.11648559570312, 'KL/chosen_KL_mean': -141.04718017578125, 'KL/rejected_KL_mean': -264.0477294921875, 'KL/mean': -202.54745483398438, 'KL/std': 154.926513671875, 'logits/chosen': -0.3231106102466583, 'logits/rejected': -0.3093082904815674, 'epoch': 0.33} + 33%|███▎ | 224/681 [09:29<17:56, 2.36s/it] 33%|███▎ | 225/681 [09:32<17:42, 2.33s/it] {'loss': 1.0322, 'grad_norm': 22.832763671875, 'learning_rate': 4.249525076191759e-07, 'fcm_dpo/beta': 0.003522678278386593, 'fcm_dpo/q_t': 0.3856618404388428, 'fcm_dpo/delta': -0.11579211056232452, 'fcm_dpo/margin': 144.70822143554688, 'margin_dpo/margin_mean': 144.70822143554688, 'margin_dpo/margin_std': 182.02337646484375, 'logps/chosen': -221.38186645507812, 'logps/rejected': -407.9925231933594, 'logps/ref_chosen': -58.000465393066406, 'logps/ref_rejected': -99.90291595458984, 'KL/chosen_KL_mean': -163.38140869140625, 'KL/rejected_KL_mean': -308.089599609375, 'KL/mean': -235.73553466796875, 'KL/std': 149.63555908203125, 'logits/chosen': -0.30160531401634216, 'logits/rejected': -0.29380398988723755, 'epoch': 0.33} + 33%|███▎ | 225/681 [09:32<17:42, 2.33s/it] 33%|███▎ | 226/681 [09:34<18:31, 2.44s/it] {'loss': 1.1053, 'grad_norm': 30.9562931060791, 'learning_rate': 4.2403348649073167e-07, 'fcm_dpo/beta': 0.0034828565549105406, 'fcm_dpo/q_t': 0.4106452763080597, 'fcm_dpo/delta': -0.001416236162185669, 'fcm_dpo/margin': 115.01838684082031, 'margin_dpo/margin_mean': 115.01838684082031, 'margin_dpo/margin_std': 166.55606079101562, 'logps/chosen': -193.8486328125, 'logps/rejected': -328.65594482421875, 'logps/ref_chosen': -58.898799896240234, 'logps/ref_rejected': -78.68775939941406, 'KL/chosen_KL_mean': -134.9498291015625, 'KL/rejected_KL_mean': -249.96820068359375, 'KL/mean': -192.45901489257812, 'KL/std': 144.02420043945312, 'logits/chosen': -0.36006784439086914, 'logits/rejected': -0.32199037075042725, 'epoch': 0.33} + 33%|███▎ | 226/681 [09:34<18:31, 2.44s/it] 33%|███▎ | 227/681 [09:37<18:13, 2.41s/it] {'loss': 1.0293, 'grad_norm': 22.616573333740234, 'learning_rate': 4.2310987941806615e-07, 'fcm_dpo/beta': 0.0034378478303551674, 'fcm_dpo/q_t': 0.38717547059059143, 'fcm_dpo/delta': -0.10290348529815674, 'fcm_dpo/margin': 144.60397338867188, 'margin_dpo/margin_mean': 144.60397338867188, 'margin_dpo/margin_std': 172.94851684570312, 'logps/chosen': -219.77462768554688, 'logps/rejected': -404.71881103515625, 'logps/ref_chosen': -59.072181701660156, 'logps/ref_rejected': -99.41236877441406, 'KL/chosen_KL_mean': -160.70245361328125, 'KL/rejected_KL_mean': -305.3064270019531, 'KL/mean': -233.0044403076172, 'KL/std': 166.17343139648438, 'logits/chosen': -0.36182135343551636, 'logits/rejected': -0.3515620529651642, 'epoch': 0.33} + 33%|███▎ | 227/681 [09:37<18:13, 2.41s/it] 33%|███▎ | 228/681 [09:39<18:49, 2.49s/it] {'loss': 1.1398, 'grad_norm': 26.848163604736328, 'learning_rate': 4.2218171073908463e-07, 'fcm_dpo/beta': 0.0034628671128302813, 'fcm_dpo/q_t': 0.418906033039093, 'fcm_dpo/delta': 0.0474376454949379, 'fcm_dpo/margin': 102.26102447509766, 'margin_dpo/margin_mean': 102.26102447509766, 'margin_dpo/margin_std': 164.67837524414062, 'logps/chosen': -228.861083984375, 'logps/rejected': -356.2795715332031, 'logps/ref_chosen': -65.89128875732422, 'logps/ref_rejected': -91.04875183105469, 'KL/chosen_KL_mean': -162.9698028564453, 'KL/rejected_KL_mean': -265.2308349609375, 'KL/mean': -214.10031127929688, 'KL/std': 134.146728515625, 'logits/chosen': -0.31502634286880493, 'logits/rejected': -0.2975466251373291, 'epoch': 0.33} + 33%|███▎ | 228/681 [09:39<18:49, 2.49s/it] 34%|███▎ | 229/681 [09:42<18:44, 2.49s/it] {'loss': 1.1155, 'grad_norm': 31.090105056762695, 'learning_rate': 4.212490049118951e-07, 'fcm_dpo/beta': 0.003477250225841999, 'fcm_dpo/q_t': 0.41217368841171265, 'fcm_dpo/delta': 0.018432918936014175, 'fcm_dpo/margin': 109.92990112304688, 'margin_dpo/margin_mean': 109.92990112304688, 'margin_dpo/margin_std': 164.13958740234375, 'logps/chosen': -238.4125213623047, 'logps/rejected': -362.1634521484375, 'logps/ref_chosen': -70.70637512207031, 'logps/ref_rejected': -84.52741241455078, 'KL/chosen_KL_mean': -167.70614624023438, 'KL/rejected_KL_mean': -277.63604736328125, 'KL/mean': -222.6710968017578, 'KL/std': 156.47640991210938, 'logits/chosen': -0.40321260690689087, 'logits/rejected': -0.37525972723960876, 'epoch': 0.34} + 34%|███▎ | 229/681 [09:42<18:44, 2.49s/it] 34%|███▍ | 230/681 [09:44<18:26, 2.45s/it] {'loss': 0.9708, 'grad_norm': 24.862947463989258, 'learning_rate': 4.203117865141635e-07, 'fcm_dpo/beta': 0.0033868225291371346, 'fcm_dpo/q_t': 0.37030667066574097, 'fcm_dpo/delta': -0.17020674049854279, 'fcm_dpo/margin': 165.46487426757812, 'margin_dpo/margin_mean': 165.46487426757812, 'margin_dpo/margin_std': 161.74786376953125, 'logps/chosen': -165.30010986328125, 'logps/rejected': -377.1048889160156, 'logps/ref_chosen': -39.282005310058594, 'logps/ref_rejected': -85.62191009521484, 'KL/chosen_KL_mean': -126.01810455322266, 'KL/rejected_KL_mean': -291.48297119140625, 'KL/mean': -208.7505340576172, 'KL/std': 146.91603088378906, 'logits/chosen': -0.31466907262802124, 'logits/rejected': -0.31940126419067383, 'epoch': 0.34} + 34%|███▍ | 230/681 [09:44<18:26, 2.45s/it] 34%|███▍ | 231/681 [09:47<18:46, 2.50s/it] {'loss': 1.0961, 'grad_norm': 23.897953033447266, 'learning_rate': 4.1937008024246625e-07, 'fcm_dpo/beta': 0.003367940429598093, 'fcm_dpo/q_t': 0.4120003581047058, 'fcm_dpo/delta': 0.01239101029932499, 'fcm_dpo/margin': 115.23086547851562, 'margin_dpo/margin_mean': 115.23086547851562, 'margin_dpo/margin_std': 152.61228942871094, 'logps/chosen': -211.55612182617188, 'logps/rejected': -337.63446044921875, 'logps/ref_chosen': -63.27644348144531, 'logps/ref_rejected': -74.1239013671875, 'KL/chosen_KL_mean': -148.27967834472656, 'KL/rejected_KL_mean': -263.51055908203125, 'KL/mean': -205.89512634277344, 'KL/std': 131.91383361816406, 'logits/chosen': -0.3472822308540344, 'logits/rejected': -0.31850665807724, 'epoch': 0.34} + 34%|███▍ | 231/681 [09:47<18:46, 2.50s/it] 34%|███▍ | 232/681 [09:49<19:11, 2.57s/it] {'loss': 1.1552, 'grad_norm': 25.275489807128906, 'learning_rate': 4.1842391091163933e-07, 'fcm_dpo/beta': 0.0034012598916888237, 'fcm_dpo/q_t': 0.4272800087928772, 'fcm_dpo/delta': 0.07421056926250458, 'fcm_dpo/margin': 96.52035522460938, 'margin_dpo/margin_mean': 96.52035522460938, 'margin_dpo/margin_std': 163.61196899414062, 'logps/chosen': -253.182373046875, 'logps/rejected': -362.9310302734375, 'logps/ref_chosen': -70.74876403808594, 'logps/ref_rejected': -83.97706604003906, 'KL/chosen_KL_mean': -182.43360900878906, 'KL/rejected_KL_mean': -278.9539794921875, 'KL/mean': -230.69378662109375, 'KL/std': 157.08425903320312, 'logits/chosen': -0.36489853262901306, 'logits/rejected': -0.3447696268558502, 'epoch': 0.34} + 34%|███▍ | 232/681 [09:49<19:11, 2.57s/it] 34%|███▍ | 233/681 [09:52<19:30, 2.61s/it] {'loss': 1.0631, 'grad_norm': 26.219318389892578, 'learning_rate': 4.174733034541245e-07, 'fcm_dpo/beta': 0.003372794948518276, 'fcm_dpo/q_t': 0.3924492597579956, 'fcm_dpo/delta': -0.10033433884382248, 'fcm_dpo/margin': 146.88909912109375, 'margin_dpo/margin_mean': 146.88909912109375, 'margin_dpo/margin_std': 210.21575927734375, 'logps/chosen': -221.85171508789062, 'logps/rejected': -421.33795166015625, 'logps/ref_chosen': -54.8829345703125, 'logps/ref_rejected': -107.4800796508789, 'KL/chosen_KL_mean': -166.96878051757812, 'KL/rejected_KL_mean': -313.8578796386719, 'KL/mean': -240.413330078125, 'KL/std': 165.35609436035156, 'logits/chosen': -0.31235527992248535, 'logits/rejected': -0.3153640925884247, 'epoch': 0.34} + 34%|███▍ | 233/681 [09:52<19:30, 2.61s/it] 34%|███▍ | 234/681 [09:55<19:33, 2.63s/it] {'loss': 1.0245, 'grad_norm': 41.00167465209961, 'learning_rate': 4.165182829193126e-07, 'fcm_dpo/beta': 0.0032793269492685795, 'fcm_dpo/q_t': 0.38733774423599243, 'fcm_dpo/delta': -0.1023728996515274, 'fcm_dpo/margin': 151.15582275390625, 'margin_dpo/margin_mean': 151.15582275390625, 'margin_dpo/margin_std': 169.50576782226562, 'logps/chosen': -210.00350952148438, 'logps/rejected': -417.07147216796875, 'logps/ref_chosen': -44.094520568847656, 'logps/ref_rejected': -100.00663757324219, 'KL/chosen_KL_mean': -165.90899658203125, 'KL/rejected_KL_mean': -317.0648193359375, 'KL/mean': -241.48690795898438, 'KL/std': 148.5336151123047, 'logits/chosen': -0.3150627017021179, 'logits/rejected': -0.34308189153671265, 'epoch': 0.34} + 34%|███▍ | 234/681 [09:55<19:33, 2.63s/it] 35%|███▍ | 235/681 [09:57<19:03, 2.56s/it] {'loss': 1.166, 'grad_norm': 30.126893997192383, 'learning_rate': 4.1555887447288255e-07, 'fcm_dpo/beta': 0.0033345932606607676, 'fcm_dpo/q_t': 0.42566415667533875, 'fcm_dpo/delta': 0.07840821146965027, 'fcm_dpo/margin': 97.06028747558594, 'margin_dpo/margin_mean': 97.06028747558594, 'margin_dpo/margin_std': 170.45330810546875, 'logps/chosen': -259.88311767578125, 'logps/rejected': -385.1005859375, 'logps/ref_chosen': -62.237911224365234, 'logps/ref_rejected': -90.39506530761719, 'KL/chosen_KL_mean': -197.6452178955078, 'KL/rejected_KL_mean': -294.70550537109375, 'KL/mean': -246.17535400390625, 'KL/std': 142.49789428710938, 'logits/chosen': -0.36974847316741943, 'logits/rejected': -0.35245949029922485, 'epoch': 0.35} + 35%|███▍ | 235/681 [09:57<19:03, 2.56s/it] 35%|███▍ | 236/681 [10:00<19:14, 2.59s/it] {'loss': 0.9902, 'grad_norm': 48.95918273925781, 'learning_rate': 4.1459510339613946e-07, 'fcm_dpo/beta': 0.003280568402260542, 'fcm_dpo/q_t': 0.3806627690792084, 'fcm_dpo/delta': -0.11472684144973755, 'fcm_dpo/margin': 155.12411499023438, 'margin_dpo/margin_mean': 155.12413024902344, 'margin_dpo/margin_std': 140.13827514648438, 'logps/chosen': -190.79881286621094, 'logps/rejected': -400.09320068359375, 'logps/ref_chosen': -49.34136199951172, 'logps/ref_rejected': -103.51162719726562, 'KL/chosen_KL_mean': -141.45745849609375, 'KL/rejected_KL_mean': -296.58160400390625, 'KL/mean': -219.01951599121094, 'KL/std': 149.04501342773438, 'logits/chosen': -0.3421390652656555, 'logits/rejected': -0.341775506734848, 'epoch': 0.35} + 35%|███▍ | 236/681 [10:00<19:14, 2.59s/it] 35%|███▍ | 237/681 [10:02<19:14, 2.60s/it] {'loss': 1.1038, 'grad_norm': 27.945327758789062, 'learning_rate': 4.136269950853473e-07, 'fcm_dpo/beta': 0.0032692216336727142, 'fcm_dpo/q_t': 0.41055381298065186, 'fcm_dpo/delta': 0.006964612752199173, 'fcm_dpo/margin': 120.29085540771484, 'margin_dpo/margin_mean': 120.29085540771484, 'margin_dpo/margin_std': 172.14694213867188, 'logps/chosen': -242.03762817382812, 'logps/rejected': -402.94073486328125, 'logps/ref_chosen': -54.168121337890625, 'logps/ref_rejected': -94.78036499023438, 'KL/chosen_KL_mean': -187.8695068359375, 'KL/rejected_KL_mean': -308.1603698730469, 'KL/mean': -248.0149383544922, 'KL/std': 149.92160034179688, 'logits/chosen': -0.3814457356929779, 'logits/rejected': -0.3774615526199341, 'epoch': 0.35} + 35%|███▍ | 237/681 [10:03<19:14, 2.60s/it] 35%|███▍ | 238/681 [10:05<19:26, 2.63s/it] {'loss': 1.1057, 'grad_norm': 25.66066551208496, 'learning_rate': 4.126545750510605e-07, 'fcm_dpo/beta': 0.0032576932571828365, 'fcm_dpo/q_t': 0.41405510902404785, 'fcm_dpo/delta': 0.01427885890007019, 'fcm_dpo/margin': 118.46604919433594, 'margin_dpo/margin_mean': 118.46604919433594, 'margin_dpo/margin_std': 167.74099731445312, 'logps/chosen': -220.21658325195312, 'logps/rejected': -374.1274719238281, 'logps/ref_chosen': -53.973121643066406, 'logps/ref_rejected': -89.41795349121094, 'KL/chosen_KL_mean': -166.24346923828125, 'KL/rejected_KL_mean': -284.70953369140625, 'KL/mean': -225.47650146484375, 'KL/std': 151.5367889404297, 'logits/chosen': -0.3540055751800537, 'logits/rejected': -0.3696235418319702, 'epoch': 0.35} + 35%|███▍ | 238/681 [10:05<19:26, 2.63s/it] 35%|███▌ | 239/681 [10:08<18:50, 2.56s/it] {'loss': 1.0726, 'grad_norm': 41.689571380615234, 'learning_rate': 4.116778689174514e-07, 'fcm_dpo/beta': 0.0032407566905021667, 'fcm_dpo/q_t': 0.40173038840293884, 'fcm_dpo/delta': -0.026493586599826813, 'fcm_dpo/margin': 130.94488525390625, 'margin_dpo/margin_mean': 130.94488525390625, 'margin_dpo/margin_std': 158.7174530029297, 'logps/chosen': -234.58370971679688, 'logps/rejected': -401.02374267578125, 'logps/ref_chosen': -58.09782409667969, 'logps/ref_rejected': -93.59294128417969, 'KL/chosen_KL_mean': -176.48590087890625, 'KL/rejected_KL_mean': -307.4307861328125, 'KL/mean': -241.95834350585938, 'KL/std': 140.23287963867188, 'logits/chosen': -0.36024659872055054, 'logits/rejected': -0.3487205505371094, 'epoch': 0.35} + 35%|███▌ | 239/681 [10:08<18:50, 2.56s/it] 35%|███▌ | 240/681 [10:10<19:04, 2.60s/it] {'loss': 1.1445, 'grad_norm': 34.611045837402344, 'learning_rate': 4.106969024216348e-07, 'fcm_dpo/beta': 0.003267391351982951, 'fcm_dpo/q_t': 0.41868656873703003, 'fcm_dpo/delta': 0.044143058359622955, 'fcm_dpo/margin': 109.3631820678711, 'margin_dpo/margin_mean': 109.36317443847656, 'margin_dpo/margin_std': 180.47584533691406, 'logps/chosen': -244.80670166015625, 'logps/rejected': -367.6739196777344, 'logps/ref_chosen': -60.6144905090332, 'logps/ref_rejected': -74.1185302734375, 'KL/chosen_KL_mean': -184.19223022460938, 'KL/rejected_KL_mean': -293.5553894042969, 'KL/mean': -238.87380981445312, 'KL/std': 150.6685028076172, 'logits/chosen': -0.38304704427719116, 'logits/rejected': -0.362338662147522, 'epoch': 0.35} + 35%|███▌ | 240/681 [10:10<19:04, 2.60s/it] 35%|███▌ | 241/681 [10:13<18:41, 2.55s/it] {'loss': 1.0002, 'grad_norm': 30.021371841430664, 'learning_rate': 4.097117014129903e-07, 'fcm_dpo/beta': 0.0032168994657695293, 'fcm_dpo/q_t': 0.37778547406196594, 'fcm_dpo/delta': -0.15657520294189453, 'fcm_dpo/margin': 170.4112548828125, 'margin_dpo/margin_mean': 170.4112548828125, 'margin_dpo/margin_std': 193.95298767089844, 'logps/chosen': -218.62478637695312, 'logps/rejected': -411.005859375, 'logps/ref_chosen': -66.091064453125, 'logps/ref_rejected': -88.06088256835938, 'KL/chosen_KL_mean': -152.53372192382812, 'KL/rejected_KL_mean': -322.9449462890625, 'KL/mean': -237.73934936523438, 'KL/std': 166.03857421875, 'logits/chosen': -0.4348849952220917, 'logits/rejected': -0.4122951626777649, 'epoch': 0.35} + 35%|███▌ | 241/681 [10:13<18:41, 2.55s/it] 36%|███▌ | 242/681 [10:15<18:26, 2.52s/it] {'loss': 1.1033, 'grad_norm': 35.95293045043945, 'learning_rate': 4.087222918524807e-07, 'fcm_dpo/beta': 0.0032022669911384583, 'fcm_dpo/q_t': 0.4114469289779663, 'fcm_dpo/delta': 0.004475157707929611, 'fcm_dpo/margin': 123.42952728271484, 'margin_dpo/margin_mean': 123.42953491210938, 'margin_dpo/margin_std': 175.8994140625, 'logps/chosen': -246.00924682617188, 'logps/rejected': -384.9352111816406, 'logps/ref_chosen': -67.86392974853516, 'logps/ref_rejected': -83.36033630371094, 'KL/chosen_KL_mean': -178.14532470703125, 'KL/rejected_KL_mean': -301.57489013671875, 'KL/mean': -239.86007690429688, 'KL/std': 146.30352783203125, 'logits/chosen': -0.35760676860809326, 'logits/rejected': -0.33343029022216797, 'epoch': 0.36} + 36%|███▌ | 242/681 [10:15<18:26, 2.52s/it] 36%|███▌ | 243/681 [10:18<18:23, 2.52s/it] {'loss': 1.0419, 'grad_norm': 27.06866455078125, 'learning_rate': 4.07728699811968e-07, 'fcm_dpo/beta': 0.0031442558392882347, 'fcm_dpo/q_t': 0.3913443386554718, 'fcm_dpo/delta': -0.08152244985103607, 'fcm_dpo/margin': 151.84548950195312, 'margin_dpo/margin_mean': 151.84548950195312, 'margin_dpo/margin_std': 185.63485717773438, 'logps/chosen': -244.2119903564453, 'logps/rejected': -409.3088684082031, 'logps/ref_chosen': -63.0842399597168, 'logps/ref_rejected': -76.33563232421875, 'KL/chosen_KL_mean': -181.12774658203125, 'KL/rejected_KL_mean': -332.9732360839844, 'KL/mean': -257.05047607421875, 'KL/std': 163.24075317382812, 'logits/chosen': -0.34606635570526123, 'logits/rejected': -0.3166462182998657, 'epoch': 0.36} + 36%|███▌ | 243/681 [10:18<18:23, 2.52s/it] 36%|███▌ | 244/681 [10:20<18:19, 2.52s/it] {'loss': 1.0159, 'grad_norm': 32.62556076049805, 'learning_rate': 4.067309514735267e-07, 'fcm_dpo/beta': 0.0030848030000925064, 'fcm_dpo/q_t': 0.3867965638637543, 'fcm_dpo/delta': -0.09792040288448334, 'fcm_dpo/margin': 159.75762939453125, 'margin_dpo/margin_mean': 159.75762939453125, 'margin_dpo/margin_std': 168.3123779296875, 'logps/chosen': -222.38796997070312, 'logps/rejected': -415.8968505859375, 'logps/ref_chosen': -61.140689849853516, 'logps/ref_rejected': -94.89193725585938, 'KL/chosen_KL_mean': -161.24728393554688, 'KL/rejected_KL_mean': -321.0049133300781, 'KL/mean': -241.1260986328125, 'KL/std': 159.36138916015625, 'logits/chosen': -0.4207112193107605, 'logits/rejected': -0.4130573272705078, 'epoch': 0.36} + 36%|███▌ | 244/681 [10:20<18:19, 2.52s/it] 36%|███▌ | 245/681 [10:23<18:44, 2.58s/it] {'loss': 1.1166, 'grad_norm': 28.120004653930664, 'learning_rate': 4.057290731287531e-07, 'fcm_dpo/beta': 0.003060833550989628, 'fcm_dpo/q_t': 0.4138393700122833, 'fcm_dpo/delta': 0.022731080651283264, 'fcm_dpo/margin': 123.15798950195312, 'margin_dpo/margin_mean': 123.15798950195312, 'margin_dpo/margin_std': 173.21768188476562, 'logps/chosen': -256.5846862792969, 'logps/rejected': -400.1204833984375, 'logps/ref_chosen': -67.26228332519531, 'logps/ref_rejected': -87.64010620117188, 'KL/chosen_KL_mean': -189.32240295410156, 'KL/rejected_KL_mean': -312.48040771484375, 'KL/mean': -250.90139770507812, 'KL/std': 155.51824951171875, 'logits/chosen': -0.3828911781311035, 'logits/rejected': -0.35266777873039246, 'epoch': 0.36} + 36%|███▌ | 245/681 [10:23<18:44, 2.58s/it] 36%|███▌ | 246/681 [10:25<18:42, 2.58s/it] {'loss': 1.108, 'grad_norm': 25.968130111694336, 'learning_rate': 4.047230911780736e-07, 'fcm_dpo/beta': 0.0030869655311107635, 'fcm_dpo/q_t': 0.4123349189758301, 'fcm_dpo/delta': 0.009035417810082436, 'fcm_dpo/margin': 126.73939514160156, 'margin_dpo/margin_mean': 126.73939514160156, 'margin_dpo/margin_std': 186.197265625, 'logps/chosen': -251.6303253173828, 'logps/rejected': -396.01910400390625, 'logps/ref_chosen': -66.69696807861328, 'logps/ref_rejected': -84.34634399414062, 'KL/chosen_KL_mean': -184.933349609375, 'KL/rejected_KL_mean': -311.67279052734375, 'KL/mean': -248.30307006835938, 'KL/std': 172.1173858642578, 'logits/chosen': -0.43583017587661743, 'logits/rejected': -0.39911651611328125, 'epoch': 0.36} + 36%|███▌ | 246/681 [10:26<18:42, 2.58s/it] 36%|███▋ | 247/681 [10:28<18:29, 2.56s/it] {'loss': 1.0034, 'grad_norm': 34.5230712890625, 'learning_rate': 4.0371303213004814e-07, 'fcm_dpo/beta': 0.0030250344425439835, 'fcm_dpo/q_t': 0.377947062253952, 'fcm_dpo/delta': -0.14571964740753174, 'fcm_dpo/margin': 177.74020385742188, 'margin_dpo/margin_mean': 177.7401885986328, 'margin_dpo/margin_std': 199.57327270507812, 'logps/chosen': -273.94427490234375, 'logps/rejected': -501.37237548828125, 'logps/ref_chosen': -56.6053466796875, 'logps/ref_rejected': -106.29326629638672, 'KL/chosen_KL_mean': -217.3389129638672, 'KL/rejected_KL_mean': -395.0791015625, 'KL/mean': -306.2090148925781, 'KL/std': 180.24539184570312, 'logits/chosen': -0.35546165704727173, 'logits/rejected': -0.3532963991165161, 'epoch': 0.36} + 36%|███▋ | 247/681 [10:28<18:29, 2.56s/it] 36%|███▋ | 248/681 [10:31<18:27, 2.56s/it] {'loss': 1.0265, 'grad_norm': 30.407548904418945, 'learning_rate': 4.0269892260067197e-07, 'fcm_dpo/beta': 0.00296983914449811, 'fcm_dpo/q_t': 0.3929385244846344, 'fcm_dpo/delta': -0.05443059653043747, 'fcm_dpo/margin': 152.0039825439453, 'margin_dpo/margin_mean': 152.00399780273438, 'margin_dpo/margin_std': 138.8626708984375, 'logps/chosen': -231.76812744140625, 'logps/rejected': -431.5857849121094, 'logps/ref_chosen': -44.043216705322266, 'logps/ref_rejected': -91.85687255859375, 'KL/chosen_KL_mean': -187.72491455078125, 'KL/rejected_KL_mean': -339.7288818359375, 'KL/mean': -263.7269287109375, 'KL/std': 144.5110321044922, 'logits/chosen': -0.3517131209373474, 'logits/rejected': -0.37110453844070435, 'epoch': 0.36} + 36%|███▋ | 248/681 [10:31<18:27, 2.56s/it] 37%|███▋ | 249/681 [10:33<17:58, 2.50s/it] {'loss': 1.2284, 'grad_norm': 51.20861053466797, 'learning_rate': 4.0168078931267426e-07, 'fcm_dpo/beta': 0.0030452050268650055, 'fcm_dpo/q_t': 0.4446827173233032, 'fcm_dpo/delta': 0.1617167890071869, 'fcm_dpo/margin': 79.55818176269531, 'margin_dpo/margin_mean': 79.55818176269531, 'margin_dpo/margin_std': 176.84884643554688, 'logps/chosen': -294.95782470703125, 'logps/rejected': -392.5417175292969, 'logps/ref_chosen': -62.442352294921875, 'logps/ref_rejected': -80.46806335449219, 'KL/chosen_KL_mean': -232.51548767089844, 'KL/rejected_KL_mean': -312.07366943359375, 'KL/mean': -272.2945556640625, 'KL/std': 151.4901580810547, 'logits/chosen': -0.3823295533657074, 'logits/rejected': -0.360689252614975, 'epoch': 0.37} + 37%|███▋ | 249/681 [10:33<17:58, 2.50s/it] 37%|███▋ | 250/681 [10:35<17:42, 2.47s/it] {'loss': 1.0317, 'grad_norm': 63.86355972290039, 'learning_rate': 4.006586590948141e-07, 'fcm_dpo/beta': 0.0030563112813979387, 'fcm_dpo/q_t': 0.39273035526275635, 'fcm_dpo/delta': -0.061967238783836365, 'fcm_dpo/margin': 150.18910217285156, 'margin_dpo/margin_mean': 150.18910217285156, 'margin_dpo/margin_std': 152.9610137939453, 'logps/chosen': -273.9414367675781, 'logps/rejected': -432.36566162109375, 'logps/ref_chosen': -65.63668823242188, 'logps/ref_rejected': -73.87184143066406, 'KL/chosen_KL_mean': -208.30474853515625, 'KL/rejected_KL_mean': -358.49383544921875, 'KL/mean': -283.3992919921875, 'KL/std': 153.48965454101562, 'logits/chosen': -0.3651628792285919, 'logits/rejected': -0.3122418522834778, 'epoch': 0.37} + 37%|███▋ | 250/681 [10:35<17:42, 2.47s/it] 37%|███▋ | 251/681 [10:38<17:44, 2.48s/it] {'loss': 1.1706, 'grad_norm': 46.124000549316406, 'learning_rate': 3.9963255888117325e-07, 'fcm_dpo/beta': 0.003067499492317438, 'fcm_dpo/q_t': 0.42934930324554443, 'fcm_dpo/delta': 0.09177864342927933, 'fcm_dpo/margin': 101.44242858886719, 'margin_dpo/margin_mean': 101.44244384765625, 'margin_dpo/margin_std': 174.69888305664062, 'logps/chosen': -275.42706298828125, 'logps/rejected': -397.3501892089844, 'logps/ref_chosen': -57.182716369628906, 'logps/ref_rejected': -77.66343688964844, 'KL/chosen_KL_mean': -218.24432373046875, 'KL/rejected_KL_mean': -319.686767578125, 'KL/mean': -268.9655456542969, 'KL/std': 161.1807861328125, 'logits/chosen': -0.32652994990348816, 'logits/rejected': -0.29296159744262695, 'epoch': 0.37} + 37%|███▋ | 251/681 [10:38<17:44, 2.48s/it] 37%|███▋ | 252/681 [10:40<17:48, 2.49s/it] {'loss': 1.0445, 'grad_norm': 26.554500579833984, 'learning_rate': 3.9860251571044666e-07, 'fcm_dpo/beta': 0.0030634840950369835, 'fcm_dpo/q_t': 0.3959454894065857, 'fcm_dpo/delta': -0.037671059370040894, 'fcm_dpo/margin': 142.23912048339844, 'margin_dpo/margin_mean': 142.23912048339844, 'margin_dpo/margin_std': 143.8651580810547, 'logps/chosen': -281.4176330566406, 'logps/rejected': -436.72906494140625, 'logps/ref_chosen': -71.68563842773438, 'logps/ref_rejected': -84.75799560546875, 'KL/chosen_KL_mean': -209.73199462890625, 'KL/rejected_KL_mean': -351.9710693359375, 'KL/mean': -280.8515625, 'KL/std': 143.38165283203125, 'logits/chosen': -0.3959979712963104, 'logits/rejected': -0.3560243248939514, 'epoch': 0.37} + 37%|███▋ | 252/681 [10:40<17:48, 2.49s/it] 37%|███▋ | 253/681 [10:43<18:17, 2.56s/it] {'loss': 1.0801, 'grad_norm': 22.02428436279297, 'learning_rate': 3.9756855672522986e-07, 'fcm_dpo/beta': 0.003077391069382429, 'fcm_dpo/q_t': 0.40381836891174316, 'fcm_dpo/delta': -0.012822866439819336, 'fcm_dpo/margin': 133.81402587890625, 'margin_dpo/margin_mean': 133.81402587890625, 'margin_dpo/margin_std': 165.3370819091797, 'logps/chosen': -249.51309204101562, 'logps/rejected': -412.8957214355469, 'logps/ref_chosen': -69.1339340209961, 'logps/ref_rejected': -98.70252990722656, 'KL/chosen_KL_mean': -180.37916564941406, 'KL/rejected_KL_mean': -314.19317626953125, 'KL/mean': -247.28616333007812, 'KL/std': 149.814208984375, 'logits/chosen': -0.4026602804660797, 'logits/rejected': -0.3960729241371155, 'epoch': 0.37} + 37%|███▋ | 253/681 [10:43<18:17, 2.56s/it] 37%|███▋ | 254/681 [10:46<18:25, 2.59s/it] {'loss': 1.1402, 'grad_norm': 20.989831924438477, 'learning_rate': 3.965307091713037e-07, 'fcm_dpo/beta': 0.0030737267807126045, 'fcm_dpo/q_t': 0.42006951570510864, 'fcm_dpo/delta': 0.03981554135680199, 'fcm_dpo/margin': 117.6561279296875, 'margin_dpo/margin_mean': 117.6561279296875, 'margin_dpo/margin_std': 197.60504150390625, 'logps/chosen': -227.42269897460938, 'logps/rejected': -381.2314758300781, 'logps/ref_chosen': -54.154998779296875, 'logps/ref_rejected': -90.30764770507812, 'KL/chosen_KL_mean': -173.2677001953125, 'KL/rejected_KL_mean': -290.923828125, 'KL/mean': -232.09576416015625, 'KL/std': 157.92564392089844, 'logits/chosen': -0.37722885608673096, 'logits/rejected': -0.364244282245636, 'epoch': 0.37} + 37%|███▋ | 254/681 [10:46<18:25, 2.59s/it] 37%|███▋ | 255/681 [10:48<17:58, 2.53s/it] {'loss': 1.107, 'grad_norm': 21.194082260131836, 'learning_rate': 3.954890003969163e-07, 'fcm_dpo/beta': 0.0030783750116825104, 'fcm_dpo/q_t': 0.4109645187854767, 'fcm_dpo/delta': 0.015461381524801254, 'fcm_dpo/margin': 125.03014373779297, 'margin_dpo/margin_mean': 125.03013610839844, 'margin_dpo/margin_std': 173.13796997070312, 'logps/chosen': -227.46080017089844, 'logps/rejected': -385.557861328125, 'logps/ref_chosen': -57.14167022705078, 'logps/ref_rejected': -90.2085952758789, 'KL/chosen_KL_mean': -170.31912231445312, 'KL/rejected_KL_mean': -295.3492736816406, 'KL/mean': -232.83419799804688, 'KL/std': 138.8697967529297, 'logits/chosen': -0.3547831177711487, 'logits/rejected': -0.3463220000267029, 'epoch': 0.37} + 37%|███▋ | 255/681 [10:48<17:58, 2.53s/it] 38%|███▊ | 256/681 [10:51<17:57, 2.53s/it] {'loss': 1.0822, 'grad_norm': 39.32383346557617, 'learning_rate': 3.944434578520628e-07, 'fcm_dpo/beta': 0.0030894456431269646, 'fcm_dpo/q_t': 0.40535274147987366, 'fcm_dpo/delta': -0.010427280329167843, 'fcm_dpo/margin': 132.71002197265625, 'margin_dpo/margin_mean': 132.71002197265625, 'margin_dpo/margin_std': 170.27206420898438, 'logps/chosen': -202.24896240234375, 'logps/rejected': -372.3583984375, 'logps/ref_chosen': -55.163490295410156, 'logps/ref_rejected': -92.56291961669922, 'KL/chosen_KL_mean': -147.08546447753906, 'KL/rejected_KL_mean': -279.79547119140625, 'KL/mean': -213.44046020507812, 'KL/std': 147.87371826171875, 'logits/chosen': -0.3375306725502014, 'logits/rejected': -0.3469845950603485, 'epoch': 0.38} + 38%|███▊ | 256/681 [10:51<17:57, 2.53s/it] 38%|███▊ | 257/681 [10:53<18:09, 2.57s/it] {'loss': 1.0705, 'grad_norm': 23.27699089050293, 'learning_rate': 3.933941090877615e-07, 'fcm_dpo/beta': 0.00305275060236454, 'fcm_dpo/q_t': 0.40170031785964966, 'fcm_dpo/delta': -0.03301442041993141, 'fcm_dpo/margin': 141.07920837402344, 'margin_dpo/margin_mean': 141.07920837402344, 'margin_dpo/margin_std': 172.331787109375, 'logps/chosen': -190.0641632080078, 'logps/rejected': -361.257568359375, 'logps/ref_chosen': -49.42369842529297, 'logps/ref_rejected': -79.53791809082031, 'KL/chosen_KL_mean': -140.64047241210938, 'KL/rejected_KL_mean': -281.71966552734375, 'KL/mean': -211.18006896972656, 'KL/std': 152.0925750732422, 'logits/chosen': -0.3658456802368164, 'logits/rejected': -0.3539636731147766, 'epoch': 0.38} + 38%|███▊ | 257/681 [10:53<18:09, 2.57s/it] 38%|███▊ | 258/681 [10:56<17:27, 2.48s/it] {'loss': 1.0923, 'grad_norm': 33.59754943847656, 'learning_rate': 3.923409817553284e-07, 'fcm_dpo/beta': 0.003063221462070942, 'fcm_dpo/q_t': 0.403425931930542, 'fcm_dpo/delta': -0.017175834625959396, 'fcm_dpo/margin': 135.947265625, 'margin_dpo/margin_mean': 135.947265625, 'margin_dpo/margin_std': 190.87680053710938, 'logps/chosen': -245.93429565429688, 'logps/rejected': -418.487548828125, 'logps/ref_chosen': -59.384124755859375, 'logps/ref_rejected': -95.99010467529297, 'KL/chosen_KL_mean': -186.5501708984375, 'KL/rejected_KL_mean': -322.4974365234375, 'KL/mean': -254.5238037109375, 'KL/std': 160.353515625, 'logits/chosen': -0.33080989122390747, 'logits/rejected': -0.3309909701347351, 'epoch': 0.38} + 38%|███▊ | 258/681 [10:56<17:27, 2.48s/it] 38%|███▊ | 259/681 [10:58<17:29, 2.49s/it] {'loss': 1.1328, 'grad_norm': 27.576894760131836, 'learning_rate': 3.9128410360564793e-07, 'fcm_dpo/beta': 0.003068537451326847, 'fcm_dpo/q_t': 0.4190807044506073, 'fcm_dpo/delta': 0.046667762100696564, 'fcm_dpo/margin': 115.67851257324219, 'margin_dpo/margin_mean': 115.67851257324219, 'margin_dpo/margin_std': 177.17535400390625, 'logps/chosen': -233.31454467773438, 'logps/rejected': -385.35638427734375, 'logps/ref_chosen': -52.828346252441406, 'logps/ref_rejected': -89.191650390625, 'KL/chosen_KL_mean': -180.4862060546875, 'KL/rejected_KL_mean': -296.16473388671875, 'KL/mean': -238.32546997070312, 'KL/std': 154.69522094726562, 'logits/chosen': -0.3981941342353821, 'logits/rejected': -0.39807045459747314, 'epoch': 0.38} + 38%|███▊ | 259/681 [10:58<17:29, 2.49s/it] 38%|███▊ | 260/681 [11:01<17:49, 2.54s/it] {'loss': 1.0238, 'grad_norm': 31.068809509277344, 'learning_rate': 3.9022350248844246e-07, 'fcm_dpo/beta': 0.003057563677430153, 'fcm_dpo/q_t': 0.39034503698349, 'fcm_dpo/delta': -0.08508844673633575, 'fcm_dpo/margin': 157.31893920898438, 'margin_dpo/margin_mean': 157.31893920898438, 'margin_dpo/margin_std': 170.58743286132812, 'logps/chosen': -234.81146240234375, 'logps/rejected': -439.802490234375, 'logps/ref_chosen': -47.41767501831055, 'logps/ref_rejected': -95.08978271484375, 'KL/chosen_KL_mean': -187.39376831054688, 'KL/rejected_KL_mean': -344.71270751953125, 'KL/mean': -266.05322265625, 'KL/std': 160.06930541992188, 'logits/chosen': -0.3578794002532959, 'logits/rejected': -0.37384307384490967, 'epoch': 0.38} + 38%|███▊ | 260/681 [11:01<17:49, 2.54s/it] 38%|███▊ | 261/681 [11:03<17:05, 2.44s/it] {'loss': 1.0747, 'grad_norm': 19.665233612060547, 'learning_rate': 3.891592063515376e-07, 'fcm_dpo/beta': 0.0030062044970691204, 'fcm_dpo/q_t': 0.4001613259315491, 'fcm_dpo/delta': -0.045364413410425186, 'fcm_dpo/margin': 147.43002319335938, 'margin_dpo/margin_mean': 147.43002319335938, 'margin_dpo/margin_std': 202.10751342773438, 'logps/chosen': -251.45053100585938, 'logps/rejected': -434.3641357421875, 'logps/ref_chosen': -53.03137969970703, 'logps/ref_rejected': -88.51494598388672, 'KL/chosen_KL_mean': -198.41915893554688, 'KL/rejected_KL_mean': -345.84918212890625, 'KL/mean': -272.1341552734375, 'KL/std': 177.0203857421875, 'logits/chosen': -0.3024892210960388, 'logits/rejected': -0.303438663482666, 'epoch': 0.38} + 38%|███▊ | 261/681 [11:03<17:05, 2.44s/it] 38%|███▊ | 262/681 [11:05<16:54, 2.42s/it] {'loss': 1.1084, 'grad_norm': 23.96550750732422, 'learning_rate': 3.880912432401264e-07, 'fcm_dpo/beta': 0.0030295196920633316, 'fcm_dpo/q_t': 0.4150589108467102, 'fcm_dpo/delta': 0.035723648965358734, 'fcm_dpo/margin': 120.58787536621094, 'margin_dpo/margin_mean': 120.58787536621094, 'margin_dpo/margin_std': 159.1489715576172, 'logps/chosen': -291.3773193359375, 'logps/rejected': -438.76361083984375, 'logps/ref_chosen': -59.620140075683594, 'logps/ref_rejected': -86.41853332519531, 'KL/chosen_KL_mean': -231.7572021484375, 'KL/rejected_KL_mean': -352.3450927734375, 'KL/mean': -292.0511474609375, 'KL/std': 160.82723999023438, 'logits/chosen': -0.3180779814720154, 'logits/rejected': -0.29295575618743896, 'epoch': 0.38} + 38%|███▊ | 262/681 [11:05<16:54, 2.42s/it] 39%|███▊ | 263/681 [11:08<16:54, 2.43s/it] {'loss': 1.0246, 'grad_norm': 20.86168098449707, 'learning_rate': 3.870196412960302e-07, 'fcm_dpo/beta': 0.002960496349260211, 'fcm_dpo/q_t': 0.38409924507141113, 'fcm_dpo/delta': -0.11779750883579254, 'fcm_dpo/margin': 172.56646728515625, 'margin_dpo/margin_mean': 172.56646728515625, 'margin_dpo/margin_std': 205.58511352539062, 'logps/chosen': -273.7001953125, 'logps/rejected': -483.70294189453125, 'logps/ref_chosen': -59.42094421386719, 'logps/ref_rejected': -96.85720825195312, 'KL/chosen_KL_mean': -214.27926635742188, 'KL/rejected_KL_mean': -386.8457336425781, 'KL/mean': -300.5625, 'KL/std': 190.634033203125, 'logits/chosen': -0.3342781960964203, 'logits/rejected': -0.31365495920181274, 'epoch': 0.39} + 39%|███▊ | 263/681 [11:08<16:54, 2.43s/it] 39%|███▉ | 264/681 [11:11<17:31, 2.52s/it] {'loss': 1.0907, 'grad_norm': 22.25633430480957, 'learning_rate': 3.8594442875695665e-07, 'fcm_dpo/beta': 0.0029227761551737785, 'fcm_dpo/q_t': 0.4040907025337219, 'fcm_dpo/delta': -0.026993874460458755, 'fcm_dpo/margin': 145.28106689453125, 'margin_dpo/margin_mean': 145.2810821533203, 'margin_dpo/margin_std': 201.77871704101562, 'logps/chosen': -290.8427734375, 'logps/rejected': -467.25799560546875, 'logps/ref_chosen': -62.722084045410156, 'logps/ref_rejected': -93.85620880126953, 'KL/chosen_KL_mean': -228.12066650390625, 'KL/rejected_KL_mean': -373.40179443359375, 'KL/mean': -300.76123046875, 'KL/std': 175.52401733398438, 'logits/chosen': -0.37826618552207947, 'logits/rejected': -0.3711628019809723, 'epoch': 0.39} + 39%|███▉ | 264/681 [11:11<17:31, 2.52s/it] 39%|███▉ | 265/681 [11:13<17:28, 2.52s/it] {'loss': 1.1159, 'grad_norm': 26.028135299682617, 'learning_rate': 3.848656339557562e-07, 'fcm_dpo/beta': 0.0029316158033907413, 'fcm_dpo/q_t': 0.40785303711891174, 'fcm_dpo/delta': -0.019387083128094673, 'fcm_dpo/margin': 142.77999877929688, 'margin_dpo/margin_mean': 142.77999877929688, 'margin_dpo/margin_std': 233.77301025390625, 'logps/chosen': -304.80584716796875, 'logps/rejected': -473.6349792480469, 'logps/ref_chosen': -61.971466064453125, 'logps/ref_rejected': -88.02059936523438, 'KL/chosen_KL_mean': -242.83438110351562, 'KL/rejected_KL_mean': -385.6143798828125, 'KL/mean': -314.224365234375, 'KL/std': 194.88223266601562, 'logits/chosen': -0.3183874785900116, 'logits/rejected': -0.304283082485199, 'epoch': 0.39} + 39%|███▉ | 265/681 [11:13<17:28, 2.52s/it] 39%|███▉ | 266/681 [11:16<17:22, 2.51s/it] {'loss': 1.1555, 'grad_norm': 42.105552673339844, 'learning_rate': 3.8378328531967507e-07, 'fcm_dpo/beta': 0.0029581869021058083, 'fcm_dpo/q_t': 0.42461222410202026, 'fcm_dpo/delta': 0.06766145676374435, 'fcm_dpo/margin': 113.11186218261719, 'margin_dpo/margin_mean': 113.11186218261719, 'margin_dpo/margin_std': 193.41064453125, 'logps/chosen': -314.1628112792969, 'logps/rejected': -428.146240234375, 'logps/ref_chosen': -67.09967041015625, 'logps/ref_rejected': -67.97122192382812, 'KL/chosen_KL_mean': -247.06314086914062, 'KL/rejected_KL_mean': -360.17498779296875, 'KL/mean': -303.61907958984375, 'KL/std': 165.74261474609375, 'logits/chosen': -0.33068183064460754, 'logits/rejected': -0.2887161374092102, 'epoch': 0.39} + 39%|███▉ | 266/681 [11:16<17:22, 2.51s/it] 39%|███▉ | 267/681 [11:18<17:24, 2.52s/it] {'loss': 1.1093, 'grad_norm': 30.943706512451172, 'learning_rate': 3.8269741136960646e-07, 'fcm_dpo/beta': 0.002957455348223448, 'fcm_dpo/q_t': 0.4094040095806122, 'fcm_dpo/delta': 0.0007606670260429382, 'fcm_dpo/margin': 134.9067840576172, 'margin_dpo/margin_mean': 134.90679931640625, 'margin_dpo/margin_std': 202.06175231933594, 'logps/chosen': -287.60015869140625, 'logps/rejected': -443.7046203613281, 'logps/ref_chosen': -68.97075653076172, 'logps/ref_rejected': -90.16844940185547, 'KL/chosen_KL_mean': -218.62939453125, 'KL/rejected_KL_mean': -353.53619384765625, 'KL/mean': -286.082763671875, 'KL/std': 175.55633544921875, 'logits/chosen': -0.4011858105659485, 'logits/rejected': -0.37256526947021484, 'epoch': 0.39} + 39%|███▉ | 267/681 [11:18<17:24, 2.52s/it] 39%|███▉ | 268/681 [11:21<17:26, 2.53s/it] {'loss': 1.0936, 'grad_norm': 23.621414184570312, 'learning_rate': 3.8160804071933894e-07, 'fcm_dpo/beta': 0.0029503919649869204, 'fcm_dpo/q_t': 0.40528371930122375, 'fcm_dpo/delta': -0.02154139243066311, 'fcm_dpo/margin': 142.48526000976562, 'margin_dpo/margin_mean': 142.48526000976562, 'margin_dpo/margin_std': 206.6456298828125, 'logps/chosen': -278.20574951171875, 'logps/rejected': -466.4383239746094, 'logps/ref_chosen': -55.90031051635742, 'logps/ref_rejected': -101.64763641357422, 'KL/chosen_KL_mean': -222.305419921875, 'KL/rejected_KL_mean': -364.79071044921875, 'KL/mean': -293.548095703125, 'KL/std': 170.23300170898438, 'logits/chosen': -0.3952227234840393, 'logits/rejected': -0.40232187509536743, 'epoch': 0.39} + 39%|███▉ | 268/681 [11:21<17:26, 2.53s/it] 40%|███▉ | 269/681 [11:23<17:14, 2.51s/it] {'loss': 1.0422, 'grad_norm': 23.399341583251953, 'learning_rate': 3.8051520207480204e-07, 'fcm_dpo/beta': 0.002910827985033393, 'fcm_dpo/q_t': 0.38658440113067627, 'fcm_dpo/delta': -0.109227254986763, 'fcm_dpo/margin': 173.074951171875, 'margin_dpo/margin_mean': 173.074951171875, 'margin_dpo/margin_std': 224.85488891601562, 'logps/chosen': -303.83038330078125, 'logps/rejected': -514.2151489257812, 'logps/ref_chosen': -70.03955841064453, 'logps/ref_rejected': -107.34937286376953, 'KL/chosen_KL_mean': -233.79080200195312, 'KL/rejected_KL_mean': -406.86578369140625, 'KL/mean': -320.32830810546875, 'KL/std': 176.70233154296875, 'logits/chosen': -0.4322871267795563, 'logits/rejected': -0.4134712517261505, 'epoch': 0.4} + 40%|███▉ | 269/681 [11:23<17:14, 2.51s/it] 40%|███▉ | 270/681 [11:26<17:32, 2.56s/it] {'loss': 1.1288, 'grad_norm': 25.85466194152832, 'learning_rate': 3.794189242333106e-07, 'fcm_dpo/beta': 0.002915448509156704, 'fcm_dpo/q_t': 0.4160010814666748, 'fcm_dpo/delta': 0.03554879128932953, 'fcm_dpo/margin': 125.37371826171875, 'margin_dpo/margin_mean': 125.37371826171875, 'margin_dpo/margin_std': 193.83901977539062, 'logps/chosen': -275.7391357421875, 'logps/rejected': -441.5080261230469, 'logps/ref_chosen': -69.53347778320312, 'logps/ref_rejected': -109.92864990234375, 'KL/chosen_KL_mean': -206.2056427001953, 'KL/rejected_KL_mean': -331.57940673828125, 'KL/mean': -268.89251708984375, 'KL/std': 156.54763793945312, 'logits/chosen': -0.46122607588768005, 'logits/rejected': -0.45263946056365967, 'epoch': 0.4} + 40%|███▉ | 270/681 [11:26<17:32, 2.56s/it] 40%|███▉ | 271/681 [11:28<17:12, 2.52s/it] {'loss': 1.0476, 'grad_norm': 22.624128341674805, 'learning_rate': 3.7831923608280514e-07, 'fcm_dpo/beta': 0.0028847784269601107, 'fcm_dpo/q_t': 0.3972168564796448, 'fcm_dpo/delta': -0.049142319709062576, 'fcm_dpo/margin': 154.854248046875, 'margin_dpo/margin_mean': 154.85426330566406, 'margin_dpo/margin_std': 175.0887908935547, 'logps/chosen': -248.6131591796875, 'logps/rejected': -439.2166748046875, 'logps/ref_chosen': -56.76456832885742, 'logps/ref_rejected': -92.51383972167969, 'KL/chosen_KL_mean': -191.8485870361328, 'KL/rejected_KL_mean': -346.70281982421875, 'KL/mean': -269.27569580078125, 'KL/std': 154.6339111328125, 'logits/chosen': -0.4137924313545227, 'logits/rejected': -0.39371395111083984, 'epoch': 0.4} + 40%|███▉ | 271/681 [11:28<17:12, 2.52s/it] 40%|███▉ | 272/681 [11:31<17:35, 2.58s/it] {'loss': 0.9828, 'grad_norm': 32.70468521118164, 'learning_rate': 3.772161666010912e-07, 'fcm_dpo/beta': 0.002835802501067519, 'fcm_dpo/q_t': 0.37538450956344604, 'fcm_dpo/delta': -0.1421043574810028, 'fcm_dpo/margin': 188.56448364257812, 'margin_dpo/margin_mean': 188.56448364257812, 'margin_dpo/margin_std': 178.37030029296875, 'logps/chosen': -230.33010864257812, 'logps/rejected': -474.94024658203125, 'logps/ref_chosen': -49.497154235839844, 'logps/ref_rejected': -105.54279327392578, 'KL/chosen_KL_mean': -180.83294677734375, 'KL/rejected_KL_mean': -369.3974304199219, 'KL/mean': -275.11517333984375, 'KL/std': 171.1865234375, 'logits/chosen': -0.3418217897415161, 'logits/rejected': -0.350533664226532, 'epoch': 0.4} + 40%|███▉ | 272/681 [11:31<17:35, 2.58s/it] 40%|████ | 273/681 [11:33<17:08, 2.52s/it] {'loss': 1.0078, 'grad_norm': 21.104841232299805, 'learning_rate': 3.761097448550755e-07, 'fcm_dpo/beta': 0.00274536176584661, 'fcm_dpo/q_t': 0.3824120759963989, 'fcm_dpo/delta': -0.11640150099992752, 'fcm_dpo/margin': 185.6915283203125, 'margin_dpo/margin_mean': 185.6915283203125, 'margin_dpo/margin_std': 196.62474060058594, 'logps/chosen': -274.3792724609375, 'logps/rejected': -489.593994140625, 'logps/ref_chosen': -62.97539520263672, 'logps/ref_rejected': -92.49858093261719, 'KL/chosen_KL_mean': -211.40390014648438, 'KL/rejected_KL_mean': -397.0954284667969, 'KL/mean': -304.2496643066406, 'KL/std': 175.37908935546875, 'logits/chosen': -0.3616599440574646, 'logits/rejected': -0.34069812297821045, 'epoch': 0.4} + 40%|████ | 273/681 [11:33<17:08, 2.52s/it] 40%|████ | 274/681 [11:36<16:47, 2.48s/it] {'loss': 1.1023, 'grad_norm': 26.784929275512695, 'learning_rate': 3.75e-07, 'fcm_dpo/beta': 0.0027546617202460766, 'fcm_dpo/q_t': 0.4122427701950073, 'fcm_dpo/delta': 0.021147366613149643, 'fcm_dpo/margin': 137.75990295410156, 'margin_dpo/margin_mean': 137.7598876953125, 'margin_dpo/margin_std': 182.3780517578125, 'logps/chosen': -310.5295715332031, 'logps/rejected': -469.954833984375, 'logps/ref_chosen': -55.66770935058594, 'logps/ref_rejected': -77.33308410644531, 'KL/chosen_KL_mean': -254.86184692382812, 'KL/rejected_KL_mean': -392.6217346191406, 'KL/mean': -323.7417907714844, 'KL/std': 157.4027099609375, 'logits/chosen': -0.2770734429359436, 'logits/rejected': -0.2582925260066986, 'epoch': 0.4} + 40%|████ | 274/681 [11:36<16:47, 2.48s/it] 40%|████ | 275/681 [11:38<17:20, 2.56s/it] {'loss': 1.0776, 'grad_norm': 23.660945892333984, 'learning_rate': 3.738869612786737e-07, 'fcm_dpo/beta': 0.0027543343603610992, 'fcm_dpo/q_t': 0.4048367142677307, 'fcm_dpo/delta': -0.010161615908145905, 'fcm_dpo/margin': 148.71763610839844, 'margin_dpo/margin_mean': 148.71762084960938, 'margin_dpo/margin_std': 181.3911590576172, 'logps/chosen': -249.32666015625, 'logps/rejected': -442.7532958984375, 'logps/ref_chosen': -48.594703674316406, 'logps/ref_rejected': -93.30369567871094, 'KL/chosen_KL_mean': -200.73196411132812, 'KL/rejected_KL_mean': -349.4495849609375, 'KL/mean': -275.09075927734375, 'KL/std': 161.7529296875, 'logits/chosen': -0.3343503177165985, 'logits/rejected': -0.3386019766330719, 'epoch': 0.4} + 40%|████ | 275/681 [11:38<17:20, 2.56s/it] 41%|████ | 276/681 [11:41<17:09, 2.54s/it] {'loss': 1.0889, 'grad_norm': 28.4442195892334, 'learning_rate': 3.7277065802070204e-07, 'fcm_dpo/beta': 0.002734929323196411, 'fcm_dpo/q_t': 0.40702730417251587, 'fcm_dpo/delta': -0.0065729208290576935, 'fcm_dpo/margin': 148.5008544921875, 'margin_dpo/margin_mean': 148.5008544921875, 'margin_dpo/margin_std': 197.42532348632812, 'logps/chosen': -276.5999450683594, 'logps/rejected': -438.8890380859375, 'logps/ref_chosen': -56.57740783691406, 'logps/ref_rejected': -70.36566925048828, 'KL/chosen_KL_mean': -220.0225372314453, 'KL/rejected_KL_mean': -368.52337646484375, 'KL/mean': -294.27294921875, 'KL/std': 166.26907348632812, 'logits/chosen': -0.33677220344543457, 'logits/rejected': -0.31244635581970215, 'epoch': 0.41} + 41%|████ | 276/681 [11:41<17:09, 2.54s/it] 41%|████ | 277/681 [11:43<16:35, 2.46s/it] {'loss': 1.0752, 'grad_norm': 25.2097225189209, 'learning_rate': 3.71651119641714e-07, 'fcm_dpo/beta': 0.0027346829883754253, 'fcm_dpo/q_t': 0.40158289670944214, 'fcm_dpo/delta': -0.028729649260640144, 'fcm_dpo/margin': 156.3212890625, 'margin_dpo/margin_mean': 156.3212890625, 'margin_dpo/margin_std': 201.70455932617188, 'logps/chosen': -295.73431396484375, 'logps/rejected': -488.6653137207031, 'logps/ref_chosen': -56.27156066894531, 'logps/ref_rejected': -92.88127136230469, 'KL/chosen_KL_mean': -239.46275329589844, 'KL/rejected_KL_mean': -395.7840576171875, 'KL/mean': -317.6234130859375, 'KL/std': 170.51141357421875, 'logits/chosen': -0.2805439829826355, 'logits/rejected': -0.2614123523235321, 'epoch': 0.41} + 41%|████ | 277/681 [11:43<16:35, 2.46s/it] 41%|████ | 278/681 [11:46<16:59, 2.53s/it] {'loss': 1.0282, 'grad_norm': 23.9796199798584, 'learning_rate': 3.705283756425872e-07, 'fcm_dpo/beta': 0.0026860979851335287, 'fcm_dpo/q_t': 0.38982489705085754, 'fcm_dpo/delta': -0.08402146399021149, 'fcm_dpo/margin': 178.59414672851562, 'margin_dpo/margin_mean': 178.59414672851562, 'margin_dpo/margin_std': 198.23104858398438, 'logps/chosen': -268.568115234375, 'logps/rejected': -485.4739074707031, 'logps/ref_chosen': -52.94194030761719, 'logps/ref_rejected': -91.25357818603516, 'KL/chosen_KL_mean': -215.6261749267578, 'KL/rejected_KL_mean': -394.2203369140625, 'KL/mean': -304.9232482910156, 'KL/std': 179.00375366210938, 'logits/chosen': -0.31917044520378113, 'logits/rejected': -0.32198548316955566, 'epoch': 0.41} + 41%|████ | 278/681 [11:46<16:59, 2.53s/it] 41%|████ | 279/681 [11:48<16:57, 2.53s/it] {'loss': 1.0662, 'grad_norm': 28.65115737915039, 'learning_rate': 3.6940245560867e-07, 'fcm_dpo/beta': 0.0026397216133773327, 'fcm_dpo/q_t': 0.3971731960773468, 'fcm_dpo/delta': -0.058702681213617325, 'fcm_dpo/margin': 172.298583984375, 'margin_dpo/margin_mean': 172.298583984375, 'margin_dpo/margin_std': 225.11993408203125, 'logps/chosen': -301.76824951171875, 'logps/rejected': -513.2769775390625, 'logps/ref_chosen': -48.641319274902344, 'logps/ref_rejected': -87.8514404296875, 'KL/chosen_KL_mean': -253.12692260742188, 'KL/rejected_KL_mean': -425.425537109375, 'KL/mean': -339.2762145996094, 'KL/std': 185.4033966064453, 'logits/chosen': -0.2409660816192627, 'logits/rejected': -0.23715651035308838, 'epoch': 0.41} + 41%|████ | 279/681 [11:48<16:57, 2.53s/it] 41%|████ | 280/681 [11:51<17:08, 2.56s/it] {'loss': 1.0195, 'grad_norm': 23.646604537963867, 'learning_rate': 3.6827338920900253e-07, 'fcm_dpo/beta': 0.0026130005717277527, 'fcm_dpo/q_t': 0.3867556154727936, 'fcm_dpo/delta': -0.08905528485774994, 'fcm_dpo/margin': 185.492431640625, 'margin_dpo/margin_mean': 185.492431640625, 'margin_dpo/margin_std': 194.14852905273438, 'logps/chosen': -310.5230407714844, 'logps/rejected': -535.837158203125, 'logps/ref_chosen': -58.797122955322266, 'logps/ref_rejected': -98.61885070800781, 'KL/chosen_KL_mean': -251.72592163085938, 'KL/rejected_KL_mean': -437.21832275390625, 'KL/mean': -344.47216796875, 'KL/std': 179.44480895996094, 'logits/chosen': -0.29928305745124817, 'logits/rejected': -0.29868048429489136, 'epoch': 0.41} + 41%|████ | 280/681 [11:51<17:08, 2.56s/it] 41%|████▏ | 281/681 [11:54<17:07, 2.57s/it] {'loss': 1.0576, 'grad_norm': 23.116756439208984, 'learning_rate': 3.6714120619553435e-07, 'fcm_dpo/beta': 0.0025973522569984198, 'fcm_dpo/q_t': 0.3972865343093872, 'fcm_dpo/delta': -0.036213867366313934, 'fcm_dpo/margin': 167.29629516601562, 'margin_dpo/margin_mean': 167.29629516601562, 'margin_dpo/margin_std': 190.1474609375, 'logps/chosen': -285.50469970703125, 'logps/rejected': -478.195068359375, 'logps/ref_chosen': -55.488521575927734, 'logps/ref_rejected': -80.88258361816406, 'KL/chosen_KL_mean': -230.0161895751953, 'KL/rejected_KL_mean': -397.3125, 'KL/mean': -313.6643371582031, 'KL/std': 175.47067260742188, 'logits/chosen': -0.32348203659057617, 'logits/rejected': -0.2926068902015686, 'epoch': 0.41} + 41%|████▏ | 281/681 [11:54<17:07, 2.57s/it] 41%|████▏ | 282/681 [11:56<17:00, 2.56s/it] {'loss': 1.1456, 'grad_norm': 23.07098388671875, 'learning_rate': 3.660059364023408e-07, 'fcm_dpo/beta': 0.0026173896621912718, 'fcm_dpo/q_t': 0.42490720748901367, 'fcm_dpo/delta': 0.06215311959385872, 'fcm_dpo/margin': 129.69415283203125, 'margin_dpo/margin_mean': 129.69415283203125, 'margin_dpo/margin_std': 215.59033203125, 'logps/chosen': -327.32122802734375, 'logps/rejected': -479.29620361328125, 'logps/ref_chosen': -73.07014465332031, 'logps/ref_rejected': -95.35098266601562, 'KL/chosen_KL_mean': -254.25106811523438, 'KL/rejected_KL_mean': -383.9451904296875, 'KL/mean': -319.09814453125, 'KL/std': 183.95884704589844, 'logits/chosen': -0.407487154006958, 'logits/rejected': -0.3855167031288147, 'epoch': 0.41} + 41%|████▏ | 282/681 [11:56<17:00, 2.56s/it] 42%|████▏ | 283/681 [11:59<16:52, 2.54s/it] {'loss': 1.0023, 'grad_norm': 27.85664176940918, 'learning_rate': 3.6486760974483685e-07, 'fcm_dpo/beta': 0.002578072715550661, 'fcm_dpo/q_t': 0.3798179626464844, 'fcm_dpo/delta': -0.12350119650363922, 'fcm_dpo/margin': 200.64541625976562, 'margin_dpo/margin_mean': 200.64540100097656, 'margin_dpo/margin_std': 206.13702392578125, 'logps/chosen': -319.53778076171875, 'logps/rejected': -555.2713012695312, 'logps/ref_chosen': -61.89844512939453, 'logps/ref_rejected': -96.98655700683594, 'KL/chosen_KL_mean': -257.63934326171875, 'KL/rejected_KL_mean': -458.2847595214844, 'KL/mean': -357.9620361328125, 'KL/std': 216.70352172851562, 'logits/chosen': -0.3043696880340576, 'logits/rejected': -0.3012719452381134, 'epoch': 0.42} + 42%|████▏ | 283/681 [11:59<16:52, 2.54s/it] 42%|████▏ | 284/681 [12:01<17:12, 2.60s/it] {'loss': 1.0327, 'grad_norm': 21.307559967041016, 'learning_rate': 3.6372625621898863e-07, 'fcm_dpo/beta': 0.0025225451681762934, 'fcm_dpo/q_t': 0.39294561743736267, 'fcm_dpo/delta': -0.06718793511390686, 'fcm_dpo/margin': 183.906494140625, 'margin_dpo/margin_mean': 183.906494140625, 'margin_dpo/margin_std': 199.44464111328125, 'logps/chosen': -299.55535888671875, 'logps/rejected': -518.49560546875, 'logps/ref_chosen': -58.4355354309082, 'logps/ref_rejected': -93.46926879882812, 'KL/chosen_KL_mean': -241.11981201171875, 'KL/rejected_KL_mean': -425.02630615234375, 'KL/mean': -333.07305908203125, 'KL/std': 198.67276000976562, 'logits/chosen': -0.38930195569992065, 'logits/rejected': -0.37415850162506104, 'epoch': 0.42} + 42%|████▏ | 284/681 [12:01<17:12, 2.60s/it] 42%|████▏ | 285/681 [12:04<17:06, 2.59s/it] {'loss': 1.0824, 'grad_norm': 25.553455352783203, 'learning_rate': 3.625819059005228e-07, 'fcm_dpo/beta': 0.0025313901714980602, 'fcm_dpo/q_t': 0.4059276878833771, 'fcm_dpo/delta': 0.001558154821395874, 'fcm_dpo/margin': 157.2245330810547, 'margin_dpo/margin_mean': 157.2245330810547, 'margin_dpo/margin_std': 185.91542053222656, 'logps/chosen': -338.38751220703125, 'logps/rejected': -528.5067138671875, 'logps/ref_chosen': -66.23219299316406, 'logps/ref_rejected': -99.1268310546875, 'KL/chosen_KL_mean': -272.1553039550781, 'KL/rejected_KL_mean': -429.3798522949219, 'KL/mean': -350.767578125, 'KL/std': 169.2427978515625, 'logits/chosen': -0.3947150707244873, 'logits/rejected': -0.37686532735824585, 'epoch': 0.42} + 42%|████▏ | 285/681 [12:04<17:06, 2.59s/it] 42%|████▏ | 286/681 [12:07<17:07, 2.60s/it] {'loss': 1.0627, 'grad_norm': 24.010190963745117, 'learning_rate': 3.614345889441346e-07, 'fcm_dpo/beta': 0.002504766918718815, 'fcm_dpo/q_t': 0.3975168466567993, 'fcm_dpo/delta': -0.04638573154807091, 'fcm_dpo/margin': 177.4013671875, 'margin_dpo/margin_mean': 177.40138244628906, 'margin_dpo/margin_std': 221.60133361816406, 'logps/chosen': -341.6971740722656, 'logps/rejected': -534.7359619140625, 'logps/ref_chosen': -72.95100402832031, 'logps/ref_rejected': -88.58845520019531, 'KL/chosen_KL_mean': -268.74615478515625, 'KL/rejected_KL_mean': -446.1475524902344, 'KL/mean': -357.44683837890625, 'KL/std': 201.09698486328125, 'logits/chosen': -0.3724295198917389, 'logits/rejected': -0.3507525324821472, 'epoch': 0.42} + 42%|████▏ | 286/681 [12:07<17:07, 2.60s/it] 42%|████▏ | 287/681 [12:09<16:24, 2.50s/it] {'loss': 1.1047, 'grad_norm': 21.714580535888672, 'learning_rate': 3.6028433558269275e-07, 'fcm_dpo/beta': 0.002508362988010049, 'fcm_dpo/q_t': 0.41409826278686523, 'fcm_dpo/delta': 0.028594862669706345, 'fcm_dpo/margin': 148.4761199951172, 'margin_dpo/margin_mean': 148.47613525390625, 'margin_dpo/margin_std': 196.79652404785156, 'logps/chosen': -306.6651916503906, 'logps/rejected': -471.2962341308594, 'logps/ref_chosen': -61.54115295410156, 'logps/ref_rejected': -77.69607543945312, 'KL/chosen_KL_mean': -245.1240234375, 'KL/rejected_KL_mean': -393.60015869140625, 'KL/mean': -319.36212158203125, 'KL/std': 172.98507690429688, 'logits/chosen': -0.36127710342407227, 'logits/rejected': -0.33609965443611145, 'epoch': 0.42} + 42%|████▏ | 287/681 [12:09<16:24, 2.50s/it] 42%|████▏ | 288/681 [12:11<16:30, 2.52s/it] {'loss': 1.0344, 'grad_norm': 21.64046287536621, 'learning_rate': 3.5913117612644327e-07, 'fcm_dpo/beta': 0.0024691871367394924, 'fcm_dpo/q_t': 0.39265191555023193, 'fcm_dpo/delta': -0.06168384104967117, 'fcm_dpo/margin': 185.25796508789062, 'margin_dpo/margin_mean': 185.25796508789062, 'margin_dpo/margin_std': 186.76431274414062, 'logps/chosen': -297.84820556640625, 'logps/rejected': -513.7806396484375, 'logps/ref_chosen': -56.661224365234375, 'logps/ref_rejected': -87.33570098876953, 'KL/chosen_KL_mean': -241.18698120117188, 'KL/rejected_KL_mean': -426.4449462890625, 'KL/mean': -333.81597900390625, 'KL/std': 176.66453552246094, 'logits/chosen': -0.3853694200515747, 'logits/rejected': -0.36701393127441406, 'epoch': 0.42} + 42%|████▏ | 288/681 [12:11<16:30, 2.52s/it] 42%|████▏ | 289/681 [12:14<16:27, 2.52s/it] {'loss': 1.0067, 'grad_norm': 24.331398010253906, 'learning_rate': 3.5797514096221024e-07, 'fcm_dpo/beta': 0.0024433922953903675, 'fcm_dpo/q_t': 0.38267892599105835, 'fcm_dpo/delta': -0.11754532158374786, 'fcm_dpo/margin': 209.41796875, 'margin_dpo/margin_mean': 209.41796875, 'margin_dpo/margin_std': 220.97320556640625, 'logps/chosen': -279.693603515625, 'logps/rejected': -531.5238037109375, 'logps/ref_chosen': -45.23039245605469, 'logps/ref_rejected': -87.64266967773438, 'KL/chosen_KL_mean': -234.46319580078125, 'KL/rejected_KL_mean': -443.88116455078125, 'KL/mean': -339.17218017578125, 'KL/std': 201.45144653320312, 'logits/chosen': -0.3093263506889343, 'logits/rejected': -0.30716824531555176, 'epoch': 0.42} + 42%|████▏ | 289/681 [12:14<16:27, 2.52s/it] 43%|████▎ | 290/681 [12:16<16:25, 2.52s/it] {'loss': 1.0288, 'grad_norm': 21.887035369873047, 'learning_rate': 3.568162605525952e-07, 'fcm_dpo/beta': 0.002378998324275017, 'fcm_dpo/q_t': 0.3870581388473511, 'fcm_dpo/delta': -0.10634815692901611, 'fcm_dpo/margin': 210.4925537109375, 'margin_dpo/margin_mean': 210.49253845214844, 'margin_dpo/margin_std': 254.7548828125, 'logps/chosen': -314.74066162109375, 'logps/rejected': -586.4702758789062, 'logps/ref_chosen': -55.47149658203125, 'logps/ref_rejected': -116.70857238769531, 'KL/chosen_KL_mean': -259.2691650390625, 'KL/rejected_KL_mean': -469.76171875, 'KL/mean': -364.51544189453125, 'KL/std': 214.3240509033203, 'logits/chosen': -0.31333810091018677, 'logits/rejected': -0.33406785130500793, 'epoch': 0.43} + 43%|████▎ | 290/681 [12:17<16:25, 2.52s/it] 43%|████▎ | 291/681 [12:19<16:34, 2.55s/it] {'loss': 1.0516, 'grad_norm': 22.43724250793457, 'learning_rate': 3.5565456543517485e-07, 'fcm_dpo/beta': 0.002363018225878477, 'fcm_dpo/q_t': 0.39525771141052246, 'fcm_dpo/delta': -0.05237455666065216, 'fcm_dpo/margin': 190.36480712890625, 'margin_dpo/margin_mean': 190.36480712890625, 'margin_dpo/margin_std': 219.41854858398438, 'logps/chosen': -292.3713073730469, 'logps/rejected': -508.7728576660156, 'logps/ref_chosen': -63.26036834716797, 'logps/ref_rejected': -89.29708862304688, 'KL/chosen_KL_mean': -229.11093139648438, 'KL/rejected_KL_mean': -419.47576904296875, 'KL/mean': -324.2933654785156, 'KL/std': 183.89297485351562, 'logits/chosen': -0.32546311616897583, 'logits/rejected': -0.3105616271495819, 'epoch': 0.43} + 43%|████▎ | 291/681 [12:19<16:34, 2.55s/it] 43%|████▎ | 292/681 [12:21<16:12, 2.50s/it] {'loss': 1.0518, 'grad_norm': 26.2148380279541, 'learning_rate': 3.5449008622169583e-07, 'fcm_dpo/beta': 0.0023269178345799446, 'fcm_dpo/q_t': 0.39132484793663025, 'fcm_dpo/delta': -0.07821857929229736, 'fcm_dpo/margin': 203.92965698242188, 'margin_dpo/margin_mean': 203.92965698242188, 'margin_dpo/margin_std': 259.8817443847656, 'logps/chosen': -317.7785339355469, 'logps/rejected': -557.7510375976562, 'logps/ref_chosen': -53.91852951049805, 'logps/ref_rejected': -89.96138000488281, 'KL/chosen_KL_mean': -263.8599853515625, 'KL/rejected_KL_mean': -467.7896728515625, 'KL/mean': -365.8248291015625, 'KL/std': 221.3306884765625, 'logits/chosen': -0.3232491612434387, 'logits/rejected': -0.3077685534954071, 'epoch': 0.43} + 43%|████▎ | 292/681 [12:22<16:12, 2.50s/it] 43%|████▎ | 293/681 [12:24<16:25, 2.54s/it] {'loss': 1.1161, 'grad_norm': 38.31715774536133, 'learning_rate': 3.5332285359726846e-07, 'fcm_dpo/beta': 0.002317019272595644, 'fcm_dpo/q_t': 0.41295474767684937, 'fcm_dpo/delta': 0.011760619468986988, 'fcm_dpo/margin': 167.7296905517578, 'margin_dpo/margin_mean': 167.72970581054688, 'margin_dpo/margin_std': 257.7914733886719, 'logps/chosen': -330.2181396484375, 'logps/rejected': -515.4242553710938, 'logps/ref_chosen': -60.376033782958984, 'logps/ref_rejected': -77.85244750976562, 'KL/chosen_KL_mean': -269.84210205078125, 'KL/rejected_KL_mean': -437.5718078613281, 'KL/mean': -353.70697021484375, 'KL/std': 224.77517700195312, 'logits/chosen': -0.31585967540740967, 'logits/rejected': -0.3076015114784241, 'epoch': 0.43} + 43%|████▎ | 293/681 [12:24<16:25, 2.54s/it] 43%|████▎ | 294/681 [12:27<16:09, 2.51s/it] {'loss': 1.0891, 'grad_norm': 20.47630500793457, 'learning_rate': 3.5215289831955786e-07, 'fcm_dpo/beta': 0.002303325105458498, 'fcm_dpo/q_t': 0.4063786268234253, 'fcm_dpo/delta': -0.01764640584588051, 'fcm_dpo/margin': 180.93899536132812, 'margin_dpo/margin_mean': 180.93899536132812, 'margin_dpo/margin_std': 251.64306640625, 'logps/chosen': -305.3386535644531, 'logps/rejected': -520.0870971679688, 'logps/ref_chosen': -48.0875358581543, 'logps/ref_rejected': -81.89698791503906, 'KL/chosen_KL_mean': -257.2511291503906, 'KL/rejected_KL_mean': -438.19012451171875, 'KL/mean': -347.72064208984375, 'KL/std': 204.02166748046875, 'logits/chosen': -0.3238428235054016, 'logits/rejected': -0.33083072304725647, 'epoch': 0.43} + 43%|████▎ | 294/681 [12:27<16:09, 2.51s/it] 43%|████▎ | 295/681 [12:29<15:59, 2.49s/it] {'loss': 1.0634, 'grad_norm': 25.35572052001953, 'learning_rate': 3.509802512179737e-07, 'fcm_dpo/beta': 0.002286091446876526, 'fcm_dpo/q_t': 0.3953544497489929, 'fcm_dpo/delta': -0.06729740649461746, 'fcm_dpo/margin': 203.06219482421875, 'margin_dpo/margin_mean': 203.06219482421875, 'margin_dpo/margin_std': 268.56146240234375, 'logps/chosen': -356.6201171875, 'logps/rejected': -597.2139282226562, 'logps/ref_chosen': -49.92467498779297, 'logps/ref_rejected': -87.45632934570312, 'KL/chosen_KL_mean': -306.6954345703125, 'KL/rejected_KL_mean': -509.75762939453125, 'KL/mean': -408.22650146484375, 'KL/std': 231.20632934570312, 'logits/chosen': -0.3378216624259949, 'logits/rejected': -0.34275323152542114, 'epoch': 0.43} + 43%|████▎ | 295/681 [12:29<15:59, 2.49s/it] 43%|████▎ | 296/681 [12:31<15:52, 2.48s/it] {'loss': 1.177, 'grad_norm': 37.518798828125, 'learning_rate': 3.498049431928577e-07, 'fcm_dpo/beta': 0.002259893110021949, 'fcm_dpo/q_t': 0.4249199628829956, 'fcm_dpo/delta': -0.03520293906331062, 'fcm_dpo/margin': 143.65863037109375, 'margin_dpo/margin_mean': 143.65863037109375, 'margin_dpo/margin_std': 266.5452575683594, 'logps/chosen': -427.18310546875, 'logps/rejected': -598.4395751953125, 'logps/ref_chosen': -65.49124145507812, 'logps/ref_rejected': -93.08908081054688, 'KL/chosen_KL_mean': -361.69189453125, 'KL/rejected_KL_mean': -505.3504943847656, 'KL/mean': -433.52117919921875, 'KL/std': 217.83050537109375, 'logits/chosen': -0.39878833293914795, 'logits/rejected': -0.3886667490005493, 'epoch': 0.43} + 43%|████▎ | 296/681 [12:31<15:52, 2.48s/it] 44%|████▎ | 297/681 [12:34<16:03, 2.51s/it] {'loss': 1.1114, 'grad_norm': 30.537891387939453, 'learning_rate': 3.486270052146694e-07, 'fcm_dpo/beta': 0.002268793759867549, 'fcm_dpo/q_t': 0.4162394404411316, 'fcm_dpo/delta': 0.04043077677488327, 'fcm_dpo/margin': 159.12591552734375, 'margin_dpo/margin_mean': 159.12591552734375, 'margin_dpo/margin_std': 209.26858520507812, 'logps/chosen': -373.6283874511719, 'logps/rejected': -571.4158935546875, 'logps/ref_chosen': -56.476951599121094, 'logps/ref_rejected': -95.1385498046875, 'KL/chosen_KL_mean': -317.15142822265625, 'KL/rejected_KL_mean': -476.27734375, 'KL/mean': -396.7143859863281, 'KL/std': 205.87338256835938, 'logits/chosen': -0.38363754749298096, 'logits/rejected': -0.38842642307281494, 'epoch': 0.44} + 44%|████▎ | 297/681 [12:34<16:03, 2.51s/it] 44%|████▍ | 298/681 [12:37<16:21, 2.56s/it] {'loss': 1.0681, 'grad_norm': 25.527212142944336, 'learning_rate': 3.474464683231698e-07, 'fcm_dpo/beta': 0.002258981578052044, 'fcm_dpo/q_t': 0.39616093039512634, 'fcm_dpo/delta': -0.07966307550668716, 'fcm_dpo/margin': 210.67361450195312, 'margin_dpo/margin_mean': 210.67361450195312, 'margin_dpo/margin_std': 302.4098815917969, 'logps/chosen': -394.0440673828125, 'logps/rejected': -654.0546875, 'logps/ref_chosen': -67.32516479492188, 'logps/ref_rejected': -116.66217041015625, 'KL/chosen_KL_mean': -326.7189025878906, 'KL/rejected_KL_mean': -537.3925170898438, 'KL/mean': -432.0556640625, 'KL/std': 260.4841003417969, 'logits/chosen': -0.3951565623283386, 'logits/rejected': -0.4159289598464966, 'epoch': 0.44} + 44%|████▍ | 298/681 [12:37<16:21, 2.56s/it] 44%|████▍ | 299/681 [12:39<16:17, 2.56s/it] {'loss': 1.0928, 'grad_norm': 37.44233322143555, 'learning_rate': 3.462633636266041e-07, 'fcm_dpo/beta': 0.0022493680007755756, 'fcm_dpo/q_t': 0.40638279914855957, 'fcm_dpo/delta': -0.016153991222381592, 'fcm_dpo/margin': 184.4800567626953, 'margin_dpo/margin_mean': 184.4800567626953, 'margin_dpo/margin_std': 256.15374755859375, 'logps/chosen': -316.5487060546875, 'logps/rejected': -536.3948974609375, 'logps/ref_chosen': -48.96209716796875, 'logps/ref_rejected': -84.32823944091797, 'KL/chosen_KL_mean': -267.58660888671875, 'KL/rejected_KL_mean': -452.066650390625, 'KL/mean': -359.8266296386719, 'KL/std': 204.48165893554688, 'logits/chosen': -0.37919020652770996, 'logits/rejected': -0.38419249653816223, 'epoch': 0.44} + 44%|████▍ | 299/681 [12:39<16:17, 2.56s/it] 44%|████▍ | 300/681 [12:42<16:25, 2.59s/it] {'loss': 1.0439, 'grad_norm': 33.15673828125, 'learning_rate': 3.4507772230088147e-07, 'fcm_dpo/beta': 0.0022087290417402983, 'fcm_dpo/q_t': 0.38775455951690674, 'fcm_dpo/delta': -0.0972696915268898, 'fcm_dpo/margin': 223.01821899414062, 'margin_dpo/margin_mean': 223.01821899414062, 'margin_dpo/margin_std': 281.2703857421875, 'logps/chosen': -381.46527099609375, 'logps/rejected': -641.376220703125, 'logps/ref_chosen': -59.07371139526367, 'logps/ref_rejected': -95.9664535522461, 'KL/chosen_KL_mean': -322.3915710449219, 'KL/rejected_KL_mean': -545.4097900390625, 'KL/mean': -433.9006652832031, 'KL/std': 238.47645568847656, 'logits/chosen': -0.39440402388572693, 'logits/rejected': -0.4011026620864868, 'epoch': 0.44} + 44%|████▍ | 300/681 [12:42<16:25, 2.59s/it] 44%|████▍ | 301/681 [12:44<15:49, 2.50s/it] {'loss': 1.0547, 'grad_norm': 23.70587921142578, 'learning_rate': 3.4388957558875316e-07, 'fcm_dpo/beta': 0.002167191356420517, 'fcm_dpo/q_t': 0.39748483896255493, 'fcm_dpo/delta': -0.05619215965270996, 'fcm_dpo/margin': 209.16920471191406, 'margin_dpo/margin_mean': 209.16921997070312, 'margin_dpo/margin_std': 258.6616516113281, 'logps/chosen': -333.045166015625, 'logps/rejected': -577.3185424804688, 'logps/ref_chosen': -57.249366760253906, 'logps/ref_rejected': -92.35354614257812, 'KL/chosen_KL_mean': -275.7957763671875, 'KL/rejected_KL_mean': -484.9649963378906, 'KL/mean': -380.38037109375, 'KL/std': 218.0830078125, 'logits/chosen': -0.40770500898361206, 'logits/rejected': -0.40985846519470215, 'epoch': 0.44} + 44%|████▍ | 301/681 [12:44<15:49, 2.50s/it] 44%|████▍ | 302/681 [12:47<16:01, 2.54s/it] {'loss': 1.0956, 'grad_norm': 27.502851486206055, 'learning_rate': 3.426989547989902e-07, 'fcm_dpo/beta': 0.002163384575396776, 'fcm_dpo/q_t': 0.4111108183860779, 'fcm_dpo/delta': 0.013927444815635681, 'fcm_dpo/margin': 178.58871459960938, 'margin_dpo/margin_mean': 178.58871459960938, 'margin_dpo/margin_std': 229.92686462402344, 'logps/chosen': -284.2615966796875, 'logps/rejected': -508.878662109375, 'logps/ref_chosen': -51.197994232177734, 'logps/ref_rejected': -97.22636413574219, 'KL/chosen_KL_mean': -233.06362915039062, 'KL/rejected_KL_mean': -411.6523132324219, 'KL/mean': -322.35797119140625, 'KL/std': 194.712158203125, 'logits/chosen': -0.3880677819252014, 'logits/rejected': -0.3943794369697571, 'epoch': 0.44} + 44%|████▍ | 302/681 [12:47<16:01, 2.54s/it] 44%|████▍ | 303/681 [12:50<16:30, 2.62s/it] {'loss': 1.1326, 'grad_norm': 26.236801147460938, 'learning_rate': 3.4150589130555773e-07, 'fcm_dpo/beta': 0.002184551674872637, 'fcm_dpo/q_t': 0.4194805920124054, 'fcm_dpo/delta': 0.05139891058206558, 'fcm_dpo/margin': 160.39822387695312, 'margin_dpo/margin_mean': 160.39822387695312, 'margin_dpo/margin_std': 240.5888671875, 'logps/chosen': -308.04437255859375, 'logps/rejected': -488.674072265625, 'logps/ref_chosen': -66.71394348144531, 'logps/ref_rejected': -86.94542694091797, 'KL/chosen_KL_mean': -241.3304443359375, 'KL/rejected_KL_mean': -401.7286376953125, 'KL/mean': -321.529541015625, 'KL/std': 198.59133911132812, 'logits/chosen': -0.41135263442993164, 'logits/rejected': -0.3971450924873352, 'epoch': 0.44} + 44%|████▍ | 303/681 [12:50<16:30, 2.62s/it] 45%|████▍ | 304/681 [12:52<16:40, 2.65s/it] {'loss': 1.0614, 'grad_norm': 33.5152473449707, 'learning_rate': 3.403104165467883e-07, 'fcm_dpo/beta': 0.002205474767833948, 'fcm_dpo/q_t': 0.40509599447250366, 'fcm_dpo/delta': 0.0024266578257083893, 'fcm_dpo/margin': 180.16664123535156, 'margin_dpo/margin_mean': 180.16664123535156, 'margin_dpo/margin_std': 164.29513549804688, 'logps/chosen': -289.8739013671875, 'logps/rejected': -488.56182861328125, 'logps/ref_chosen': -71.95069885253906, 'logps/ref_rejected': -90.47203063964844, 'KL/chosen_KL_mean': -217.92318725585938, 'KL/rejected_KL_mean': -398.0898132324219, 'KL/mean': -308.0065002441406, 'KL/std': 184.00167846679688, 'logits/chosen': -0.42003294825553894, 'logits/rejected': -0.4105939567089081, 'epoch': 0.45} + 45%|████▍ | 304/681 [12:52<16:40, 2.65s/it] 45%|████▍ | 305/681 [12:55<16:35, 2.65s/it] {'loss': 1.1316, 'grad_norm': 20.60869026184082, 'learning_rate': 3.391125620245535e-07, 'fcm_dpo/beta': 0.0022017783485352993, 'fcm_dpo/q_t': 0.42033082246780396, 'fcm_dpo/delta': 0.05001223459839821, 'fcm_dpo/margin': 159.6190643310547, 'margin_dpo/margin_mean': 159.6190643310547, 'margin_dpo/margin_std': 237.54547119140625, 'logps/chosen': -308.929443359375, 'logps/rejected': -494.50787353515625, 'logps/ref_chosen': -66.79523468017578, 'logps/ref_rejected': -92.75459289550781, 'KL/chosen_KL_mean': -242.13421630859375, 'KL/rejected_KL_mean': -401.7532653808594, 'KL/mean': -321.9437561035156, 'KL/std': 213.17129516601562, 'logits/chosen': -0.4197441339492798, 'logits/rejected': -0.4035117030143738, 'epoch': 0.45} + 45%|████▍ | 305/681 [12:55<16:35, 2.65s/it] 45%|████▍ | 306/681 [12:58<16:33, 2.65s/it] {'loss': 1.1079, 'grad_norm': 25.355138778686523, 'learning_rate': 3.3791235930343417e-07, 'fcm_dpo/beta': 0.0022371697705239058, 'fcm_dpo/q_t': 0.41680416464805603, 'fcm_dpo/delta': 0.045154958963394165, 'fcm_dpo/margin': 159.28915405273438, 'margin_dpo/margin_mean': 159.2891387939453, 'margin_dpo/margin_std': 199.42770385742188, 'logps/chosen': -308.4158630371094, 'logps/rejected': -483.1802978515625, 'logps/ref_chosen': -69.68389892578125, 'logps/ref_rejected': -85.15919494628906, 'KL/chosen_KL_mean': -238.73196411132812, 'KL/rejected_KL_mean': -398.0210876464844, 'KL/mean': -318.37652587890625, 'KL/std': 185.3416748046875, 'logits/chosen': -0.3622322082519531, 'logits/rejected': -0.33640217781066895, 'epoch': 0.45} + 45%|████▍ | 306/681 [12:58<16:33, 2.65s/it] 45%|████▌ | 307/681 [13:00<16:41, 2.68s/it] {'loss': 1.1025, 'grad_norm': 22.831409454345703, 'learning_rate': 3.367098400098881e-07, 'fcm_dpo/beta': 0.00224516075104475, 'fcm_dpo/q_t': 0.41345101594924927, 'fcm_dpo/delta': 0.029343584552407265, 'fcm_dpo/margin': 165.57266235351562, 'margin_dpo/margin_mean': 165.57266235351562, 'margin_dpo/margin_std': 212.66111755371094, 'logps/chosen': -289.02044677734375, 'logps/rejected': -471.39996337890625, 'logps/ref_chosen': -70.16542053222656, 'logps/ref_rejected': -86.97230529785156, 'KL/chosen_KL_mean': -218.85501098632812, 'KL/rejected_KL_mean': -384.42767333984375, 'KL/mean': -301.641357421875, 'KL/std': 174.05111694335938, 'logits/chosen': -0.3699556589126587, 'logits/rejected': -0.35030895471572876, 'epoch': 0.45} + 45%|████▌ | 307/681 [13:00<16:41, 2.68s/it] 45%|████▌ | 308/681 [13:03<16:27, 2.65s/it] {'loss': 1.0533, 'grad_norm': 28.529888153076172, 'learning_rate': 3.355050358314172e-07, 'fcm_dpo/beta': 0.002246259246021509, 'fcm_dpo/q_t': 0.40048325061798096, 'fcm_dpo/delta': -0.021481268107891083, 'fcm_dpo/margin': 187.23712158203125, 'margin_dpo/margin_mean': 187.2371368408203, 'margin_dpo/margin_std': 190.32083129882812, 'logps/chosen': -283.4674377441406, 'logps/rejected': -494.83184814453125, 'logps/ref_chosen': -55.2449951171875, 'logps/ref_rejected': -79.37226104736328, 'KL/chosen_KL_mean': -228.22244262695312, 'KL/rejected_KL_mean': -415.4595947265625, 'KL/mean': -321.84100341796875, 'KL/std': 197.64083862304688, 'logits/chosen': -0.3800508975982666, 'logits/rejected': -0.36786890029907227, 'epoch': 0.45} + 45%|████▌ | 308/681 [13:03<16:27, 2.65s/it] 45%|████▌ | 309/681 [13:06<16:29, 2.66s/it] {'loss': 1.0741, 'grad_norm': 31.124483108520508, 'learning_rate': 3.3429797851573183e-07, 'fcm_dpo/beta': 0.0022473763674497604, 'fcm_dpo/q_t': 0.40186938643455505, 'fcm_dpo/delta': -0.019291866570711136, 'fcm_dpo/margin': 186.04052734375, 'margin_dpo/margin_mean': 186.04052734375, 'margin_dpo/margin_std': 224.29818725585938, 'logps/chosen': -285.45819091796875, 'logps/rejected': -504.88031005859375, 'logps/ref_chosen': -48.959083557128906, 'logps/ref_rejected': -82.34072875976562, 'KL/chosen_KL_mean': -236.49908447265625, 'KL/rejected_KL_mean': -422.53961181640625, 'KL/mean': -329.51934814453125, 'KL/std': 211.5294189453125, 'logits/chosen': -0.33029186725616455, 'logits/rejected': -0.32252657413482666, 'epoch': 0.45} + 45%|████▌ | 309/681 [13:06<16:29, 2.66s/it] 46%|████▌ | 310/681 [13:08<16:28, 2.66s/it] {'loss': 1.0711, 'grad_norm': 29.155431747436523, 'learning_rate': 3.3308869986991487e-07, 'fcm_dpo/beta': 0.002242402173578739, 'fcm_dpo/q_t': 0.40604251623153687, 'fcm_dpo/delta': 0.001399170607328415, 'fcm_dpo/margin': 177.672607421875, 'margin_dpo/margin_mean': 177.672607421875, 'margin_dpo/margin_std': 188.24441528320312, 'logps/chosen': -344.3968200683594, 'logps/rejected': -539.2579345703125, 'logps/ref_chosen': -62.74177932739258, 'logps/ref_rejected': -79.9302978515625, 'KL/chosen_KL_mean': -281.655029296875, 'KL/rejected_KL_mean': -459.32763671875, 'KL/mean': -370.4913330078125, 'KL/std': 190.68673706054688, 'logits/chosen': -0.3663063645362854, 'logits/rejected': -0.35591787099838257, 'epoch': 0.46} + 46%|████▌ | 310/681 [13:08<16:28, 2.66s/it] 46%|████▌ | 311/681 [13:11<16:10, 2.62s/it] {'loss': 1.0653, 'grad_norm': 23.382389068603516, 'learning_rate': 3.3187723175958346e-07, 'fcm_dpo/beta': 0.0022182685788720846, 'fcm_dpo/q_t': 0.3989013433456421, 'fcm_dpo/delta': -0.04857812821865082, 'fcm_dpo/margin': 201.23855590820312, 'margin_dpo/margin_mean': 201.23855590820312, 'margin_dpo/margin_std': 260.4375915527344, 'logps/chosen': -356.16363525390625, 'logps/rejected': -581.8123779296875, 'logps/ref_chosen': -53.02798080444336, 'logps/ref_rejected': -77.43820190429688, 'KL/chosen_KL_mean': -303.1356201171875, 'KL/rejected_KL_mean': -504.37420654296875, 'KL/mean': -403.75494384765625, 'KL/std': 248.3344268798828, 'logits/chosen': -0.3620428442955017, 'logits/rejected': -0.3392923176288605, 'epoch': 0.46} + 46%|████▌ | 311/681 [13:11<16:10, 2.62s/it] 46%|████▌ | 312/681 [13:13<15:31, 2.53s/it] {'loss': 1.102, 'grad_norm': 34.67451095581055, 'learning_rate': 3.306636061080487e-07, 'fcm_dpo/beta': 0.002222396433353424, 'fcm_dpo/q_t': 0.41095060110092163, 'fcm_dpo/delta': 0.004101406782865524, 'fcm_dpo/margin': 178.0953826904297, 'margin_dpo/margin_mean': 178.09536743164062, 'margin_dpo/margin_std': 254.87359619140625, 'logps/chosen': -341.43353271484375, 'logps/rejected': -545.929443359375, 'logps/ref_chosen': -49.39221954345703, 'logps/ref_rejected': -75.79280853271484, 'KL/chosen_KL_mean': -292.04132080078125, 'KL/rejected_KL_mean': -470.13665771484375, 'KL/mean': -381.0889892578125, 'KL/std': 213.00611877441406, 'logits/chosen': -0.3244793117046356, 'logits/rejected': -0.3159826993942261, 'epoch': 0.46} + 46%|████▌ | 312/681 [13:13<15:31, 2.53s/it] 46%|████▌ | 313/681 [13:16<15:09, 2.47s/it] {'loss': 1.0837, 'grad_norm': 26.70942497253418, 'learning_rate': 3.2944785489547537e-07, 'fcm_dpo/beta': 0.002221351722255349, 'fcm_dpo/q_t': 0.40212100744247437, 'fcm_dpo/delta': -0.02877044305205345, 'fcm_dpo/margin': 192.10980224609375, 'margin_dpo/margin_mean': 192.10980224609375, 'margin_dpo/margin_std': 255.63043212890625, 'logps/chosen': -321.3615417480469, 'logps/rejected': -549.7247924804688, 'logps/ref_chosen': -50.152740478515625, 'logps/ref_rejected': -86.40620422363281, 'KL/chosen_KL_mean': -271.20880126953125, 'KL/rejected_KL_mean': -463.318603515625, 'KL/mean': -367.263671875, 'KL/std': 229.5218048095703, 'logits/chosen': -0.3600277602672577, 'logits/rejected': -0.3582299053668976, 'epoch': 0.46} + 46%|████▌ | 313/681 [13:16<15:09, 2.47s/it] 46%|████▌ | 314/681 [13:18<15:30, 2.53s/it] {'loss': 1.1215, 'grad_norm': 24.576322555541992, 'learning_rate': 3.2823001015803857e-07, 'fcm_dpo/beta': 0.002198255155235529, 'fcm_dpo/q_t': 0.4136376976966858, 'fcm_dpo/delta': 0.014935776591300964, 'fcm_dpo/margin': 175.31007385253906, 'margin_dpo/margin_mean': 175.31007385253906, 'margin_dpo/margin_std': 272.0423889160156, 'logps/chosen': -343.5220031738281, 'logps/rejected': -559.1910400390625, 'logps/ref_chosen': -57.237579345703125, 'logps/ref_rejected': -97.5965347290039, 'KL/chosen_KL_mean': -286.284423828125, 'KL/rejected_KL_mean': -461.594482421875, 'KL/mean': -373.939453125, 'KL/std': 214.23907470703125, 'logits/chosen': -0.4146174192428589, 'logits/rejected': -0.41618144512176514, 'epoch': 0.46} + 46%|████▌ | 314/681 [13:18<15:30, 2.53s/it] 46%|████▋ | 315/681 [13:21<15:35, 2.56s/it] {'loss': 1.1062, 'grad_norm': 20.407081604003906, 'learning_rate': 3.270101039870797e-07, 'fcm_dpo/beta': 0.0022171130403876305, 'fcm_dpo/q_t': 0.415382444858551, 'fcm_dpo/delta': 0.0351128987967968, 'fcm_dpo/margin': 165.16049194335938, 'margin_dpo/margin_mean': 165.16049194335938, 'margin_dpo/margin_std': 214.27072143554688, 'logps/chosen': -307.19732666015625, 'logps/rejected': -508.9691162109375, 'logps/ref_chosen': -49.06958770751953, 'logps/ref_rejected': -85.68087768554688, 'KL/chosen_KL_mean': -258.12774658203125, 'KL/rejected_KL_mean': -423.2882385253906, 'KL/mean': -340.7080078125, 'KL/std': 191.50604248046875, 'logits/chosen': -0.31862980127334595, 'logits/rejected': -0.32194432616233826, 'epoch': 0.46} + 46%|████▋ | 315/681 [13:21<15:35, 2.56s/it] 46%|████▋ | 316/681 [13:23<15:32, 2.55s/it] {'loss': 1.0163, 'grad_norm': 26.12361717224121, 'learning_rate': 3.2578816852826086e-07, 'fcm_dpo/beta': 0.0021798848174512386, 'fcm_dpo/q_t': 0.3863717317581177, 'fcm_dpo/delta': -0.10400072485208511, 'fcm_dpo/margin': 228.41297912597656, 'margin_dpo/margin_mean': 228.41297912597656, 'margin_dpo/margin_std': 247.7483367919922, 'logps/chosen': -307.31549072265625, 'logps/rejected': -582.7492065429688, 'logps/ref_chosen': -54.26074981689453, 'logps/ref_rejected': -101.2814712524414, 'KL/chosen_KL_mean': -253.05474853515625, 'KL/rejected_KL_mean': -481.4677429199219, 'KL/mean': -367.26123046875, 'KL/std': 226.0899658203125, 'logits/chosen': -0.3711768090724945, 'logits/rejected': -0.37630313634872437, 'epoch': 0.46} + 46%|████▋ | 316/681 [13:23<15:32, 2.55s/it] 47%|████▋ | 317/681 [13:26<15:35, 2.57s/it] {'loss': 0.9842, 'grad_norm': 26.812149047851562, 'learning_rate': 3.2456423598071783e-07, 'fcm_dpo/beta': 0.0021434309892356396, 'fcm_dpo/q_t': 0.3767782747745514, 'fcm_dpo/delta': -0.13644810020923615, 'fcm_dpo/margin': 247.001708984375, 'margin_dpo/margin_mean': 247.001708984375, 'margin_dpo/margin_std': 228.92449951171875, 'logps/chosen': -313.3270263671875, 'logps/rejected': -604.93359375, 'logps/ref_chosen': -56.094207763671875, 'logps/ref_rejected': -100.69905090332031, 'KL/chosen_KL_mean': -257.2327880859375, 'KL/rejected_KL_mean': -504.2344970703125, 'KL/mean': -380.733642578125, 'KL/std': 204.96792602539062, 'logits/chosen': -0.40798693895339966, 'logits/rejected': -0.4007112681865692, 'epoch': 0.47} + 47%|████▋ | 317/681 [13:26<15:35, 2.57s/it] 47%|████▋ | 318/681 [13:28<15:17, 2.53s/it] {'loss': 1.0887, 'grad_norm': 27.008316040039062, 'learning_rate': 3.233383385962115e-07, 'fcm_dpo/beta': 0.0021276050247251987, 'fcm_dpo/q_t': 0.40913063287734985, 'fcm_dpo/delta': 0.0070451050996780396, 'fcm_dpo/margin': 184.7557373046875, 'margin_dpo/margin_mean': 184.75575256347656, 'margin_dpo/margin_std': 232.82424926757812, 'logps/chosen': -342.21575927734375, 'logps/rejected': -545.090087890625, 'logps/ref_chosen': -64.64569854736328, 'logps/ref_rejected': -82.76425170898438, 'KL/chosen_KL_mean': -277.570068359375, 'KL/rejected_KL_mean': -462.3258361816406, 'KL/mean': -369.9479675292969, 'KL/std': 211.07015991210938, 'logits/chosen': -0.4423850178718567, 'logits/rejected': -0.414249062538147, 'epoch': 0.47} + 47%|████▋ | 318/681 [13:28<15:17, 2.53s/it] 47%|████▋ | 319/681 [13:31<15:33, 2.58s/it] {'loss': 1.0377, 'grad_norm': 22.64339828491211, 'learning_rate': 3.2211050867827805e-07, 'fcm_dpo/beta': 0.0021020234562456608, 'fcm_dpo/q_t': 0.39178723096847534, 'fcm_dpo/delta': -0.06988762319087982, 'fcm_dpo/margin': 222.0023193359375, 'margin_dpo/margin_mean': 222.00233459472656, 'margin_dpo/margin_std': 251.9779052734375, 'logps/chosen': -297.0338134765625, 'logps/rejected': -583.5589599609375, 'logps/ref_chosen': -49.383758544921875, 'logps/ref_rejected': -113.90650939941406, 'KL/chosen_KL_mean': -247.65008544921875, 'KL/rejected_KL_mean': -469.65240478515625, 'KL/mean': -358.6512451171875, 'KL/std': 232.62017822265625, 'logits/chosen': -0.41219907999038696, 'logits/rejected': -0.4256317913532257, 'epoch': 0.47} + 47%|████▋ | 319/681 [13:31<15:33, 2.58s/it] 47%|████▋ | 320/681 [13:34<15:37, 2.60s/it] {'loss': 1.0129, 'grad_norm': 28.402790069580078, 'learning_rate': 3.208807785813777e-07, 'fcm_dpo/beta': 0.0020547928288578987, 'fcm_dpo/q_t': 0.3849526047706604, 'fcm_dpo/delta': -0.10551808774471283, 'fcm_dpo/margin': 243.2246856689453, 'margin_dpo/margin_mean': 243.22470092773438, 'margin_dpo/margin_std': 259.66607666015625, 'logps/chosen': -321.4161376953125, 'logps/rejected': -602.8031005859375, 'logps/ref_chosen': -59.50489044189453, 'logps/ref_rejected': -97.66717529296875, 'KL/chosen_KL_mean': -261.9112548828125, 'KL/rejected_KL_mean': -505.13592529296875, 'KL/mean': -383.52362060546875, 'KL/std': 241.70034790039062, 'logits/chosen': -0.398881196975708, 'logits/rejected': -0.4026961922645569, 'epoch': 0.47} + 47%|████▋ | 320/681 [13:34<15:37, 2.60s/it] 47%|████▋ | 321/681 [13:36<15:21, 2.56s/it] {'loss': 1.0767, 'grad_norm': 25.453643798828125, 'learning_rate': 3.1964918071004217e-07, 'fcm_dpo/beta': 0.0020298874005675316, 'fcm_dpo/q_t': 0.4030313193798065, 'fcm_dpo/delta': -0.025036636739969254, 'fcm_dpo/margin': 208.4459228515625, 'margin_dpo/margin_mean': 208.4459228515625, 'margin_dpo/margin_std': 262.54095458984375, 'logps/chosen': -381.6781005859375, 'logps/rejected': -620.2163696289062, 'logps/ref_chosen': -61.548683166503906, 'logps/ref_rejected': -91.64103698730469, 'KL/chosen_KL_mean': -320.12939453125, 'KL/rejected_KL_mean': -528.5753173828125, 'KL/mean': -424.35235595703125, 'KL/std': 247.34034729003906, 'logits/chosen': -0.36571186780929565, 'logits/rejected': -0.3529800772666931, 'epoch': 0.47} + 47%|████▋ | 321/681 [13:36<15:21, 2.56s/it] 47%|████▋ | 322/681 [13:39<15:26, 2.58s/it] {'loss': 1.0374, 'grad_norm': 33.35853958129883, 'learning_rate': 3.184157475180207e-07, 'fcm_dpo/beta': 0.0020174758974462748, 'fcm_dpo/q_t': 0.39428332448005676, 'fcm_dpo/delta': -0.05392756685614586, 'fcm_dpo/margin': 223.640869140625, 'margin_dpo/margin_mean': 223.640869140625, 'margin_dpo/margin_std': 232.15565490722656, 'logps/chosen': -335.4730529785156, 'logps/rejected': -597.5737915039062, 'logps/ref_chosen': -57.29003143310547, 'logps/ref_rejected': -95.74992370605469, 'KL/chosen_KL_mean': -278.1830139160156, 'KL/rejected_KL_mean': -501.8238525390625, 'KL/mean': -390.0034484863281, 'KL/std': 217.16629028320312, 'logits/chosen': -0.3929744362831116, 'logits/rejected': -0.39414313435554504, 'epoch': 0.47} + 47%|████▋ | 322/681 [13:39<15:26, 2.58s/it] 47%|████▋ | 323/681 [13:42<15:40, 2.63s/it] {'loss': 1.086, 'grad_norm': 40.872833251953125, 'learning_rate': 3.171805115074251e-07, 'fcm_dpo/beta': 0.0020154546946287155, 'fcm_dpo/q_t': 0.4071164131164551, 'fcm_dpo/delta': 0.0015676822513341904, 'fcm_dpo/margin': 197.70355224609375, 'margin_dpo/margin_mean': 197.70355224609375, 'margin_dpo/margin_std': 243.023681640625, 'logps/chosen': -354.2584228515625, 'logps/rejected': -575.7899169921875, 'logps/ref_chosen': -51.23395919799805, 'logps/ref_rejected': -75.06192016601562, 'KL/chosen_KL_mean': -303.02447509765625, 'KL/rejected_KL_mean': -500.72802734375, 'KL/mean': -401.8762512207031, 'KL/std': 222.78414916992188, 'logits/chosen': -0.43304672837257385, 'logits/rejected': -0.4297791123390198, 'epoch': 0.47} + 47%|████▋ | 323/681 [13:42<15:40, 2.63s/it] 48%|████▊ | 324/681 [13:44<15:18, 2.57s/it] {'loss': 1.1178, 'grad_norm': 41.4670295715332, 'learning_rate': 3.1594350522787295e-07, 'fcm_dpo/beta': 0.0020376183092594147, 'fcm_dpo/q_t': 0.4135010838508606, 'fcm_dpo/delta': 0.016333594918251038, 'fcm_dpo/margin': 188.06114196777344, 'margin_dpo/margin_mean': 188.0611572265625, 'margin_dpo/margin_std': 271.6750183105469, 'logps/chosen': -412.4801940917969, 'logps/rejected': -621.8836669921875, 'logps/ref_chosen': -65.13516998291016, 'logps/ref_rejected': -86.47750854492188, 'KL/chosen_KL_mean': -347.34503173828125, 'KL/rejected_KL_mean': -535.4061889648438, 'KL/mean': -441.3756103515625, 'KL/std': 241.51780700683594, 'logits/chosen': -0.42671218514442444, 'logits/rejected': -0.41737309098243713, 'epoch': 0.48} + 48%|████▊ | 324/681 [13:44<15:18, 2.57s/it] 48%|████▊ | 325/681 [13:47<15:45, 2.66s/it] {'loss': 1.137, 'grad_norm': 27.271472930908203, 'learning_rate': 3.147047612756302e-07, 'fcm_dpo/beta': 0.002051199320703745, 'fcm_dpo/q_t': 0.4243575930595398, 'fcm_dpo/delta': 0.07886850088834763, 'fcm_dpo/margin': 157.7469482421875, 'margin_dpo/margin_mean': 157.7469482421875, 'margin_dpo/margin_std': 213.0748291015625, 'logps/chosen': -327.4698486328125, 'logps/rejected': -499.0871276855469, 'logps/ref_chosen': -56.215599060058594, 'logps/ref_rejected': -70.08592987060547, 'KL/chosen_KL_mean': -271.2542724609375, 'KL/rejected_KL_mean': -429.001220703125, 'KL/mean': -350.12774658203125, 'KL/std': 204.61688232421875, 'logits/chosen': -0.4851588308811188, 'logits/rejected': -0.4723260998725891, 'epoch': 0.48} + 48%|████▊ | 325/681 [13:47<15:45, 2.66s/it] 48%|████▊ | 326/681 [13:50<15:46, 2.67s/it] {'loss': 1.1457, 'grad_norm': 77.27185821533203, 'learning_rate': 3.134643122927519e-07, 'fcm_dpo/beta': 0.0020837995689362288, 'fcm_dpo/q_t': 0.4282350540161133, 'fcm_dpo/delta': 0.10001323372125626, 'fcm_dpo/margin': 145.47195434570312, 'margin_dpo/margin_mean': 145.47195434570312, 'margin_dpo/margin_std': 189.67251586914062, 'logps/chosen': -364.39520263671875, 'logps/rejected': -516.9889526367188, 'logps/ref_chosen': -72.72496032714844, 'logps/ref_rejected': -79.8467788696289, 'KL/chosen_KL_mean': -291.67022705078125, 'KL/rejected_KL_mean': -437.1421813964844, 'KL/mean': -364.40618896484375, 'KL/std': 193.47927856445312, 'logits/chosen': -0.4964238405227661, 'logits/rejected': -0.4864235520362854, 'epoch': 0.48} + 48%|████▊ | 326/681 [13:50<15:46, 2.67s/it] 48%|████▊ | 327/681 [13:52<15:48, 2.68s/it] {'loss': 1.0265, 'grad_norm': 71.85537719726562, 'learning_rate': 3.1222219096622264e-07, 'fcm_dpo/beta': 0.0020731650292873383, 'fcm_dpo/q_t': 0.39099863171577454, 'fcm_dpo/delta': -0.06503181904554367, 'fcm_dpo/margin': 222.725341796875, 'margin_dpo/margin_mean': 222.725341796875, 'margin_dpo/margin_std': 220.58840942382812, 'logps/chosen': -332.0361633300781, 'logps/rejected': -597.5609130859375, 'logps/ref_chosen': -69.13441467285156, 'logps/ref_rejected': -111.93377685546875, 'KL/chosen_KL_mean': -262.9017333984375, 'KL/rejected_KL_mean': -485.62713623046875, 'KL/mean': -374.2644348144531, 'KL/std': 206.111572265625, 'logits/chosen': -0.43772637844085693, 'logits/rejected': -0.4322543740272522, 'epoch': 0.48} + 48%|████▊ | 327/681 [13:52<15:48, 2.68s/it] 48%|████▊ | 328/681 [13:55<15:37, 2.66s/it] {'loss': 1.0657, 'grad_norm': 34.49656295776367, 'learning_rate': 3.1097843002709427e-07, 'fcm_dpo/beta': 0.0020635989494621754, 'fcm_dpo/q_t': 0.40090832114219666, 'fcm_dpo/delta': -0.03295481950044632, 'fcm_dpo/margin': 209.12203979492188, 'margin_dpo/margin_mean': 209.12203979492188, 'margin_dpo/margin_std': 258.40020751953125, 'logps/chosen': -350.34637451171875, 'logps/rejected': -590.6361694335938, 'logps/ref_chosen': -59.68719482421875, 'logps/ref_rejected': -90.85499572753906, 'KL/chosen_KL_mean': -290.6591796875, 'KL/rejected_KL_mean': -499.78118896484375, 'KL/mean': -395.22021484375, 'KL/std': 233.2576904296875, 'logits/chosen': -0.4391549825668335, 'logits/rejected': -0.44703197479248047, 'epoch': 0.48} + 48%|████▊ | 328/681 [13:55<15:37, 2.66s/it] 48%|████▊ | 329/681 [13:57<15:15, 2.60s/it] {'loss': 1.0751, 'grad_norm': 32.74344253540039, 'learning_rate': 3.0973306224962437e-07, 'fcm_dpo/beta': 0.0020334022119641304, 'fcm_dpo/q_t': 0.39808762073516846, 'fcm_dpo/delta': -0.047926321625709534, 'fcm_dpo/margin': 218.89981079101562, 'margin_dpo/margin_mean': 218.89981079101562, 'margin_dpo/margin_std': 289.7802734375, 'logps/chosen': -414.39813232421875, 'logps/rejected': -668.74951171875, 'logps/ref_chosen': -65.2461929321289, 'logps/ref_rejected': -100.69770812988281, 'KL/chosen_KL_mean': -349.1519470214844, 'KL/rejected_KL_mean': -568.0517578125, 'KL/mean': -458.60186767578125, 'KL/std': 267.2418212890625, 'logits/chosen': -0.4355248808860779, 'logits/rejected': -0.4343896210193634, 'epoch': 0.48} + 48%|████▊ | 329/681 [13:57<15:15, 2.60s/it] 48%|████▊ | 330/681 [14:00<15:17, 2.61s/it] {'loss': 1.035, 'grad_norm': 23.221515655517578, 'learning_rate': 3.084861204504122e-07, 'fcm_dpo/beta': 0.002014409750699997, 'fcm_dpo/q_t': 0.39114266633987427, 'fcm_dpo/delta': -0.0776476040482521, 'fcm_dpo/margin': 235.2794189453125, 'margin_dpo/margin_mean': 235.27944946289062, 'margin_dpo/margin_std': 269.7514343261719, 'logps/chosen': -340.2598876953125, 'logps/rejected': -615.4178466796875, 'logps/ref_chosen': -46.998348236083984, 'logps/ref_rejected': -86.87684631347656, 'KL/chosen_KL_mean': -293.26153564453125, 'KL/rejected_KL_mean': -528.5409545898438, 'KL/mean': -410.9012756347656, 'KL/std': 247.63658142089844, 'logits/chosen': -0.36399000883102417, 'logits/rejected': -0.3640822768211365, 'epoch': 0.48} + 48%|████▊ | 330/681 [14:00<15:17, 2.61s/it] 49%|████▊ | 331/681 [14:02<14:59, 2.57s/it] {'loss': 1.0209, 'grad_norm': 25.662797927856445, 'learning_rate': 3.072376374875335e-07, 'fcm_dpo/beta': 0.0019897697493433952, 'fcm_dpo/q_t': 0.39270973205566406, 'fcm_dpo/delta': -0.05556933581829071, 'fcm_dpo/margin': 227.68206787109375, 'margin_dpo/margin_mean': 227.68206787109375, 'margin_dpo/margin_std': 201.46847534179688, 'logps/chosen': -363.1827392578125, 'logps/rejected': -629.3560180664062, 'logps/ref_chosen': -50.52424621582031, 'logps/ref_rejected': -89.01544189453125, 'KL/chosen_KL_mean': -312.65850830078125, 'KL/rejected_KL_mean': -540.340576171875, 'KL/mean': -426.49957275390625, 'KL/std': 204.22564697265625, 'logits/chosen': -0.4090471863746643, 'logits/rejected': -0.40625983476638794, 'epoch': 0.49} + 49%|████▊ | 331/681 [14:02<14:59, 2.57s/it] 49%|████▉ | 332/681 [14:05<14:40, 2.52s/it] {'loss': 1.137, 'grad_norm': 21.15984535217285, 'learning_rate': 3.059876462596758e-07, 'fcm_dpo/beta': 0.002010452328249812, 'fcm_dpo/q_t': 0.42497020959854126, 'fcm_dpo/delta': 0.07582204043865204, 'fcm_dpo/margin': 162.3951873779297, 'margin_dpo/margin_mean': 162.3951873779297, 'margin_dpo/margin_std': 232.2877655029297, 'logps/chosen': -357.419677734375, 'logps/rejected': -547.1197509765625, 'logps/ref_chosen': -49.18028259277344, 'logps/ref_rejected': -76.48515319824219, 'KL/chosen_KL_mean': -308.2393798828125, 'KL/rejected_KL_mean': -470.63458251953125, 'KL/mean': -389.4369812011719, 'KL/std': 208.03164672851562, 'logits/chosen': -0.3901352882385254, 'logits/rejected': -0.3734605014324188, 'epoch': 0.49} + 49%|████▉ | 332/681 [14:05<14:40, 2.52s/it] 49%|████▉ | 333/681 [14:07<14:24, 2.48s/it] {'loss': 1.0854, 'grad_norm': 23.545013427734375, 'learning_rate': 3.0473617970527015e-07, 'fcm_dpo/beta': 0.0019922310020774603, 'fcm_dpo/q_t': 0.4023503363132477, 'fcm_dpo/delta': -0.031133878976106644, 'fcm_dpo/margin': 215.18930053710938, 'margin_dpo/margin_mean': 215.18930053710938, 'margin_dpo/margin_std': 291.11785888671875, 'logps/chosen': -381.8335266113281, 'logps/rejected': -628.3111572265625, 'logps/ref_chosen': -63.75574493408203, 'logps/ref_rejected': -95.04411315917969, 'KL/chosen_KL_mean': -318.0777893066406, 'KL/rejected_KL_mean': -533.26708984375, 'KL/mean': -425.67242431640625, 'KL/std': 241.38174438476562, 'logits/chosen': -0.4057619571685791, 'logits/rejected': -0.3980754017829895, 'epoch': 0.49} + 49%|████▉ | 333/681 [14:07<14:24, 2.48s/it] 49%|████▉ | 334/681 [14:10<14:22, 2.48s/it] {'loss': 1.1256, 'grad_norm': 22.068880081176758, 'learning_rate': 3.034832708016243e-07, 'fcm_dpo/beta': 0.0020080246031284332, 'fcm_dpo/q_t': 0.414248526096344, 'fcm_dpo/delta': 0.02357984334230423, 'fcm_dpo/margin': 187.9039306640625, 'margin_dpo/margin_mean': 187.9039306640625, 'margin_dpo/margin_std': 291.5281982421875, 'logps/chosen': -366.5747375488281, 'logps/rejected': -582.8158569335938, 'logps/ref_chosen': -66.97975158691406, 'logps/ref_rejected': -95.31692504882812, 'KL/chosen_KL_mean': -299.594970703125, 'KL/rejected_KL_mean': -487.4989318847656, 'KL/mean': -393.54693603515625, 'KL/std': 260.1100769042969, 'logits/chosen': -0.41038450598716736, 'logits/rejected': -0.40884631872177124, 'epoch': 0.49} + 49%|████▉ | 334/681 [14:10<14:22, 2.48s/it] 49%|████▉ | 335/681 [14:12<14:28, 2.51s/it] {'loss': 1.1711, 'grad_norm': 24.0537052154541, 'learning_rate': 3.022289525640531e-07, 'fcm_dpo/beta': 0.0020120900589972734, 'fcm_dpo/q_t': 0.4307052493095398, 'fcm_dpo/delta': 0.0005856315256096423, 'fcm_dpo/margin': 149.90032958984375, 'margin_dpo/margin_mean': 149.90032958984375, 'margin_dpo/margin_std': 253.32125854492188, 'logps/chosen': -378.1312561035156, 'logps/rejected': -553.1068115234375, 'logps/ref_chosen': -62.54248046875, 'logps/ref_rejected': -87.61770629882812, 'KL/chosen_KL_mean': -315.5887756347656, 'KL/rejected_KL_mean': -465.4891052246094, 'KL/mean': -390.5389404296875, 'KL/std': 233.7744140625, 'logits/chosen': -0.41110914945602417, 'logits/rejected': -0.38828492164611816, 'epoch': 0.49} + 49%|████▉ | 335/681 [14:12<14:28, 2.51s/it] 49%|████▉ | 336/681 [14:15<14:44, 2.56s/it] {'loss': 1.0597, 'grad_norm': 26.437314987182617, 'learning_rate': 3.009732580450086e-07, 'fcm_dpo/beta': 0.0019951139111071825, 'fcm_dpo/q_t': 0.39314505457878113, 'fcm_dpo/delta': -0.07861563563346863, 'fcm_dpo/margin': 238.04388427734375, 'margin_dpo/margin_mean': 238.04388427734375, 'margin_dpo/margin_std': 317.6496276855469, 'logps/chosen': -358.05596923828125, 'logps/rejected': -645.972900390625, 'logps/ref_chosen': -54.53115463256836, 'logps/ref_rejected': -104.40424346923828, 'KL/chosen_KL_mean': -303.5247802734375, 'KL/rejected_KL_mean': -541.5687255859375, 'KL/mean': -422.5467529296875, 'KL/std': 268.7447509765625, 'logits/chosen': -0.384580135345459, 'logits/rejected': -0.38513875007629395, 'epoch': 0.49} + 49%|████▉ | 336/681 [14:15<14:44, 2.56s/it] 49%|████▉ | 337/681 [14:17<14:35, 2.54s/it] {'loss': 1.0362, 'grad_norm': 28.5858154296875, 'learning_rate': 2.9971622033320914e-07, 'fcm_dpo/beta': 0.001958012580871582, 'fcm_dpo/q_t': 0.39352160692214966, 'fcm_dpo/delta': -0.06217820942401886, 'fcm_dpo/margin': 234.46224975585938, 'margin_dpo/margin_mean': 234.46224975585938, 'margin_dpo/margin_std': 254.32135009765625, 'logps/chosen': -337.088623046875, 'logps/rejected': -608.1492309570312, 'logps/ref_chosen': -65.12869262695312, 'logps/ref_rejected': -101.72701263427734, 'KL/chosen_KL_mean': -271.9599609375, 'KL/rejected_KL_mean': -506.4222106933594, 'KL/mean': -389.19110107421875, 'KL/std': 216.30596923828125, 'logits/chosen': -0.43051382899284363, 'logits/rejected': -0.42107608914375305, 'epoch': 0.49} + 49%|████▉ | 337/681 [14:18<14:35, 2.54s/it] 50%|████▉ | 338/681 [14:20<14:35, 2.55s/it] {'loss': 1.0451, 'grad_norm': 28.797653198242188, 'learning_rate': 2.984578725527675e-07, 'fcm_dpo/beta': 0.0019469063263386488, 'fcm_dpo/q_t': 0.3992460072040558, 'fcm_dpo/delta': -0.02767963334918022, 'fcm_dpo/margin': 219.05032348632812, 'margin_dpo/margin_mean': 219.0503387451172, 'margin_dpo/margin_std': 213.1126251220703, 'logps/chosen': -300.0381164550781, 'logps/rejected': -549.7342529296875, 'logps/ref_chosen': -58.422706604003906, 'logps/ref_rejected': -89.06854248046875, 'KL/chosen_KL_mean': -241.61541748046875, 'KL/rejected_KL_mean': -460.665771484375, 'KL/mean': -351.14056396484375, 'KL/std': 205.6318359375, 'logits/chosen': -0.4226057231426239, 'logits/rejected': -0.413729190826416, 'epoch': 0.5} + 50%|████▉ | 338/681 [14:20<14:35, 2.55s/it] 50%|████▉ | 339/681 [14:22<14:06, 2.48s/it] {'loss': 1.0555, 'grad_norm': 28.772863388061523, 'learning_rate': 2.9719824786231796e-07, 'fcm_dpo/beta': 0.0019529033452272415, 'fcm_dpo/q_t': 0.40049952268600464, 'fcm_dpo/delta': -0.025594212114810944, 'fcm_dpo/margin': 216.89700317382812, 'margin_dpo/margin_mean': 216.89700317382812, 'margin_dpo/margin_std': 217.88153076171875, 'logps/chosen': -323.0326843261719, 'logps/rejected': -583.8452758789062, 'logps/ref_chosen': -59.99531555175781, 'logps/ref_rejected': -103.9109115600586, 'KL/chosen_KL_mean': -263.037353515625, 'KL/rejected_KL_mean': -479.93438720703125, 'KL/mean': -371.4858703613281, 'KL/std': 212.88307189941406, 'logits/chosen': -0.4214293956756592, 'logits/rejected': -0.4066346287727356, 'epoch': 0.5} + 50%|████▉ | 339/681 [14:22<14:06, 2.48s/it] 50%|████▉ | 340/681 [14:25<14:15, 2.51s/it] {'loss': 1.1127, 'grad_norm': 23.684680938720703, 'learning_rate': 2.959373794541426e-07, 'fcm_dpo/beta': 0.0019388075452297926, 'fcm_dpo/q_t': 0.4153197407722473, 'fcm_dpo/delta': 0.0363488644361496, 'fcm_dpo/margin': 188.17184448242188, 'margin_dpo/margin_mean': 188.17184448242188, 'margin_dpo/margin_std': 253.51083374023438, 'logps/chosen': -338.403564453125, 'logps/rejected': -546.8524169921875, 'logps/ref_chosen': -52.83022689819336, 'logps/ref_rejected': -73.10723114013672, 'KL/chosen_KL_mean': -285.5733337402344, 'KL/rejected_KL_mean': -473.7452087402344, 'KL/mean': -379.6592712402344, 'KL/std': 208.79730224609375, 'logits/chosen': -0.3585469722747803, 'logits/rejected': -0.3364448547363281, 'epoch': 0.5} + 50%|████▉ | 340/681 [14:25<14:15, 2.51s/it] 50%|█████ | 341/681 [14:27<13:49, 2.44s/it] {'loss': 1.0474, 'grad_norm': 23.347299575805664, 'learning_rate': 2.946753005532965e-07, 'fcm_dpo/beta': 0.0019295980455353856, 'fcm_dpo/q_t': 0.39680999517440796, 'fcm_dpo/delta': -0.04842275381088257, 'fcm_dpo/margin': 231.09503173828125, 'margin_dpo/margin_mean': 231.0950164794922, 'margin_dpo/margin_std': 254.49545288085938, 'logps/chosen': -334.8046875, 'logps/rejected': -619.809814453125, 'logps/ref_chosen': -47.899803161621094, 'logps/ref_rejected': -101.80987548828125, 'KL/chosen_KL_mean': -286.9049072265625, 'KL/rejected_KL_mean': -517.9999389648438, 'KL/mean': -402.452392578125, 'KL/std': 244.80047607421875, 'logits/chosen': -0.38709864020347595, 'logits/rejected': -0.38579249382019043, 'epoch': 0.5} + 50%|█████ | 341/681 [14:27<13:49, 2.44s/it] 50%|█████ | 342/681 [14:30<14:09, 2.51s/it] {'loss': 1.0955, 'grad_norm': 22.63515853881836, 'learning_rate': 2.934120444167326e-07, 'fcm_dpo/beta': 0.0019408478401601315, 'fcm_dpo/q_t': 0.4091772437095642, 'fcm_dpo/delta': 0.005411949008703232, 'fcm_dpo/margin': 203.21817016601562, 'margin_dpo/margin_mean': 203.21817016601562, 'margin_dpo/margin_std': 270.35748291015625, 'logps/chosen': -374.0428771972656, 'logps/rejected': -597.85400390625, 'logps/ref_chosen': -71.99664306640625, 'logps/ref_rejected': -92.58959197998047, 'KL/chosen_KL_mean': -302.0462341308594, 'KL/rejected_KL_mean': -505.2643737792969, 'KL/mean': -403.65533447265625, 'KL/std': 232.10491943359375, 'logits/chosen': -0.41977885365486145, 'logits/rejected': -0.39589670300483704, 'epoch': 0.5} + 50%|█████ | 342/681 [14:30<14:09, 2.51s/it] 50%|█████ | 343/681 [14:32<13:50, 2.46s/it] {'loss': 1.0164, 'grad_norm': 26.55186653137207, 'learning_rate': 2.9214764433242476e-07, 'fcm_dpo/beta': 0.001911632250994444, 'fcm_dpo/q_t': 0.389914333820343, 'fcm_dpo/delta': -0.06870149821043015, 'fcm_dpo/margin': 243.47164916992188, 'margin_dpo/margin_mean': 243.47164916992188, 'margin_dpo/margin_std': 218.26107788085938, 'logps/chosen': -356.21539306640625, 'logps/rejected': -656.3228759765625, 'logps/ref_chosen': -54.405616760253906, 'logps/ref_rejected': -111.04142761230469, 'KL/chosen_KL_mean': -301.8097839355469, 'KL/rejected_KL_mean': -545.2814331054688, 'KL/mean': -423.54559326171875, 'KL/std': 240.10980224609375, 'logits/chosen': -0.40133050084114075, 'logits/rejected': -0.4047289490699768, 'epoch': 0.5} + 50%|█████ | 343/681 [14:32<13:50, 2.46s/it] 51%|█████ | 344/681 [14:35<13:47, 2.45s/it] {'loss': 1.09, 'grad_norm': 30.049617767333984, 'learning_rate': 2.9088213361849126e-07, 'fcm_dpo/beta': 0.0019231976475566626, 'fcm_dpo/q_t': 0.40852880477905273, 'fcm_dpo/delta': -0.008610613644123077, 'fcm_dpo/margin': 211.33169555664062, 'margin_dpo/margin_mean': 211.33169555664062, 'margin_dpo/margin_std': 268.55743408203125, 'logps/chosen': -360.11138916015625, 'logps/rejected': -608.101806640625, 'logps/ref_chosen': -53.96466827392578, 'logps/ref_rejected': -90.62336730957031, 'KL/chosen_KL_mean': -306.146728515625, 'KL/rejected_KL_mean': -517.4783935546875, 'KL/mean': -411.81256103515625, 'KL/std': 263.5313720703125, 'logits/chosen': -0.436582088470459, 'logits/rejected': -0.4415278732776642, 'epoch': 0.51} + 51%|█████ | 344/681 [14:35<13:47, 2.45s/it] 51%|█████ | 345/681 [14:37<14:09, 2.53s/it] {'loss': 1.0516, 'grad_norm': 21.18876075744629, 'learning_rate': 2.896155456223163e-07, 'fcm_dpo/beta': 0.001892891013994813, 'fcm_dpo/q_t': 0.39596062898635864, 'fcm_dpo/delta': -0.052630215883255005, 'fcm_dpo/margin': 237.8755645751953, 'margin_dpo/margin_mean': 237.8755645751953, 'margin_dpo/margin_std': 280.4576721191406, 'logps/chosen': -426.9808654785156, 'logps/rejected': -702.6611328125, 'logps/ref_chosen': -61.685699462890625, 'logps/ref_rejected': -99.49041748046875, 'KL/chosen_KL_mean': -365.295166015625, 'KL/rejected_KL_mean': -603.1707153320312, 'KL/mean': -484.23297119140625, 'KL/std': 253.80105590820312, 'logits/chosen': -0.46160662174224854, 'logits/rejected': -0.46070683002471924, 'epoch': 0.51} + 51%|█████ | 345/681 [14:37<14:09, 2.53s/it] 51%|█████ | 346/681 [14:40<14:01, 2.51s/it] {'loss': 1.0685, 'grad_norm': 23.943134307861328, 'learning_rate': 2.883479137196714e-07, 'fcm_dpo/beta': 0.0018771484028548002, 'fcm_dpo/q_t': 0.40137046575546265, 'fcm_dpo/delta': -0.02152422070503235, 'fcm_dpo/margin': 224.07159423828125, 'margin_dpo/margin_mean': 224.07159423828125, 'margin_dpo/margin_std': 267.4580993652344, 'logps/chosen': -421.2921142578125, 'logps/rejected': -667.5227661132812, 'logps/ref_chosen': -55.256263732910156, 'logps/ref_rejected': -77.41532135009766, 'KL/chosen_KL_mean': -366.03582763671875, 'KL/rejected_KL_mean': -590.107421875, 'KL/mean': -478.0716552734375, 'KL/std': 247.56460571289062, 'logits/chosen': -0.38838616013526917, 'logits/rejected': -0.3767489790916443, 'epoch': 0.51} + 51%|█████ | 346/681 [14:40<14:01, 2.51s/it] 51%|█████ | 347/681 [14:42<13:26, 2.41s/it] {'loss': 1.0869, 'grad_norm': 25.194826126098633, 'learning_rate': 2.8707927131383614e-07, 'fcm_dpo/beta': 0.001872351742349565, 'fcm_dpo/q_t': 0.40555307269096375, 'fcm_dpo/delta': -0.012826315127313137, 'fcm_dpo/margin': 220.20404052734375, 'margin_dpo/margin_mean': 220.20404052734375, 'margin_dpo/margin_std': 294.784423828125, 'logps/chosen': -421.9984130859375, 'logps/rejected': -676.9913330078125, 'logps/ref_chosen': -57.56623840332031, 'logps/ref_rejected': -92.35509490966797, 'KL/chosen_KL_mean': -364.4321594238281, 'KL/rejected_KL_mean': -584.63623046875, 'KL/mean': -474.5341796875, 'KL/std': 261.7486877441406, 'logits/chosen': -0.4138352870941162, 'logits/rejected': -0.40841221809387207, 'epoch': 0.51} + 51%|█████ | 347/681 [14:42<13:26, 2.41s/it] 51%|█████ | 348/681 [14:44<13:20, 2.40s/it] {'loss': 1.1197, 'grad_norm': 21.97551727294922, 'learning_rate': 2.858096518347179e-07, 'fcm_dpo/beta': 0.0018715888727456331, 'fcm_dpo/q_t': 0.41821640729904175, 'fcm_dpo/delta': 0.04614096134901047, 'fcm_dpo/margin': 189.6708221435547, 'margin_dpo/margin_mean': 189.6708221435547, 'margin_dpo/margin_std': 254.99215698242188, 'logps/chosen': -373.14263916015625, 'logps/rejected': -595.6341552734375, 'logps/ref_chosen': -56.31770324707031, 'logps/ref_rejected': -89.13836669921875, 'KL/chosen_KL_mean': -316.824951171875, 'KL/rejected_KL_mean': -506.4957580566406, 'KL/mean': -411.66033935546875, 'KL/std': 220.3616180419922, 'logits/chosen': -0.44485563039779663, 'logits/rejected': -0.446816623210907, 'epoch': 0.51} + 51%|█████ | 348/681 [14:44<13:20, 2.40s/it] 51%|█████ | 349/681 [14:47<13:49, 2.50s/it] {'loss': 1.0971, 'grad_norm': 20.3768367767334, 'learning_rate': 2.845390887379706e-07, 'fcm_dpo/beta': 0.0018918917048722506, 'fcm_dpo/q_t': 0.40753403306007385, 'fcm_dpo/delta': -0.010010870173573494, 'fcm_dpo/margin': 216.3872528076172, 'margin_dpo/margin_mean': 216.3872528076172, 'margin_dpo/margin_std': 307.3798828125, 'logps/chosen': -359.80670166015625, 'logps/rejected': -615.673583984375, 'logps/ref_chosen': -58.025516510009766, 'logps/ref_rejected': -97.50515747070312, 'KL/chosen_KL_mean': -301.78118896484375, 'KL/rejected_KL_mean': -518.1683959960938, 'KL/mean': -409.97479248046875, 'KL/std': 247.78256225585938, 'logits/chosen': -0.4286951422691345, 'logits/rejected': -0.42969733476638794, 'epoch': 0.51} + 51%|█████ | 349/681 [14:47<13:49, 2.50s/it] 51%|█████▏ | 350/681 [14:50<14:10, 2.57s/it] {'loss': 1.1014, 'grad_norm': 30.007631301879883, 'learning_rate': 2.8326761550411346e-07, 'fcm_dpo/beta': 0.0018784052226692438, 'fcm_dpo/q_t': 0.40847963094711304, 'fcm_dpo/delta': 0.007296178489923477, 'fcm_dpo/margin': 209.05239868164062, 'margin_dpo/margin_mean': 209.0524139404297, 'margin_dpo/margin_std': 286.0767822265625, 'logps/chosen': -383.2506103515625, 'logps/rejected': -617.84423828125, 'logps/ref_chosen': -64.33049011230469, 'logps/ref_rejected': -89.87164306640625, 'KL/chosen_KL_mean': -318.9201354980469, 'KL/rejected_KL_mean': -527.9725341796875, 'KL/mean': -423.44635009765625, 'KL/std': 231.78794860839844, 'logits/chosen': -0.4662485122680664, 'logits/rejected': -0.46992364525794983, 'epoch': 0.51} + 51%|█████▏ | 350/681 [14:50<14:10, 2.57s/it] 52%|█████▏ | 351/681 [14:52<14:08, 2.57s/it] {'loss': 1.049, 'grad_norm': 30.30561637878418, 'learning_rate': 2.819952656376487e-07, 'fcm_dpo/beta': 0.0018615357112139463, 'fcm_dpo/q_t': 0.39380645751953125, 'fcm_dpo/delta': -0.07428093254566193, 'fcm_dpo/margin': 252.7449951171875, 'margin_dpo/margin_mean': 252.7449951171875, 'margin_dpo/margin_std': 314.3124084472656, 'logps/chosen': -351.78643798828125, 'logps/rejected': -645.4246826171875, 'logps/ref_chosen': -60.6721305847168, 'logps/ref_rejected': -101.5654296875, 'KL/chosen_KL_mean': -291.11431884765625, 'KL/rejected_KL_mean': -543.8592529296875, 'KL/mean': -417.4867858886719, 'KL/std': 271.51190185546875, 'logits/chosen': -0.45949965715408325, 'logits/rejected': -0.4594254493713379, 'epoch': 0.52} + 52%|█████▏ | 351/681 [14:52<14:08, 2.57s/it] 52%|█████▏ | 352/681 [14:55<14:11, 2.59s/it] {'loss': 1.1823, 'grad_norm': 31.560047149658203, 'learning_rate': 2.8072207266617854e-07, 'fcm_dpo/beta': 0.0018662881338968873, 'fcm_dpo/q_t': 0.4333202540874481, 'fcm_dpo/delta': 0.01947195641696453, 'fcm_dpo/margin': 154.27169799804688, 'margin_dpo/margin_mean': 154.27169799804688, 'margin_dpo/margin_std': 272.5928039550781, 'logps/chosen': -420.6194152832031, 'logps/rejected': -580.589599609375, 'logps/ref_chosen': -70.9434585571289, 'logps/ref_rejected': -76.6419677734375, 'KL/chosen_KL_mean': -349.67596435546875, 'KL/rejected_KL_mean': -503.9476623535156, 'KL/mean': -426.81182861328125, 'KL/std': 249.33474731445312, 'logits/chosen': -0.427381694316864, 'logits/rejected': -0.39572709798812866, 'epoch': 0.52} + 52%|█████▏ | 352/681 [14:55<14:11, 2.59s/it] 52%|█████▏ | 353/681 [14:58<14:02, 2.57s/it] {'loss': 1.1089, 'grad_norm': 25.82649040222168, 'learning_rate': 2.794480701395219e-07, 'fcm_dpo/beta': 0.001863989164121449, 'fcm_dpo/q_t': 0.4135817885398865, 'fcm_dpo/delta': 0.023936476558446884, 'fcm_dpo/margin': 202.05075073242188, 'margin_dpo/margin_mean': 202.0507354736328, 'margin_dpo/margin_std': 275.25592041015625, 'logps/chosen': -382.982421875, 'logps/rejected': -606.973388671875, 'logps/ref_chosen': -58.39533996582031, 'logps/ref_rejected': -80.33553314208984, 'KL/chosen_KL_mean': -324.58709716796875, 'KL/rejected_KL_mean': -526.6378173828125, 'KL/mean': -425.61248779296875, 'KL/std': 249.03515625, 'logits/chosen': -0.47480159997940063, 'logits/rejected': -0.46382421255111694, 'epoch': 0.52} + 52%|█████▏ | 353/681 [14:58<14:02, 2.57s/it] 52%|█████▏ | 354/681 [15:00<14:11, 2.60s/it] {'loss': 1.0478, 'grad_norm': 27.079309463500977, 'learning_rate': 2.781732916288303e-07, 'fcm_dpo/beta': 0.0018658683402463794, 'fcm_dpo/q_t': 0.399784117937088, 'fcm_dpo/delta': -0.026406319811940193, 'fcm_dpo/margin': 227.90101623535156, 'margin_dpo/margin_mean': 227.90103149414062, 'margin_dpo/margin_std': 226.9383544921875, 'logps/chosen': -330.74676513671875, 'logps/rejected': -587.602294921875, 'logps/ref_chosen': -59.80299377441406, 'logps/ref_rejected': -88.75750732421875, 'KL/chosen_KL_mean': -270.94378662109375, 'KL/rejected_KL_mean': -498.8448181152344, 'KL/mean': -384.894287109375, 'KL/std': 226.17709350585938, 'logits/chosen': -0.43044692277908325, 'logits/rejected': -0.42145881056785583, 'epoch': 0.52} + 52%|█████▏ | 354/681 [15:00<14:11, 2.60s/it] 52%|█████▏ | 355/681 [15:03<13:48, 2.54s/it] {'loss': 1.0533, 'grad_norm': 35.046512603759766, 'learning_rate': 2.7689777072570284e-07, 'fcm_dpo/beta': 0.0018558851443231106, 'fcm_dpo/q_t': 0.40093863010406494, 'fcm_dpo/delta': -0.019903086125850677, 'fcm_dpo/margin': 225.7357177734375, 'margin_dpo/margin_mean': 225.7357177734375, 'margin_dpo/margin_std': 227.40325927734375, 'logps/chosen': -324.4056091308594, 'logps/rejected': -578.4189453125, 'logps/ref_chosen': -54.12849807739258, 'logps/ref_rejected': -82.40606689453125, 'KL/chosen_KL_mean': -270.277099609375, 'KL/rejected_KL_mean': -496.0128479003906, 'KL/mean': -383.14495849609375, 'KL/std': 231.57516479492188, 'logits/chosen': -0.538011908531189, 'logits/rejected': -0.5325556993484497, 'epoch': 0.52} + 52%|█████▏ | 355/681 [15:03<13:48, 2.54s/it] 52%|█████▏ | 356/681 [15:05<14:04, 2.60s/it] {'loss': 1.2411, 'grad_norm': 32.40851974487305, 'learning_rate': 2.7562154104130176e-07, 'fcm_dpo/beta': 0.0018649199046194553, 'fcm_dpo/q_t': 0.44756919145584106, 'fcm_dpo/delta': 0.037342458963394165, 'fcm_dpo/margin': 123.73867797851562, 'margin_dpo/margin_mean': 123.73868560791016, 'margin_dpo/margin_std': 291.8314208984375, 'logps/chosen': -401.5526123046875, 'logps/rejected': -536.5167236328125, 'logps/ref_chosen': -64.6738052368164, 'logps/ref_rejected': -75.89926147460938, 'KL/chosen_KL_mean': -336.8787841796875, 'KL/rejected_KL_mean': -460.6174621582031, 'KL/mean': -398.74810791015625, 'KL/std': 237.61134338378906, 'logits/chosen': -0.500129222869873, 'logits/rejected': -0.48293763399124146, 'epoch': 0.52} + 52%|█████▏ | 356/681 [15:05<14:04, 2.60s/it] 52%|█████▏ | 357/681 [15:08<14:05, 2.61s/it] {'loss': 1.0927, 'grad_norm': 24.0694522857666, 'learning_rate': 2.7434463620546594e-07, 'fcm_dpo/beta': 0.001869656378403306, 'fcm_dpo/q_t': 0.4108501672744751, 'fcm_dpo/delta': 0.021622397005558014, 'fcm_dpo/margin': 202.65980529785156, 'margin_dpo/margin_mean': 202.65982055664062, 'margin_dpo/margin_std': 240.59239196777344, 'logps/chosen': -355.0642395019531, 'logps/rejected': -591.8394165039062, 'logps/ref_chosen': -52.725799560546875, 'logps/ref_rejected': -86.84115600585938, 'KL/chosen_KL_mean': -302.33843994140625, 'KL/rejected_KL_mean': -504.9982604980469, 'KL/mean': -403.6683349609375, 'KL/std': 235.599365234375, 'logits/chosen': -0.47579270601272583, 'logits/rejected': -0.4654581546783447, 'epoch': 0.52} + 52%|█████▏ | 357/681 [15:08<14:05, 2.61s/it] 53%|█████▎ | 358/681 [15:11<14:18, 2.66s/it] {'loss': 1.1069, 'grad_norm': 23.27127456665039, 'learning_rate': 2.730670898658255e-07, 'fcm_dpo/beta': 0.0018902610754594207, 'fcm_dpo/q_t': 0.4157891273498535, 'fcm_dpo/delta': 0.03711070120334625, 'fcm_dpo/margin': 192.69509887695312, 'margin_dpo/margin_mean': 192.69509887695312, 'margin_dpo/margin_std': 249.3919219970703, 'logps/chosen': -335.076904296875, 'logps/rejected': -552.9398803710938, 'logps/ref_chosen': -63.20543670654297, 'logps/ref_rejected': -88.373291015625, 'KL/chosen_KL_mean': -271.8714599609375, 'KL/rejected_KL_mean': -464.56658935546875, 'KL/mean': -368.218994140625, 'KL/std': 229.80101013183594, 'logits/chosen': -0.5084176063537598, 'logits/rejected': -0.4954051971435547, 'epoch': 0.53} + 53%|█████▎ | 358/681 [15:11<14:18, 2.66s/it] 53%|█████▎ | 359/681 [15:13<14:06, 2.63s/it] {'loss': 1.0759, 'grad_norm': 25.26753044128418, 'learning_rate': 2.717889356869146e-07, 'fcm_dpo/beta': 0.0018845023587346077, 'fcm_dpo/q_t': 0.4034174680709839, 'fcm_dpo/delta': -0.023675762116909027, 'fcm_dpo/margin': 224.2485809326172, 'margin_dpo/margin_mean': 224.2485809326172, 'margin_dpo/margin_std': 283.8497009277344, 'logps/chosen': -370.797119140625, 'logps/rejected': -620.8492431640625, 'logps/ref_chosen': -56.370216369628906, 'logps/ref_rejected': -82.17375183105469, 'KL/chosen_KL_mean': -314.4269104003906, 'KL/rejected_KL_mean': -538.675537109375, 'KL/mean': -426.55120849609375, 'KL/std': 234.15951538085938, 'logits/chosen': -0.4363176226615906, 'logits/rejected': -0.4269408583641052, 'epoch': 0.53} + 53%|█████▎ | 359/681 [15:13<14:06, 2.63s/it] 53%|█████▎ | 360/681 [15:16<14:01, 2.62s/it] {'loss': 1.1216, 'grad_norm': 39.744529724121094, 'learning_rate': 2.7051020734928443e-07, 'fcm_dpo/beta': 0.0019088031258434057, 'fcm_dpo/q_t': 0.42239513993263245, 'fcm_dpo/delta': 0.07367773354053497, 'fcm_dpo/margin': 172.14376831054688, 'margin_dpo/margin_mean': 172.1437530517578, 'margin_dpo/margin_std': 204.19671630859375, 'logps/chosen': -364.80419921875, 'logps/rejected': -555.3265380859375, 'logps/ref_chosen': -51.460384368896484, 'logps/ref_rejected': -69.83892059326172, 'KL/chosen_KL_mean': -313.34381103515625, 'KL/rejected_KL_mean': -485.4875793457031, 'KL/mean': -399.41571044921875, 'KL/std': 200.4339599609375, 'logits/chosen': -0.44749805331230164, 'logits/rejected': -0.4344269633293152, 'epoch': 0.53} + 53%|█████▎ | 360/681 [15:16<14:01, 2.62s/it] 53%|█████▎ | 361/681 [15:19<14:05, 2.64s/it] {'loss': 1.1322, 'grad_norm': 28.265932083129883, 'learning_rate': 2.6923093854861593e-07, 'fcm_dpo/beta': 0.0019349538488313556, 'fcm_dpo/q_t': 0.4206019639968872, 'fcm_dpo/delta': 0.05939781665802002, 'fcm_dpo/margin': 176.93667602539062, 'margin_dpo/margin_mean': 176.93667602539062, 'margin_dpo/margin_std': 257.4376220703125, 'logps/chosen': -383.2627258300781, 'logps/rejected': -597.09912109375, 'logps/ref_chosen': -53.86951446533203, 'logps/ref_rejected': -90.7692642211914, 'KL/chosen_KL_mean': -329.3931884765625, 'KL/rejected_KL_mean': -506.329833984375, 'KL/mean': -417.86151123046875, 'KL/std': 236.83558654785156, 'logits/chosen': -0.42435145378112793, 'logits/rejected': -0.4203334152698517, 'epoch': 0.53} + 53%|█████▎ | 361/681 [15:19<14:05, 2.64s/it] 53%|█████▎ | 362/681 [15:21<13:53, 2.61s/it] {'loss': 0.9867, 'grad_norm': 24.199586868286133, 'learning_rate': 2.679511629948319e-07, 'fcm_dpo/beta': 0.0018921452574431896, 'fcm_dpo/q_t': 0.3790006637573242, 'fcm_dpo/delta': -0.13069821894168854, 'fcm_dpo/margin': 276.513916015625, 'margin_dpo/margin_mean': 276.5138854980469, 'margin_dpo/margin_std': 264.4261474609375, 'logps/chosen': -348.55487060546875, 'logps/rejected': -672.01171875, 'logps/ref_chosen': -58.639060974121094, 'logps/ref_rejected': -105.58195495605469, 'KL/chosen_KL_mean': -289.91583251953125, 'KL/rejected_KL_mean': -566.4297485351562, 'KL/mean': -428.1727600097656, 'KL/std': 248.63421630859375, 'logits/chosen': -0.45070475339889526, 'logits/rejected': -0.46132344007492065, 'epoch': 0.53} + 53%|█████▎ | 362/681 [15:21<13:53, 2.61s/it] 53%|█████▎ | 363/681 [15:24<13:21, 2.52s/it] {'loss': 0.9918, 'grad_norm': 23.536636352539062, 'learning_rate': 2.6667091441120816e-07, 'fcm_dpo/beta': 0.001857282593846321, 'fcm_dpo/q_t': 0.38030678033828735, 'fcm_dpo/delta': -0.12296949326992035, 'fcm_dpo/margin': 278.2501220703125, 'margin_dpo/margin_mean': 278.2501220703125, 'margin_dpo/margin_std': 267.5613098144531, 'logps/chosen': -309.455078125, 'logps/rejected': -617.841796875, 'logps/ref_chosen': -44.558380126953125, 'logps/ref_rejected': -74.69496154785156, 'KL/chosen_KL_mean': -264.89666748046875, 'KL/rejected_KL_mean': -543.1467895507812, 'KL/mean': -404.021728515625, 'KL/std': 245.37258911132812, 'logits/chosen': -0.4141322076320648, 'logits/rejected': -0.4084208607673645, 'epoch': 0.53} + 53%|█████▎ | 363/681 [15:24<13:21, 2.52s/it] 53%|█████▎ | 364/681 [15:26<13:24, 2.54s/it] {'loss': 1.1034, 'grad_norm': 24.284122467041016, 'learning_rate': 2.6539022653348575e-07, 'fcm_dpo/beta': 0.001852140761911869, 'fcm_dpo/q_t': 0.41120392084121704, 'fcm_dpo/delta': 0.017322657629847527, 'fcm_dpo/margin': 206.77392578125, 'margin_dpo/margin_mean': 206.77394104003906, 'margin_dpo/margin_std': 278.9082946777344, 'logps/chosen': -343.513671875, 'logps/rejected': -592.7887573242188, 'logps/ref_chosen': -48.894622802734375, 'logps/ref_rejected': -91.395751953125, 'KL/chosen_KL_mean': -294.61907958984375, 'KL/rejected_KL_mean': -501.39300537109375, 'KL/mean': -398.00604248046875, 'KL/std': 241.02774047851562, 'logits/chosen': -0.46633392572402954, 'logits/rejected': -0.4776257276535034, 'epoch': 0.53} + 53%|█████▎ | 364/681 [15:26<13:24, 2.54s/it] 54%|█████▎ | 365/681 [15:29<13:30, 2.57s/it] {'loss': 1.0708, 'grad_norm': 23.80178451538086, 'learning_rate': 2.641091331089811e-07, 'fcm_dpo/beta': 0.0018444794695824385, 'fcm_dpo/q_t': 0.40571504831314087, 'fcm_dpo/delta': -0.009514345787465572, 'fcm_dpo/margin': 221.81109619140625, 'margin_dpo/margin_mean': 221.81109619140625, 'margin_dpo/margin_std': 257.0599060058594, 'logps/chosen': -334.2415771484375, 'logps/rejected': -597.2615966796875, 'logps/ref_chosen': -51.49274444580078, 'logps/ref_rejected': -92.70166778564453, 'KL/chosen_KL_mean': -282.74884033203125, 'KL/rejected_KL_mean': -504.5599060058594, 'KL/mean': -393.6543884277344, 'KL/std': 251.020751953125, 'logits/chosen': -0.42873144149780273, 'logits/rejected': -0.440301775932312, 'epoch': 0.54} + 54%|█████▎ | 365/681 [15:29<13:30, 2.57s/it] 54%|█████▎ | 366/681 [15:31<13:29, 2.57s/it] {'loss': 1.0851, 'grad_norm': 21.787899017333984, 'learning_rate': 2.6282766789569736e-07, 'fcm_dpo/beta': 0.001833090209402144, 'fcm_dpo/q_t': 0.40598738193511963, 'fcm_dpo/delta': -0.009195588529109955, 'fcm_dpo/margin': 222.82901000976562, 'margin_dpo/margin_mean': 222.82901000976562, 'margin_dpo/margin_std': 287.36468505859375, 'logps/chosen': -307.76556396484375, 'logps/rejected': -569.1844482421875, 'logps/ref_chosen': -44.7205696105957, 'logps/ref_rejected': -83.31040954589844, 'KL/chosen_KL_mean': -263.0450134277344, 'KL/rejected_KL_mean': -485.8740234375, 'KL/mean': -374.4595031738281, 'KL/std': 239.58175659179688, 'logits/chosen': -0.41914117336273193, 'logits/rejected': -0.43506374955177307, 'epoch': 0.54} + 54%|█████▎ | 366/681 [15:31<13:29, 2.57s/it] 54%|█████▍ | 367/681 [15:34<13:45, 2.63s/it] {'loss': 1.1158, 'grad_norm': 20.662567138671875, 'learning_rate': 2.615458646614349e-07, 'fcm_dpo/beta': 0.0018576278816908598, 'fcm_dpo/q_t': 0.41822659969329834, 'fcm_dpo/delta': 0.05456267669796944, 'fcm_dpo/margin': 186.90333557128906, 'margin_dpo/margin_mean': 186.90333557128906, 'margin_dpo/margin_std': 237.801025390625, 'logps/chosen': -329.8546142578125, 'logps/rejected': -535.1038208007812, 'logps/ref_chosen': -58.405418395996094, 'logps/ref_rejected': -76.75132751464844, 'KL/chosen_KL_mean': -271.44921875, 'KL/rejected_KL_mean': -458.3525085449219, 'KL/mean': -364.90087890625, 'KL/std': 220.12733459472656, 'logits/chosen': -0.44316402077674866, 'logits/rejected': -0.4273492395877838, 'epoch': 0.54} + 54%|█████▍ | 367/681 [15:34<13:45, 2.63s/it] 54%|█████▍ | 368/681 [15:37<13:39, 2.62s/it] {'loss': 0.9699, 'grad_norm': 35.373626708984375, 'learning_rate': 2.6026375718290083e-07, 'fcm_dpo/beta': 0.0018250863067805767, 'fcm_dpo/q_t': 0.37577325105667114, 'fcm_dpo/delta': -0.12947417795658112, 'fcm_dpo/margin': 286.42449951171875, 'margin_dpo/margin_mean': 286.42449951171875, 'margin_dpo/margin_std': 225.23828125, 'logps/chosen': -301.3831787109375, 'logps/rejected': -641.910400390625, 'logps/ref_chosen': -44.452518463134766, 'logps/ref_rejected': -98.55526733398438, 'KL/chosen_KL_mean': -256.9306640625, 'KL/rejected_KL_mean': -543.3551025390625, 'KL/mean': -400.14288330078125, 'KL/std': 246.394287109375, 'logits/chosen': -0.46660637855529785, 'logits/rejected': -0.4749259352684021, 'epoch': 0.54} + 54%|█████▍ | 368/681 [15:37<13:39, 2.62s/it] 54%|█████▍ | 369/681 [15:39<13:41, 2.63s/it] {'loss': 1.205, 'grad_norm': 29.09714126586914, 'learning_rate': 2.589813792448196e-07, 'fcm_dpo/beta': 0.001818750286474824, 'fcm_dpo/q_t': 0.43869489431381226, 'fcm_dpo/delta': 0.0356462262570858, 'fcm_dpo/margin': 144.11106872558594, 'margin_dpo/margin_mean': 144.11106872558594, 'margin_dpo/margin_std': 282.6583251953125, 'logps/chosen': -414.572509765625, 'logps/rejected': -578.597900390625, 'logps/ref_chosen': -71.38150024414062, 'logps/ref_rejected': -91.29582214355469, 'KL/chosen_KL_mean': -343.1910095214844, 'KL/rejected_KL_mean': -487.3021240234375, 'KL/mean': -415.24658203125, 'KL/std': 250.22268676757812, 'logits/chosen': -0.47453856468200684, 'logits/rejected': -0.4585055708885193, 'epoch': 0.54} + 54%|█████▍ | 369/681 [15:39<13:41, 2.63s/it] 54%|█████▍ | 370/681 [15:42<13:55, 2.69s/it] {'loss': 1.1911, 'grad_norm': 29.563688278198242, 'learning_rate': 2.5769876463904263e-07, 'fcm_dpo/beta': 0.0018614260479807854, 'fcm_dpo/q_t': 0.4348163604736328, 'fcm_dpo/delta': 0.11835242807865143, 'fcm_dpo/margin': 152.9559783935547, 'margin_dpo/margin_mean': 152.95596313476562, 'margin_dpo/margin_std': 288.8004150390625, 'logps/chosen': -416.52154541015625, 'logps/rejected': -595.1297607421875, 'logps/ref_chosen': -71.60749816894531, 'logps/ref_rejected': -97.25978088378906, 'KL/chosen_KL_mean': -344.9140625, 'KL/rejected_KL_mean': -497.8699951171875, 'KL/mean': -421.39202880859375, 'KL/std': 256.6206359863281, 'logits/chosen': -0.46674686670303345, 'logits/rejected': -0.45883116126060486, 'epoch': 0.54} + 54%|█████▍ | 370/681 [15:42<13:55, 2.69s/it] 54%|█████▍ | 371/681 [15:45<13:52, 2.69s/it] {'loss': 1.1043, 'grad_norm': 27.87566566467285, 'learning_rate': 2.5641594716365744e-07, 'fcm_dpo/beta': 0.0018742081010714173, 'fcm_dpo/q_t': 0.40980789065361023, 'fcm_dpo/delta': 0.003576137125492096, 'fcm_dpo/margin': 211.5328369140625, 'margin_dpo/margin_mean': 211.5328369140625, 'margin_dpo/margin_std': 302.67376708984375, 'logps/chosen': -405.1045837402344, 'logps/rejected': -646.3951416015625, 'logps/ref_chosen': -69.41448974609375, 'logps/ref_rejected': -99.17217254638672, 'KL/chosen_KL_mean': -335.6900939941406, 'KL/rejected_KL_mean': -547.222900390625, 'KL/mean': -441.45654296875, 'KL/std': 263.390625, 'logits/chosen': -0.5134047269821167, 'logits/rejected': -0.49832814931869507, 'epoch': 0.54} + 54%|█████▍ | 371/681 [15:45<13:52, 2.69s/it] 55%|█████▍ | 372/681 [15:47<13:11, 2.56s/it] {'loss': 1.0673, 'grad_norm': 25.24636459350586, 'learning_rate': 2.551329606220976e-07, 'fcm_dpo/beta': 0.0018502443563193083, 'fcm_dpo/q_t': 0.398201584815979, 'fcm_dpo/delta': -0.05988244712352753, 'fcm_dpo/margin': 246.97903442382812, 'margin_dpo/margin_mean': 246.97903442382812, 'margin_dpo/margin_std': 337.23712158203125, 'logps/chosen': -382.14849853515625, 'logps/rejected': -645.8489990234375, 'logps/ref_chosen': -61.8179931640625, 'logps/ref_rejected': -78.53948974609375, 'KL/chosen_KL_mean': -320.33050537109375, 'KL/rejected_KL_mean': -567.3095703125, 'KL/mean': -443.8200378417969, 'KL/std': 299.37408447265625, 'logits/chosen': -0.5078925490379333, 'logits/rejected': -0.49255937337875366, 'epoch': 0.55} + 55%|█████▍ | 372/681 [15:47<13:11, 2.56s/it] 55%|█████▍ | 373/681 [15:50<13:03, 2.54s/it] {'loss': 1.0556, 'grad_norm': 32.10969161987305, 'learning_rate': 2.538498388222517e-07, 'fcm_dpo/beta': 0.0018451586365699768, 'fcm_dpo/q_t': 0.3970082998275757, 'fcm_dpo/delta': -0.044221822172403336, 'fcm_dpo/margin': 239.539794921875, 'margin_dpo/margin_mean': 239.539794921875, 'margin_dpo/margin_std': 271.01666259765625, 'logps/chosen': -410.78961181640625, 'logps/rejected': -672.0718994140625, 'logps/ref_chosen': -64.21713256835938, 'logps/ref_rejected': -85.95960998535156, 'KL/chosen_KL_mean': -346.5724792480469, 'KL/rejected_KL_mean': -586.1122436523438, 'KL/mean': -466.34234619140625, 'KL/std': 286.9017639160156, 'logits/chosen': -0.4709147810935974, 'logits/rejected': -0.4543595016002655, 'epoch': 0.55} + 55%|█████▍ | 373/681 [15:50<13:03, 2.54s/it] 55%|█████▍ | 374/681 [15:52<13:21, 2.61s/it] {'loss': 1.1254, 'grad_norm': 24.107498168945312, 'learning_rate': 2.525666155755725e-07, 'fcm_dpo/beta': 0.0018260091310366988, 'fcm_dpo/q_t': 0.413374662399292, 'fcm_dpo/delta': 0.007001262158155441, 'fcm_dpo/margin': 215.20005798339844, 'margin_dpo/margin_mean': 215.2000732421875, 'margin_dpo/margin_std': 354.08026123046875, 'logps/chosen': -376.5108947753906, 'logps/rejected': -614.700927734375, 'logps/ref_chosen': -70.65018463134766, 'logps/ref_rejected': -93.64016723632812, 'KL/chosen_KL_mean': -305.8607177734375, 'KL/rejected_KL_mean': -521.060791015625, 'KL/mean': -413.46075439453125, 'KL/std': 309.17022705078125, 'logits/chosen': -0.5528968572616577, 'logits/rejected': -0.5381832122802734, 'epoch': 0.55} + 55%|█████▍ | 374/681 [15:52<13:21, 2.61s/it] 55%|█████▌ | 375/681 [15:55<13:27, 2.64s/it] {'loss': 1.1373, 'grad_norm': 28.248476028442383, 'learning_rate': 2.512833246961859e-07, 'fcm_dpo/beta': 0.0018346281722187996, 'fcm_dpo/q_t': 0.416460245847702, 'fcm_dpo/delta': 0.03891323506832123, 'fcm_dpo/margin': 197.35801696777344, 'margin_dpo/margin_mean': 197.35800170898438, 'margin_dpo/margin_std': 312.4132995605469, 'logps/chosen': -377.0509948730469, 'logps/rejected': -603.26708984375, 'logps/ref_chosen': -60.080223083496094, 'logps/ref_rejected': -88.93830871582031, 'KL/chosen_KL_mean': -316.97076416015625, 'KL/rejected_KL_mean': -514.3287353515625, 'KL/mean': -415.6497802734375, 'KL/std': 248.89346313476562, 'logits/chosen': -0.48038458824157715, 'logits/rejected': -0.4829840064048767, 'epoch': 0.55} + 55%|█████▌ | 375/681 [15:55<13:27, 2.64s/it] 55%|█████▌ | 376/681 [15:58<13:28, 2.65s/it] {'loss': 1.0513, 'grad_norm': 25.545181274414062, 'learning_rate': 2.5e-07, 'fcm_dpo/beta': 0.0018291289452463388, 'fcm_dpo/q_t': 0.39261579513549805, 'fcm_dpo/delta': -0.07235552370548248, 'fcm_dpo/margin': 256.3907470703125, 'margin_dpo/margin_mean': 256.3907470703125, 'margin_dpo/margin_std': 320.7281188964844, 'logps/chosen': -362.66876220703125, 'logps/rejected': -661.9258422851562, 'logps/ref_chosen': -62.660308837890625, 'logps/ref_rejected': -105.52660369873047, 'KL/chosen_KL_mean': -300.00848388671875, 'KL/rejected_KL_mean': -556.3992919921875, 'KL/mean': -428.203857421875, 'KL/std': 273.4169616699219, 'logits/chosen': -0.44510942697525024, 'logits/rejected': -0.43642458319664, 'epoch': 0.55} + 55%|█████▌ | 376/681 [15:58<13:28, 2.65s/it] 55%|█████▌ | 377/681 [16:00<13:11, 2.61s/it] {'loss': 1.0517, 'grad_norm': 25.23697853088379, 'learning_rate': 2.487166753038141e-07, 'fcm_dpo/beta': 0.0018094563856720924, 'fcm_dpo/q_t': 0.39543959498405457, 'fcm_dpo/delta': -0.05844918638467789, 'fcm_dpo/margin': 251.91156005859375, 'margin_dpo/margin_mean': 251.91156005859375, 'margin_dpo/margin_std': 303.8875732421875, 'logps/chosen': -355.6353759765625, 'logps/rejected': -651.7716064453125, 'logps/ref_chosen': -54.478736877441406, 'logps/ref_rejected': -98.70335388183594, 'KL/chosen_KL_mean': -301.1566467285156, 'KL/rejected_KL_mean': -553.0682373046875, 'KL/mean': -427.1124267578125, 'KL/std': 280.93780517578125, 'logits/chosen': -0.397521436214447, 'logits/rejected': -0.39853352308273315, 'epoch': 0.55} + 55%|█████▌ | 377/681 [16:00<13:11, 2.61s/it] 56%|█████▌ | 378/681 [16:02<12:28, 2.47s/it] {'loss': 1.0286, 'grad_norm': 30.339950561523438, 'learning_rate': 2.4743338442442754e-07, 'fcm_dpo/beta': 0.001778826816007495, 'fcm_dpo/q_t': 0.3893394470214844, 'fcm_dpo/delta': -0.07667370140552521, 'fcm_dpo/margin': 265.88409423828125, 'margin_dpo/margin_mean': 265.88409423828125, 'margin_dpo/margin_std': 285.3680419921875, 'logps/chosen': -321.08538818359375, 'logps/rejected': -629.9959716796875, 'logps/ref_chosen': -45.02053451538086, 'logps/ref_rejected': -88.0469741821289, 'KL/chosen_KL_mean': -276.0648498535156, 'KL/rejected_KL_mean': -541.948974609375, 'KL/mean': -409.00689697265625, 'KL/std': 263.941162109375, 'logits/chosen': -0.4182929992675781, 'logits/rejected': -0.4355580806732178, 'epoch': 0.56} + 56%|█████▌ | 378/681 [16:02<12:28, 2.47s/it] 56%|█████▌ | 379/681 [16:05<12:23, 2.46s/it] {'loss': 1.0496, 'grad_norm': 23.810928344726562, 'learning_rate': 2.461501611777483e-07, 'fcm_dpo/beta': 0.0017488367157056928, 'fcm_dpo/q_t': 0.39461731910705566, 'fcm_dpo/delta': -0.06558392941951752, 'fcm_dpo/margin': 264.138916015625, 'margin_dpo/margin_mean': 264.1388854980469, 'margin_dpo/margin_std': 319.9960021972656, 'logps/chosen': -377.43035888671875, 'logps/rejected': -702.687255859375, 'logps/ref_chosen': -53.182098388671875, 'logps/ref_rejected': -114.3001708984375, 'KL/chosen_KL_mean': -324.24822998046875, 'KL/rejected_KL_mean': -588.3870849609375, 'KL/mean': -456.31768798828125, 'KL/std': 270.865478515625, 'logits/chosen': -0.3999977111816406, 'logits/rejected': -0.4254748225212097, 'epoch': 0.56} + 56%|█████▌ | 379/681 [16:05<12:23, 2.46s/it] 56%|█████▌ | 380/681 [16:07<12:10, 2.43s/it] {'loss': 1.0222, 'grad_norm': 27.789323806762695, 'learning_rate': 2.4486703937790243e-07, 'fcm_dpo/beta': 0.0017266274662688375, 'fcm_dpo/q_t': 0.3840462565422058, 'fcm_dpo/delta': -0.10273480415344238, 'fcm_dpo/margin': 288.2655029296875, 'margin_dpo/margin_mean': 288.2655029296875, 'margin_dpo/margin_std': 325.3714599609375, 'logps/chosen': -378.78692626953125, 'logps/rejected': -719.8910522460938, 'logps/ref_chosen': -51.3530387878418, 'logps/ref_rejected': -104.19169616699219, 'KL/chosen_KL_mean': -327.43389892578125, 'KL/rejected_KL_mean': -615.6993408203125, 'KL/mean': -471.5666198730469, 'KL/std': 298.45489501953125, 'logits/chosen': -0.4401572346687317, 'logits/rejected': -0.46762269735336304, 'epoch': 0.56} + 56%|█████▌ | 380/681 [16:07<12:10, 2.43s/it] 56%|█████▌ | 381/681 [16:10<12:12, 2.44s/it] {'loss': 1.1515, 'grad_norm': 25.380268096923828, 'learning_rate': 2.435840528363426e-07, 'fcm_dpo/beta': 0.0017242280300706625, 'fcm_dpo/q_t': 0.42024338245391846, 'fcm_dpo/delta': 0.04968461021780968, 'fcm_dpo/margin': 204.19798278808594, 'margin_dpo/margin_mean': 204.197998046875, 'margin_dpo/margin_std': 352.91259765625, 'logps/chosen': -406.5311279296875, 'logps/rejected': -632.1454467773438, 'logps/ref_chosen': -57.80306625366211, 'logps/ref_rejected': -79.21940612792969, 'KL/chosen_KL_mean': -348.72802734375, 'KL/rejected_KL_mean': -552.926025390625, 'KL/mean': -450.8270263671875, 'KL/std': 260.6754455566406, 'logits/chosen': -0.43789827823638916, 'logits/rejected': -0.4210563898086548, 'epoch': 0.56} + 56%|█████▌ | 381/681 [16:10<12:12, 2.44s/it] 56%|█████▌ | 382/681 [16:12<12:28, 2.50s/it] {'loss': 1.0578, 'grad_norm': 30.70073890686035, 'learning_rate': 2.4230123536095745e-07, 'fcm_dpo/beta': 0.0017251023091375828, 'fcm_dpo/q_t': 0.4009873569011688, 'fcm_dpo/delta': -0.02524741366505623, 'fcm_dpo/margin': 245.88180541992188, 'margin_dpo/margin_mean': 245.88180541992188, 'margin_dpo/margin_std': 270.6402587890625, 'logps/chosen': -418.14453125, 'logps/rejected': -708.7161865234375, 'logps/ref_chosen': -66.02030181884766, 'logps/ref_rejected': -110.71016693115234, 'KL/chosen_KL_mean': -352.124267578125, 'KL/rejected_KL_mean': -598.0060424804688, 'KL/mean': -475.0651550292969, 'KL/std': 246.15951538085938, 'logits/chosen': -0.46005573868751526, 'logits/rejected': -0.4692569375038147, 'epoch': 0.56} + 56%|█████▌ | 382/681 [16:12<12:28, 2.50s/it] 56%|█████▌ | 383/681 [16:15<12:31, 2.52s/it] {'loss': 1.1034, 'grad_norm': 29.060136795043945, 'learning_rate': 2.4101862075518037e-07, 'fcm_dpo/beta': 0.0017189650097861886, 'fcm_dpo/q_t': 0.40572264790534973, 'fcm_dpo/delta': -0.011835414916276932, 'fcm_dpo/margin': 239.28997802734375, 'margin_dpo/margin_mean': 239.28997802734375, 'margin_dpo/margin_std': 355.65509033203125, 'logps/chosen': -408.6180114746094, 'logps/rejected': -691.232421875, 'logps/ref_chosen': -50.39148712158203, 'logps/ref_rejected': -93.71589660644531, 'KL/chosen_KL_mean': -358.22650146484375, 'KL/rejected_KL_mean': -597.5165405273438, 'KL/mean': -477.87152099609375, 'KL/std': 270.5577392578125, 'logits/chosen': -0.4785361886024475, 'logits/rejected': -0.4892638325691223, 'epoch': 0.56} + 56%|█████▌ | 383/681 [16:15<12:31, 2.52s/it] 56%|█████▋ | 384/681 [16:17<12:31, 2.53s/it] {'loss': 1.1205, 'grad_norm': 28.971044540405273, 'learning_rate': 2.397362428170992e-07, 'fcm_dpo/beta': 0.0017420074436813593, 'fcm_dpo/q_t': 0.4199580252170563, 'fcm_dpo/delta': 0.062150660902261734, 'fcm_dpo/margin': 194.84095764160156, 'margin_dpo/margin_mean': 194.84097290039062, 'margin_dpo/margin_std': 245.9820556640625, 'logps/chosen': -428.5443420410156, 'logps/rejected': -657.10009765625, 'logps/ref_chosen': -52.046104431152344, 'logps/ref_rejected': -85.76089477539062, 'KL/chosen_KL_mean': -376.49822998046875, 'KL/rejected_KL_mean': -571.3392333984375, 'KL/mean': -473.9187316894531, 'KL/std': 258.77777099609375, 'logits/chosen': -0.49737972021102905, 'logits/rejected': -0.4919343590736389, 'epoch': 0.56} + 56%|█████▋ | 384/681 [16:17<12:31, 2.53s/it] 57%|█████▋ | 385/681 [16:20<12:24, 2.52s/it] {'loss': 1.0656, 'grad_norm': 34.824005126953125, 'learning_rate': 2.3845413533856514e-07, 'fcm_dpo/beta': 0.0017391443252563477, 'fcm_dpo/q_t': 0.4041079580783844, 'fcm_dpo/delta': -0.006142602767795324, 'fcm_dpo/margin': 233.38693237304688, 'margin_dpo/margin_mean': 233.38693237304688, 'margin_dpo/margin_std': 248.42532348632812, 'logps/chosen': -427.3863220214844, 'logps/rejected': -673.049072265625, 'logps/ref_chosen': -65.55215454101562, 'logps/ref_rejected': -77.82792663574219, 'KL/chosen_KL_mean': -361.83416748046875, 'KL/rejected_KL_mean': -595.2210693359375, 'KL/mean': -478.52764892578125, 'KL/std': 224.6497802734375, 'logits/chosen': -0.5053662061691284, 'logits/rejected': -0.48552972078323364, 'epoch': 0.57} + 57%|█████▋ | 385/681 [16:20<12:24, 2.52s/it] 57%|█████▋ | 386/681 [16:23<12:31, 2.55s/it] {'loss': 1.066, 'grad_norm': 34.94272994995117, 'learning_rate': 2.3717233210430254e-07, 'fcm_dpo/beta': 0.0017280435422435403, 'fcm_dpo/q_t': 0.39897212386131287, 'fcm_dpo/delta': -0.046092525124549866, 'fcm_dpo/margin': 256.977783203125, 'margin_dpo/margin_mean': 256.977783203125, 'margin_dpo/margin_std': 332.15789794921875, 'logps/chosen': -437.1575927734375, 'logps/rejected': -728.2409057617188, 'logps/ref_chosen': -58.22185516357422, 'logps/ref_rejected': -92.32742309570312, 'KL/chosen_KL_mean': -378.93572998046875, 'KL/rejected_KL_mean': -635.9134521484375, 'KL/mean': -507.42462158203125, 'KL/std': 285.33013916015625, 'logits/chosen': -0.4857565760612488, 'logits/rejected': -0.4821171760559082, 'epoch': 0.57} + 57%|█████▋ | 386/681 [16:23<12:31, 2.55s/it] 57%|█████▋ | 387/681 [16:25<12:45, 2.61s/it] {'loss': 1.093, 'grad_norm': 37.69805908203125, 'learning_rate': 2.3589086689101889e-07, 'fcm_dpo/beta': 0.001718209940008819, 'fcm_dpo/q_t': 0.41014280915260315, 'fcm_dpo/delta': 0.01678801327943802, 'fcm_dpo/margin': 223.16220092773438, 'margin_dpo/margin_mean': 223.16221618652344, 'margin_dpo/margin_std': 270.58795166015625, 'logps/chosen': -459.8216552734375, 'logps/rejected': -708.733642578125, 'logps/ref_chosen': -66.41944885253906, 'logps/ref_rejected': -92.16915893554688, 'KL/chosen_KL_mean': -393.4022216796875, 'KL/rejected_KL_mean': -616.564453125, 'KL/mean': -504.98333740234375, 'KL/std': 264.5252685546875, 'logits/chosen': -0.5313735008239746, 'logits/rejected': -0.5198484063148499, 'epoch': 0.57} + 57%|█████▋ | 387/681 [16:25<12:45, 2.61s/it] 57%|█████▋ | 388/681 [16:28<12:21, 2.53s/it] {'loss': 1.0371, 'grad_norm': 34.622745513916016, 'learning_rate': 2.3460977346651428e-07, 'fcm_dpo/beta': 0.0016994503093883395, 'fcm_dpo/q_t': 0.39002934098243713, 'fcm_dpo/delta': -0.09461631625890732, 'fcm_dpo/margin': 288.0831604003906, 'margin_dpo/margin_mean': 288.0831604003906, 'margin_dpo/margin_std': 356.5264892578125, 'logps/chosen': -430.4327392578125, 'logps/rejected': -772.8195190429688, 'logps/ref_chosen': -50.129459381103516, 'logps/ref_rejected': -104.43305969238281, 'KL/chosen_KL_mean': -380.30328369140625, 'KL/rejected_KL_mean': -668.386474609375, 'KL/mean': -524.3448486328125, 'KL/std': 307.99615478515625, 'logits/chosen': -0.49403852224349976, 'logits/rejected': -0.5061089396476746, 'epoch': 0.57} + 57%|█████▋ | 388/681 [16:28<12:21, 2.53s/it] 57%|█████▋ | 389/681 [16:30<12:07, 2.49s/it] {'loss': 1.0973, 'grad_norm': 31.105833053588867, 'learning_rate': 2.3332908558879177e-07, 'fcm_dpo/beta': 0.001694181701168418, 'fcm_dpo/q_t': 0.40893417596817017, 'fcm_dpo/delta': 0.003580855205655098, 'fcm_dpo/margin': 234.0470733642578, 'margin_dpo/margin_mean': 234.04705810546875, 'margin_dpo/margin_std': 320.1253967285156, 'logps/chosen': -461.0569763183594, 'logps/rejected': -715.1119995117188, 'logps/ref_chosen': -57.906593322753906, 'logps/ref_rejected': -77.91454315185547, 'KL/chosen_KL_mean': -403.150390625, 'KL/rejected_KL_mean': -637.1974487304688, 'KL/mean': -520.1739501953125, 'KL/std': 292.00225830078125, 'logits/chosen': -0.5170685648918152, 'logits/rejected': -0.5164707899093628, 'epoch': 0.57} + 57%|█████▋ | 389/681 [16:30<12:07, 2.49s/it] 57%|█████▋ | 390/681 [16:33<12:06, 2.50s/it] {'loss': 1.1107, 'grad_norm': 31.483285903930664, 'learning_rate': 2.320488370051681e-07, 'fcm_dpo/beta': 0.0016903409268707037, 'fcm_dpo/q_t': 0.41132819652557373, 'fcm_dpo/delta': -0.00112185999751091, 'fcm_dpo/margin': 237.10052490234375, 'margin_dpo/margin_mean': 237.10049438476562, 'margin_dpo/margin_std': 363.8388366699219, 'logps/chosen': -437.5489501953125, 'logps/rejected': -710.9517211914062, 'logps/ref_chosen': -49.22591781616211, 'logps/ref_rejected': -85.5281982421875, 'KL/chosen_KL_mean': -388.3230285644531, 'KL/rejected_KL_mean': -625.4235229492188, 'KL/mean': -506.873291015625, 'KL/std': 285.4632568359375, 'logits/chosen': -0.449199914932251, 'logits/rejected': -0.4437105655670166, 'epoch': 0.57} + 57%|█████▋ | 390/681 [16:33<12:06, 2.50s/it] 57%|█████▋ | 391/681 [16:35<12:01, 2.49s/it] {'loss': 1.2092, 'grad_norm': 39.402198791503906, 'learning_rate': 2.3076906145138405e-07, 'fcm_dpo/beta': 0.001737719401717186, 'fcm_dpo/q_t': 0.44300517439842224, 'fcm_dpo/delta': 0.1569492220878601, 'fcm_dpo/margin': 142.06271362304688, 'margin_dpo/margin_mean': 142.0626983642578, 'margin_dpo/margin_std': 269.4259033203125, 'logps/chosen': -430.6678771972656, 'logps/rejected': -595.13916015625, 'logps/ref_chosen': -64.32965087890625, 'logps/ref_rejected': -86.73820495605469, 'KL/chosen_KL_mean': -366.3382263183594, 'KL/rejected_KL_mean': -508.40093994140625, 'KL/mean': -437.36956787109375, 'KL/std': 255.61053466796875, 'logits/chosen': -0.4606980085372925, 'logits/rejected': -0.4532572627067566, 'epoch': 0.57} + 57%|█████▋ | 391/681 [16:35<12:01, 2.49s/it] 58%|█████▊ | 392/681 [16:38<12:13, 2.54s/it] {'loss': 1.0146, 'grad_norm': 29.656238555908203, 'learning_rate': 2.294897926507156e-07, 'fcm_dpo/beta': 0.0017368567641824484, 'fcm_dpo/q_t': 0.38911527395248413, 'fcm_dpo/delta': -0.07744710892438889, 'fcm_dpo/margin': 272.79132080078125, 'margin_dpo/margin_mean': 272.7912902832031, 'margin_dpo/margin_std': 255.12586975097656, 'logps/chosen': -357.18487548828125, 'logps/rejected': -678.8179931640625, 'logps/ref_chosen': -53.50397872924805, 'logps/ref_rejected': -102.34584045410156, 'KL/chosen_KL_mean': -303.680908203125, 'KL/rejected_KL_mean': -576.47216796875, 'KL/mean': -440.0765380859375, 'KL/std': 265.79547119140625, 'logits/chosen': -0.4456912875175476, 'logits/rejected': -0.4384923577308655, 'epoch': 0.58} + 58%|█████▊ | 392/681 [16:38<12:13, 2.54s/it] 58%|█████▊ | 393/681 [16:40<11:56, 2.49s/it] {'loss': 1.124, 'grad_norm': 25.39501190185547, 'learning_rate': 2.2821106431308543e-07, 'fcm_dpo/beta': 0.0017312290146946907, 'fcm_dpo/q_t': 0.41823697090148926, 'fcm_dpo/delta': 0.03820331022143364, 'fcm_dpo/margin': 209.79046630859375, 'margin_dpo/margin_mean': 209.79046630859375, 'margin_dpo/margin_std': 317.695068359375, 'logps/chosen': -340.1102294921875, 'logps/rejected': -575.3956298828125, 'logps/ref_chosen': -46.473915100097656, 'logps/ref_rejected': -71.96885681152344, 'KL/chosen_KL_mean': -293.63629150390625, 'KL/rejected_KL_mean': -503.4267578125, 'KL/mean': -398.53155517578125, 'KL/std': 257.4723205566406, 'logits/chosen': -0.4462127685546875, 'logits/rejected': -0.445779412984848, 'epoch': 0.58} + 58%|█████▊ | 393/681 [16:40<11:56, 2.49s/it] 58%|█████▊ | 394/681 [16:43<12:11, 2.55s/it] {'loss': 1.0947, 'grad_norm': 22.869054794311523, 'learning_rate': 2.2693291013417452e-07, 'fcm_dpo/beta': 0.001739653293043375, 'fcm_dpo/q_t': 0.4115217924118042, 'fcm_dpo/delta': 0.017899950966238976, 'fcm_dpo/margin': 220.0300750732422, 'margin_dpo/margin_mean': 220.0300750732422, 'margin_dpo/margin_std': 278.54339599609375, 'logps/chosen': -374.0125732421875, 'logps/rejected': -631.9537353515625, 'logps/ref_chosen': -52.91154861450195, 'logps/ref_rejected': -90.8226318359375, 'KL/chosen_KL_mean': -321.10101318359375, 'KL/rejected_KL_mean': -541.131103515625, 'KL/mean': -431.11602783203125, 'KL/std': 263.2855529785156, 'logits/chosen': -0.46011653542518616, 'logits/rejected': -0.4607650935649872, 'epoch': 0.58} + 58%|█████▊ | 394/681 [16:43<12:11, 2.55s/it] 58%|█████▊ | 395/681 [16:45<11:52, 2.49s/it] {'loss': 1.0802, 'grad_norm': 21.308685302734375, 'learning_rate': 2.2565536379453404e-07, 'fcm_dpo/beta': 0.0017342737410217524, 'fcm_dpo/q_t': 0.40489462018013, 'fcm_dpo/delta': -0.015743978321552277, 'fcm_dpo/margin': 239.2242431640625, 'margin_dpo/margin_mean': 239.2242431640625, 'margin_dpo/margin_std': 307.40301513671875, 'logps/chosen': -382.2861022949219, 'logps/rejected': -642.7468872070312, 'logps/ref_chosen': -62.546112060546875, 'logps/ref_rejected': -83.78262329101562, 'KL/chosen_KL_mean': -319.739990234375, 'KL/rejected_KL_mean': -558.9642333984375, 'KL/mean': -439.35211181640625, 'KL/std': 267.854248046875, 'logits/chosen': -0.4947971701622009, 'logits/rejected': -0.49258559942245483, 'epoch': 0.58} + 58%|█████▊ | 395/681 [16:45<11:52, 2.49s/it] 58%|█████▊ | 396/681 [16:48<11:54, 2.51s/it] {'loss': 1.0806, 'grad_norm': 23.98872947692871, 'learning_rate': 2.2437845895869825e-07, 'fcm_dpo/beta': 0.0017403149977326393, 'fcm_dpo/q_t': 0.41025522351264954, 'fcm_dpo/delta': 0.019010702148079872, 'fcm_dpo/margin': 219.32781982421875, 'margin_dpo/margin_mean': 219.32781982421875, 'margin_dpo/margin_std': 237.82656860351562, 'logps/chosen': -396.52801513671875, 'logps/rejected': -635.506591796875, 'logps/ref_chosen': -68.99594116210938, 'logps/ref_rejected': -88.64665985107422, 'KL/chosen_KL_mean': -327.5321044921875, 'KL/rejected_KL_mean': -546.8599853515625, 'KL/mean': -437.19598388671875, 'KL/std': 254.036865234375, 'logits/chosen': -0.4915603995323181, 'logits/rejected': -0.475990355014801, 'epoch': 0.58} + 58%|█████▊ | 396/681 [16:48<11:54, 2.51s/it] 58%|█████▊ | 397/681 [16:50<11:49, 2.50s/it] {'loss': 1.0139, 'grad_norm': 34.52851867675781, 'learning_rate': 2.2310222927429716e-07, 'fcm_dpo/beta': 0.0017182010924443603, 'fcm_dpo/q_t': 0.38798123598098755, 'fcm_dpo/delta': -0.08137989044189453, 'fcm_dpo/margin': 277.6547546386719, 'margin_dpo/margin_mean': 277.65472412109375, 'margin_dpo/margin_std': 265.4613037109375, 'logps/chosen': -366.5556945800781, 'logps/rejected': -686.0493774414062, 'logps/ref_chosen': -61.27716827392578, 'logps/ref_rejected': -103.11612701416016, 'KL/chosen_KL_mean': -305.2785339355469, 'KL/rejected_KL_mean': -582.9332275390625, 'KL/mean': -444.10589599609375, 'KL/std': 257.37060546875, 'logits/chosen': -0.434369832277298, 'logits/rejected': -0.43857717514038086, 'epoch': 0.58} + 58%|█████▊ | 397/681 [16:50<11:49, 2.50s/it] 58%|█████▊ | 398/681 [16:52<11:31, 2.44s/it] {'loss': 1.0739, 'grad_norm': 21.167795181274414, 'learning_rate': 2.2182670837116972e-07, 'fcm_dpo/beta': 0.0017101437551900744, 'fcm_dpo/q_t': 0.40368321537971497, 'fcm_dpo/delta': -0.022855112329125404, 'fcm_dpo/margin': 246.69528198242188, 'margin_dpo/margin_mean': 246.69528198242188, 'margin_dpo/margin_std': 314.19610595703125, 'logps/chosen': -407.53924560546875, 'logps/rejected': -694.6066284179688, 'logps/ref_chosen': -68.15155029296875, 'logps/ref_rejected': -108.52360534667969, 'KL/chosen_KL_mean': -339.3876953125, 'KL/rejected_KL_mean': -586.0830078125, 'KL/mean': -462.7353515625, 'KL/std': 268.5569152832031, 'logits/chosen': -0.504738986492157, 'logits/rejected': -0.5006571412086487, 'epoch': 0.58} + 58%|█████▊ | 398/681 [16:52<11:31, 2.44s/it] 59%|█████▊ | 399/681 [16:55<11:33, 2.46s/it] {'loss': 1.0882, 'grad_norm': 26.88203239440918, 'learning_rate': 2.2055192986047804e-07, 'fcm_dpo/beta': 0.0017040125094354153, 'fcm_dpo/q_t': 0.40744373202323914, 'fcm_dpo/delta': 0.000902075320482254, 'fcm_dpo/margin': 234.1426544189453, 'margin_dpo/margin_mean': 234.1426544189453, 'margin_dpo/margin_std': 298.5992431640625, 'logps/chosen': -353.3929138183594, 'logps/rejected': -604.611328125, 'logps/ref_chosen': -60.889801025390625, 'logps/ref_rejected': -77.965576171875, 'KL/chosen_KL_mean': -292.50311279296875, 'KL/rejected_KL_mean': -526.645751953125, 'KL/mean': -409.574462890625, 'KL/std': 240.19448852539062, 'logits/chosen': -0.4666723310947418, 'logits/rejected': -0.42695629596710205, 'epoch': 0.59} + 59%|█████▊ | 399/681 [16:55<11:33, 2.46s/it] 59%|█████▊ | 400/681 [16:57<11:35, 2.48s/it] {'loss': 0.9628, 'grad_norm': 28.340599060058594, 'learning_rate': 2.192779273338215e-07, 'fcm_dpo/beta': 0.0016647314187139273, 'fcm_dpo/q_t': 0.3706052005290985, 'fcm_dpo/delta': -0.158945232629776, 'fcm_dpo/margin': 330.199951171875, 'margin_dpo/margin_mean': 330.199951171875, 'margin_dpo/margin_std': 286.12255859375, 'logps/chosen': -345.1142272949219, 'logps/rejected': -716.9232788085938, 'logps/ref_chosen': -63.64359664916992, 'logps/ref_rejected': -105.252685546875, 'KL/chosen_KL_mean': -281.47064208984375, 'KL/rejected_KL_mean': -611.6705932617188, 'KL/mean': -446.57061767578125, 'KL/std': 261.82757568359375, 'logits/chosen': -0.4434245228767395, 'logits/rejected': -0.43936118483543396, 'epoch': 0.59} + 59%|█████▊ | 400/681 [16:57<11:35, 2.48s/it] 59%|█████▉ | 401/681 [17:00<11:41, 2.50s/it] {'loss': 1.1851, 'grad_norm': 23.37901496887207, 'learning_rate': 2.1800473436235136e-07, 'fcm_dpo/beta': 0.0016737841069698334, 'fcm_dpo/q_t': 0.4304611086845398, 'fcm_dpo/delta': 0.09229836612939835, 'fcm_dpo/margin': 185.60690307617188, 'margin_dpo/margin_mean': 185.60690307617188, 'margin_dpo/margin_std': 357.864013671875, 'logps/chosen': -398.4688720703125, 'logps/rejected': -610.7052001953125, 'logps/ref_chosen': -57.16303253173828, 'logps/ref_rejected': -83.79249572753906, 'KL/chosen_KL_mean': -341.30584716796875, 'KL/rejected_KL_mean': -526.9127197265625, 'KL/mean': -434.10931396484375, 'KL/std': 270.2348937988281, 'logits/chosen': -0.4471530318260193, 'logits/rejected': -0.4397915005683899, 'epoch': 0.59} + 59%|█████▉ | 401/681 [17:00<11:41, 2.50s/it] 59%|█████▉ | 402/681 [17:02<11:20, 2.44s/it] {'loss': 0.9545, 'grad_norm': 21.227405548095703, 'learning_rate': 2.1673238449588665e-07, 'fcm_dpo/beta': 0.0016427625669166446, 'fcm_dpo/q_t': 0.3684191107749939, 'fcm_dpo/delta': -0.17618390917778015, 'fcm_dpo/margin': 344.6689758300781, 'margin_dpo/margin_mean': 344.6689758300781, 'margin_dpo/margin_std': 306.67181396484375, 'logps/chosen': -305.7143249511719, 'logps/rejected': -680.68896484375, 'logps/ref_chosen': -50.74037170410156, 'logps/ref_rejected': -81.0460433959961, 'KL/chosen_KL_mean': -254.9739532470703, 'KL/rejected_KL_mean': -599.6429443359375, 'KL/mean': -427.3084716796875, 'KL/std': 300.2375793457031, 'logits/chosen': -0.490563303232193, 'logits/rejected': -0.4836328625679016, 'epoch': 0.59} + 59%|█████▉ | 402/681 [17:02<11:20, 2.44s/it] 59%|█████▉ | 403/681 [17:05<11:15, 2.43s/it] {'loss': 1.056, 'grad_norm': 23.255849838256836, 'learning_rate': 2.154609112620295e-07, 'fcm_dpo/beta': 0.0016190607566386461, 'fcm_dpo/q_t': 0.3999168574810028, 'fcm_dpo/delta': -0.03328249230980873, 'fcm_dpo/margin': 266.7139892578125, 'margin_dpo/margin_mean': 266.7139892578125, 'margin_dpo/margin_std': 298.5339050292969, 'logps/chosen': -333.8660888671875, 'logps/rejected': -630.6993408203125, 'logps/ref_chosen': -47.14731216430664, 'logps/ref_rejected': -77.2666015625, 'KL/chosen_KL_mean': -286.7187805175781, 'KL/rejected_KL_mean': -553.4327392578125, 'KL/mean': -420.0758056640625, 'KL/std': 288.28253173828125, 'logits/chosen': -0.4604523479938507, 'logits/rejected': -0.4616071879863739, 'epoch': 0.59} + 59%|█████▉ | 403/681 [17:05<11:15, 2.43s/it] 59%|█████▉ | 404/681 [17:07<11:18, 2.45s/it] {'loss': 1.0819, 'grad_norm': 30.493053436279297, 'learning_rate': 2.1419034816528218e-07, 'fcm_dpo/beta': 0.0016095450846478343, 'fcm_dpo/q_t': 0.40320682525634766, 'fcm_dpo/delta': -0.022908374667167664, 'fcm_dpo/margin': 262.11383056640625, 'margin_dpo/margin_mean': 262.11383056640625, 'margin_dpo/margin_std': 347.977783203125, 'logps/chosen': -375.13690185546875, 'logps/rejected': -666.5303955078125, 'logps/ref_chosen': -47.875274658203125, 'logps/ref_rejected': -77.15499877929688, 'KL/chosen_KL_mean': -327.2615966796875, 'KL/rejected_KL_mean': -589.3754272460938, 'KL/mean': -458.3185119628906, 'KL/std': 274.367919921875, 'logits/chosen': -0.4503590166568756, 'logits/rejected': -0.4424477815628052, 'epoch': 0.59} + 59%|█████▉ | 404/681 [17:07<11:18, 2.45s/it] 59%|█████▉ | 405/681 [17:09<11:03, 2.40s/it] {'loss': 1.1666, 'grad_norm': 32.75885772705078, 'learning_rate': 2.129207286861638e-07, 'fcm_dpo/beta': 0.001602754695340991, 'fcm_dpo/q_t': 0.4246622323989868, 'fcm_dpo/delta': -0.043147142976522446, 'fcm_dpo/margin': 207.8790283203125, 'margin_dpo/margin_mean': 207.8790283203125, 'margin_dpo/margin_std': 365.72845458984375, 'logps/chosen': -445.6051025390625, 'logps/rejected': -675.508056640625, 'logps/ref_chosen': -65.16290283203125, 'logps/ref_rejected': -87.18678283691406, 'KL/chosen_KL_mean': -380.44219970703125, 'KL/rejected_KL_mean': -588.3212890625, 'KL/mean': -484.3817443847656, 'KL/std': 300.7879943847656, 'logits/chosen': -0.44959819316864014, 'logits/rejected': -0.4414255619049072, 'epoch': 0.59} + 59%|█████▉ | 405/681 [17:10<11:03, 2.40s/it] 60%|█████▉ | 406/681 [17:12<10:57, 2.39s/it] {'loss': 1.0517, 'grad_norm': 23.16806983947754, 'learning_rate': 2.1165208628032861e-07, 'fcm_dpo/beta': 0.0015887843910604715, 'fcm_dpo/q_t': 0.39615678787231445, 'fcm_dpo/delta': -0.05800767242908478, 'fcm_dpo/margin': 286.5443115234375, 'margin_dpo/margin_mean': 286.5443115234375, 'margin_dpo/margin_std': 344.0789794921875, 'logps/chosen': -392.65576171875, 'logps/rejected': -721.5379028320312, 'logps/ref_chosen': -49.740814208984375, 'logps/ref_rejected': -92.07862854003906, 'KL/chosen_KL_mean': -342.9149475097656, 'KL/rejected_KL_mean': -629.459228515625, 'KL/mean': -486.18707275390625, 'KL/std': 308.88067626953125, 'logits/chosen': -0.48730766773223877, 'logits/rejected': -0.5009229183197021, 'epoch': 0.6} + 60%|█████▉ | 406/681 [17:12<10:57, 2.39s/it] 60%|█████▉ | 407/681 [17:14<11:10, 2.45s/it] {'loss': 1.1991, 'grad_norm': 27.538284301757812, 'learning_rate': 2.1038445437768375e-07, 'fcm_dpo/beta': 0.0015788807068020105, 'fcm_dpo/q_t': 0.43600770831108093, 'fcm_dpo/delta': 0.009174516424536705, 'fcm_dpo/margin': 172.09339904785156, 'margin_dpo/margin_mean': 172.09341430664062, 'margin_dpo/margin_std': 325.55999755859375, 'logps/chosen': -428.81915283203125, 'logps/rejected': -622.093994140625, 'logps/ref_chosen': -56.33069610595703, 'logps/ref_rejected': -77.51209259033203, 'KL/chosen_KL_mean': -372.48846435546875, 'KL/rejected_KL_mean': -544.5818481445312, 'KL/mean': -458.53515625, 'KL/std': 246.12185668945312, 'logits/chosen': -0.4871164560317993, 'logits/rejected': -0.45956844091415405, 'epoch': 0.6} + 60%|█████▉ | 407/681 [17:14<11:10, 2.45s/it] 60%|█████▉ | 408/681 [17:17<11:25, 2.51s/it] {'loss': 1.1292, 'grad_norm': 33.735374450683594, 'learning_rate': 2.0911786638150872e-07, 'fcm_dpo/beta': 0.0015977565199136734, 'fcm_dpo/q_t': 0.42254602909088135, 'fcm_dpo/delta': 0.07305292040109634, 'fcm_dpo/margin': 206.1318359375, 'margin_dpo/margin_mean': 206.1318359375, 'margin_dpo/margin_std': 271.8330993652344, 'logps/chosen': -441.90179443359375, 'logps/rejected': -668.34130859375, 'logps/ref_chosen': -69.789306640625, 'logps/ref_rejected': -90.09693908691406, 'KL/chosen_KL_mean': -372.11248779296875, 'KL/rejected_KL_mean': -578.244384765625, 'KL/mean': -475.17840576171875, 'KL/std': 243.3355255126953, 'logits/chosen': -0.5174983143806458, 'logits/rejected': -0.4989047050476074, 'epoch': 0.6} + 60%|█████▉ | 408/681 [17:17<11:25, 2.51s/it] 60%|██████ | 409/681 [17:20<11:38, 2.57s/it] {'loss': 1.1368, 'grad_norm': 37.519432067871094, 'learning_rate': 2.0785235566757517e-07, 'fcm_dpo/beta': 0.0016257348470389843, 'fcm_dpo/q_t': 0.42412787675857544, 'fcm_dpo/delta': 0.07655191421508789, 'fcm_dpo/margin': 200.32015991210938, 'margin_dpo/margin_mean': 200.32015991210938, 'margin_dpo/margin_std': 280.4720153808594, 'logps/chosen': -419.590576171875, 'logps/rejected': -637.4976196289062, 'logps/ref_chosen': -67.31744384765625, 'logps/ref_rejected': -84.904296875, 'KL/chosen_KL_mean': -352.27313232421875, 'KL/rejected_KL_mean': -552.5933837890625, 'KL/mean': -452.4332275390625, 'KL/std': 252.9176025390625, 'logits/chosen': -0.4930969476699829, 'logits/rejected': -0.48252415657043457, 'epoch': 0.6} + 60%|██████ | 409/681 [17:20<11:38, 2.57s/it] 60%|██████ | 410/681 [17:22<11:42, 2.59s/it] {'loss': 1.0884, 'grad_norm': 31.696603775024414, 'learning_rate': 2.065879555832674e-07, 'fcm_dpo/beta': 0.0016305126482620835, 'fcm_dpo/q_t': 0.40897810459136963, 'fcm_dpo/delta': 0.014659320935606956, 'fcm_dpo/margin': 236.67666625976562, 'margin_dpo/margin_mean': 236.67666625976562, 'margin_dpo/margin_std': 284.35211181640625, 'logps/chosen': -382.63433837890625, 'logps/rejected': -651.0446166992188, 'logps/ref_chosen': -51.465354919433594, 'logps/ref_rejected': -83.198974609375, 'KL/chosen_KL_mean': -331.1689453125, 'KL/rejected_KL_mean': -567.8456420898438, 'KL/mean': -449.50732421875, 'KL/std': 254.5586395263672, 'logits/chosen': -0.4725341796875, 'logits/rejected': -0.47504281997680664, 'epoch': 0.6} + 60%|██████ | 410/681 [17:22<11:42, 2.59s/it] 60%|██████ | 411/681 [17:25<11:18, 2.51s/it] {'loss': 1.1138, 'grad_norm': 44.872047424316406, 'learning_rate': 2.0532469944670343e-07, 'fcm_dpo/beta': 0.001626357901841402, 'fcm_dpo/q_t': 0.41455578804016113, 'fcm_dpo/delta': 0.02442072331905365, 'fcm_dpo/margin': 230.87094116210938, 'margin_dpo/margin_mean': 230.87094116210938, 'margin_dpo/margin_std': 319.9390869140625, 'logps/chosen': -405.6380615234375, 'logps/rejected': -664.8966674804688, 'logps/ref_chosen': -52.30727005004883, 'logps/ref_rejected': -80.69495391845703, 'KL/chosen_KL_mean': -353.330810546875, 'KL/rejected_KL_mean': -584.20166015625, 'KL/mean': -468.7662658691406, 'KL/std': 282.9371337890625, 'logits/chosen': -0.48675569891929626, 'logits/rejected': -0.5024890303611755, 'epoch': 0.6} + 60%|██████ | 411/681 [17:25<11:18, 2.51s/it] 60%|██████ | 412/681 [17:27<11:09, 2.49s/it] {'loss': 1.1019, 'grad_norm': 37.48828887939453, 'learning_rate': 2.0406262054585738e-07, 'fcm_dpo/beta': 0.0016456831945106387, 'fcm_dpo/q_t': 0.4107934236526489, 'fcm_dpo/delta': 0.014591998420655727, 'fcm_dpo/margin': 234.52587890625, 'margin_dpo/margin_mean': 234.52587890625, 'margin_dpo/margin_std': 319.07745361328125, 'logps/chosen': -411.86700439453125, 'logps/rejected': -693.3095703125, 'logps/ref_chosen': -53.144126892089844, 'logps/ref_rejected': -100.0608139038086, 'KL/chosen_KL_mean': -358.72283935546875, 'KL/rejected_KL_mean': -593.2487182617188, 'KL/mean': -475.9858093261719, 'KL/std': 272.87725830078125, 'logits/chosen': -0.5510473251342773, 'logits/rejected': -0.5846823453903198, 'epoch': 0.6} + 60%|██████ | 412/681 [17:27<11:09, 2.49s/it] 61%|██████ | 413/681 [17:30<11:21, 2.54s/it] {'loss': 1.0936, 'grad_norm': 33.53278732299805, 'learning_rate': 2.0280175213768205e-07, 'fcm_dpo/beta': 0.0016510069835931063, 'fcm_dpo/q_t': 0.40730637311935425, 'fcm_dpo/delta': 0.0064473580569028854, 'fcm_dpo/margin': 238.46292114257812, 'margin_dpo/margin_mean': 238.4629364013672, 'margin_dpo/margin_std': 311.4296875, 'logps/chosen': -453.14434814453125, 'logps/rejected': -729.498779296875, 'logps/ref_chosen': -61.58196258544922, 'logps/ref_rejected': -99.47340393066406, 'KL/chosen_KL_mean': -391.5623779296875, 'KL/rejected_KL_mean': -630.025390625, 'KL/mean': -510.7938537597656, 'KL/std': 288.8511962890625, 'logits/chosen': -0.5170902013778687, 'logits/rejected': -0.5278250575065613, 'epoch': 0.61} + 61%|██████ | 413/681 [17:30<11:21, 2.54s/it] 61%|██████ | 414/681 [17:32<11:21, 2.55s/it] {'loss': 1.0711, 'grad_norm': 35.41373062133789, 'learning_rate': 2.0154212744723247e-07, 'fcm_dpo/beta': 0.001652669394388795, 'fcm_dpo/q_t': 0.4015337824821472, 'fcm_dpo/delta': -0.025241520255804062, 'fcm_dpo/margin': 256.247802734375, 'margin_dpo/margin_mean': 256.2478332519531, 'margin_dpo/margin_std': 307.007080078125, 'logps/chosen': -411.1938171386719, 'logps/rejected': -708.4566650390625, 'logps/ref_chosen': -46.63148498535156, 'logps/ref_rejected': -87.64653015136719, 'KL/chosen_KL_mean': -364.56231689453125, 'KL/rejected_KL_mean': -620.8101806640625, 'KL/mean': -492.68621826171875, 'KL/std': 266.0479736328125, 'logits/chosen': -0.5074818134307861, 'logits/rejected': -0.5076801776885986, 'epoch': 0.61} + 61%|██████ | 414/681 [17:32<11:21, 2.55s/it] 61%|██████ | 415/681 [17:35<11:34, 2.61s/it] {'loss': 1.1517, 'grad_norm': 29.98908233642578, 'learning_rate': 2.002837796667909e-07, 'fcm_dpo/beta': 0.0016560875810682774, 'fcm_dpo/q_t': 0.42633694410324097, 'fcm_dpo/delta': 0.08733348548412323, 'fcm_dpo/margin': 190.50540161132812, 'margin_dpo/margin_mean': 190.50540161132812, 'margin_dpo/margin_std': 290.72491455078125, 'logps/chosen': -472.95635986328125, 'logps/rejected': -685.321044921875, 'logps/ref_chosen': -78.6182861328125, 'logps/ref_rejected': -100.47752380371094, 'KL/chosen_KL_mean': -394.33807373046875, 'KL/rejected_KL_mean': -584.843505859375, 'KL/mean': -489.5907897949219, 'KL/std': 265.6529541015625, 'logits/chosen': -0.5528023838996887, 'logits/rejected': -0.5534902811050415, 'epoch': 0.61} + 61%|██████ | 415/681 [17:35<11:34, 2.61s/it] 61%|██████ | 416/681 [17:38<11:28, 2.60s/it] {'loss': 0.9956, 'grad_norm': 37.14603042602539, 'learning_rate': 1.990267419549914e-07, 'fcm_dpo/beta': 0.0016415867721661925, 'fcm_dpo/q_t': 0.38012266159057617, 'fcm_dpo/delta': -0.12347446382045746, 'fcm_dpo/margin': 315.0876770019531, 'margin_dpo/margin_mean': 315.0876770019531, 'margin_dpo/margin_std': 312.2025146484375, 'logps/chosen': -441.1697082519531, 'logps/rejected': -788.5469360351562, 'logps/ref_chosen': -58.27912521362305, 'logps/ref_rejected': -90.56871795654297, 'KL/chosen_KL_mean': -382.89056396484375, 'KL/rejected_KL_mean': -697.9782104492188, 'KL/mean': -540.4343872070312, 'KL/std': 309.90460205078125, 'logits/chosen': -0.5974301099777222, 'logits/rejected': -0.6131728887557983, 'epoch': 0.61} + 61%|██████ | 416/681 [17:38<11:28, 2.60s/it] 61%|██████ | 417/681 [17:40<11:07, 2.53s/it] {'loss': 1.0467, 'grad_norm': 29.581459045410156, 'learning_rate': 1.9777104743594686e-07, 'fcm_dpo/beta': 0.0016207349253818393, 'fcm_dpo/q_t': 0.3975698947906494, 'fcm_dpo/delta': -0.039116691797971725, 'fcm_dpo/margin': 269.8897399902344, 'margin_dpo/margin_mean': 269.8897705078125, 'margin_dpo/margin_std': 286.52203369140625, 'logps/chosen': -433.0568542480469, 'logps/rejected': -720.8997192382812, 'logps/ref_chosen': -50.1987190246582, 'logps/ref_rejected': -68.15184020996094, 'KL/chosen_KL_mean': -382.858154296875, 'KL/rejected_KL_mean': -652.7478637695312, 'KL/mean': -517.802978515625, 'KL/std': 286.8901672363281, 'logits/chosen': -0.5895746946334839, 'logits/rejected': -0.583941638469696, 'epoch': 0.61} + 61%|██████ | 417/681 [17:40<11:07, 2.53s/it] 61%|██████▏ | 418/681 [17:43<11:07, 2.54s/it] {'loss': 1.0912, 'grad_norm': 33.6888427734375, 'learning_rate': 1.965167291983757e-07, 'fcm_dpo/beta': 0.0016204738058149815, 'fcm_dpo/q_t': 0.403804749250412, 'fcm_dpo/delta': -0.03766999393701553, 'fcm_dpo/margin': 268.33917236328125, 'margin_dpo/margin_mean': 268.33917236328125, 'margin_dpo/margin_std': 380.9486999511719, 'logps/chosen': -505.4068603515625, 'logps/rejected': -796.4591064453125, 'logps/ref_chosen': -81.97846984863281, 'logps/ref_rejected': -104.69148254394531, 'KL/chosen_KL_mean': -423.42840576171875, 'KL/rejected_KL_mean': -691.767578125, 'KL/mean': -557.5980224609375, 'KL/std': 328.6751708984375, 'logits/chosen': -0.6470938920974731, 'logits/rejected': -0.6322601437568665, 'epoch': 0.61} + 61%|██████▏ | 418/681 [17:43<11:07, 2.54s/it] 62%|██████▏ | 419/681 [17:45<11:15, 2.58s/it] {'loss': 1.0281, 'grad_norm': 34.53618240356445, 'learning_rate': 1.9526382029472988e-07, 'fcm_dpo/beta': 0.0015830930788069963, 'fcm_dpo/q_t': 0.38735997676849365, 'fcm_dpo/delta': -0.09028756618499756, 'fcm_dpo/margin': 306.96807861328125, 'margin_dpo/margin_mean': 306.9680480957031, 'margin_dpo/margin_std': 345.80462646484375, 'logps/chosen': -436.57745361328125, 'logps/rejected': -782.179931640625, 'logps/ref_chosen': -52.948646545410156, 'logps/ref_rejected': -91.58309936523438, 'KL/chosen_KL_mean': -383.6288146972656, 'KL/rejected_KL_mean': -690.5968017578125, 'KL/mean': -537.1128540039062, 'KL/std': 298.24053955078125, 'logits/chosen': -0.6050629019737244, 'logits/rejected': -0.6123736500740051, 'epoch': 0.62} + 62%|██████▏ | 419/681 [17:45<11:15, 2.58s/it] 62%|██████▏ | 420/681 [17:48<11:08, 2.56s/it] {'loss': 1.195, 'grad_norm': 62.545352935791016, 'learning_rate': 1.9401235374032425e-07, 'fcm_dpo/beta': 0.0015975853893905878, 'fcm_dpo/q_t': 0.43039628863334656, 'fcm_dpo/delta': 0.08602797240018845, 'fcm_dpo/margin': 198.1026153564453, 'margin_dpo/margin_mean': 198.1026153564453, 'margin_dpo/margin_std': 413.45147705078125, 'logps/chosen': -561.2822265625, 'logps/rejected': -750.934814453125, 'logps/ref_chosen': -77.7699203491211, 'logps/ref_rejected': -69.31985473632812, 'KL/chosen_KL_mean': -483.5123291015625, 'KL/rejected_KL_mean': -681.614990234375, 'KL/mean': -582.5636596679688, 'KL/std': 315.52850341796875, 'logits/chosen': -0.652934730052948, 'logits/rejected': -0.626418948173523, 'epoch': 0.62} + 62%|██████▏ | 420/681 [17:48<11:08, 2.56s/it] 62%|██████▏ | 421/681 [17:51<11:16, 2.60s/it] {'loss': 1.1261, 'grad_norm': 31.18450927734375, 'learning_rate': 1.9276236251246653e-07, 'fcm_dpo/beta': 0.001619070884771645, 'fcm_dpo/q_t': 0.4164373278617859, 'fcm_dpo/delta': 0.04750995337963104, 'fcm_dpo/margin': 218.4813232421875, 'margin_dpo/margin_mean': 218.4813232421875, 'margin_dpo/margin_std': 307.9769287109375, 'logps/chosen': -454.25714111328125, 'logps/rejected': -708.2540283203125, 'logps/ref_chosen': -53.765865325927734, 'logps/ref_rejected': -89.28144836425781, 'KL/chosen_KL_mean': -400.49127197265625, 'KL/rejected_KL_mean': -618.9725341796875, 'KL/mean': -509.7319030761719, 'KL/std': 300.7071533203125, 'logits/chosen': -0.6628319025039673, 'logits/rejected': -0.6538623571395874, 'epoch': 0.62} + 62%|██████▏ | 421/681 [17:51<11:16, 2.60s/it] 62%|██████▏ | 422/681 [17:53<11:31, 2.67s/it] {'loss': 1.1048, 'grad_norm': 39.39680480957031, 'learning_rate': 1.9151387954958792e-07, 'fcm_dpo/beta': 0.001614267472177744, 'fcm_dpo/q_t': 0.4061550498008728, 'fcm_dpo/delta': -0.005787511821836233, 'fcm_dpo/margin': 251.21852111816406, 'margin_dpo/margin_mean': 251.21853637695312, 'margin_dpo/margin_std': 371.8038330078125, 'logps/chosen': -516.1905517578125, 'logps/rejected': -786.6387939453125, 'logps/ref_chosen': -68.6337661743164, 'logps/ref_rejected': -87.86351013183594, 'KL/chosen_KL_mean': -447.5567626953125, 'KL/rejected_KL_mean': -698.7752685546875, 'KL/mean': -573.166015625, 'KL/std': 324.9825134277344, 'logits/chosen': -0.6519845724105835, 'logits/rejected': -0.6586755514144897, 'epoch': 0.62} + 62%|██████▏ | 422/681 [17:53<11:31, 2.67s/it] 62%|██████▏ | 423/681 [17:56<11:08, 2.59s/it] {'loss': 1.0554, 'grad_norm': 34.45840835571289, 'learning_rate': 1.902669377503756e-07, 'fcm_dpo/beta': 0.0016041090711951256, 'fcm_dpo/q_t': 0.39793986082077026, 'fcm_dpo/delta': -0.04313413053750992, 'fcm_dpo/margin': 275.04901123046875, 'margin_dpo/margin_mean': 275.04901123046875, 'margin_dpo/margin_std': 323.095947265625, 'logps/chosen': -467.91363525390625, 'logps/rejected': -774.2788696289062, 'logps/ref_chosen': -54.99030303955078, 'logps/ref_rejected': -86.30654907226562, 'KL/chosen_KL_mean': -412.92333984375, 'KL/rejected_KL_mean': -687.9722900390625, 'KL/mean': -550.4478149414062, 'KL/std': 291.155029296875, 'logits/chosen': -0.6469055414199829, 'logits/rejected': -0.6586620807647705, 'epoch': 0.62} + 62%|██████▏ | 423/681 [17:56<11:08, 2.59s/it] 62%|██████▏ | 424/681 [17:58<11:13, 2.62s/it] {'loss': 1.0894, 'grad_norm': 39.47249221801758, 'learning_rate': 1.890215699729057e-07, 'fcm_dpo/beta': 0.0015932890819385648, 'fcm_dpo/q_t': 0.4079640209674835, 'fcm_dpo/delta': -0.008194293826818466, 'fcm_dpo/margin': 255.79061889648438, 'margin_dpo/margin_mean': 255.79061889648438, 'margin_dpo/margin_std': 346.87823486328125, 'logps/chosen': -422.38446044921875, 'logps/rejected': -688.6422119140625, 'logps/ref_chosen': -56.01192092895508, 'logps/ref_rejected': -66.47896575927734, 'KL/chosen_KL_mean': -366.37255859375, 'KL/rejected_KL_mean': -622.1632080078125, 'KL/mean': -494.26788330078125, 'KL/std': 289.16485595703125, 'logits/chosen': -0.6213667392730713, 'logits/rejected': -0.6005524396896362, 'epoch': 0.62} + 62%|██████▏ | 424/681 [17:59<11:13, 2.62s/it] 62%|██████▏ | 425/681 [18:01<11:03, 2.59s/it] {'loss': 1.1745, 'grad_norm': 33.893001556396484, 'learning_rate': 1.8777780903377732e-07, 'fcm_dpo/beta': 0.0016285117017105222, 'fcm_dpo/q_t': 0.42912036180496216, 'fcm_dpo/delta': 0.10236521810293198, 'fcm_dpo/margin': 184.3304901123047, 'margin_dpo/margin_mean': 184.3304901123047, 'margin_dpo/margin_std': 319.94305419921875, 'logps/chosen': -455.64453125, 'logps/rejected': -689.031494140625, 'logps/ref_chosen': -46.86899948120117, 'logps/ref_rejected': -95.92545318603516, 'KL/chosen_KL_mean': -408.7755126953125, 'KL/rejected_KL_mean': -593.10595703125, 'KL/mean': -500.9407958984375, 'KL/std': 269.2953186035156, 'logits/chosen': -0.6225741505622864, 'logits/rejected': -0.6225865483283997, 'epoch': 0.62} + 62%|██████▏ | 425/681 [18:01<11:03, 2.59s/it] 63%|██████▎ | 426/681 [18:04<11:06, 2.62s/it] {'loss': 1.0796, 'grad_norm': 24.651735305786133, 'learning_rate': 1.8653568770724803e-07, 'fcm_dpo/beta': 0.0016375456470996141, 'fcm_dpo/q_t': 0.40605005621910095, 'fcm_dpo/delta': -0.0028386712074279785, 'fcm_dpo/margin': 245.7498321533203, 'margin_dpo/margin_mean': 245.7498321533203, 'margin_dpo/margin_std': 290.891357421875, 'logps/chosen': -447.58917236328125, 'logps/rejected': -698.0220336914062, 'logps/ref_chosen': -76.58354187011719, 'logps/ref_rejected': -81.26658630371094, 'KL/chosen_KL_mean': -371.005615234375, 'KL/rejected_KL_mean': -616.7554321289062, 'KL/mean': -493.8805236816406, 'KL/std': 271.8358154296875, 'logits/chosen': -0.603665292263031, 'logits/rejected': -0.5721160173416138, 'epoch': 0.63} + 63%|██████▎ | 426/681 [18:04<11:06, 2.62s/it] 63%|██████▎ | 427/681 [18:06<11:03, 2.61s/it] {'loss': 1.1536, 'grad_norm': 25.280298233032227, 'learning_rate': 1.8529523872436977e-07, 'fcm_dpo/beta': 0.0016515168827027082, 'fcm_dpo/q_t': 0.4287715554237366, 'fcm_dpo/delta': 0.09603013098239899, 'fcm_dpo/margin': 185.9020233154297, 'margin_dpo/margin_mean': 185.9020233154297, 'margin_dpo/margin_std': 277.96832275390625, 'logps/chosen': -398.0404357910156, 'logps/rejected': -597.654541015625, 'logps/ref_chosen': -64.8538818359375, 'logps/ref_rejected': -78.5660171508789, 'KL/chosen_KL_mean': -333.1865539550781, 'KL/rejected_KL_mean': -519.0885620117188, 'KL/mean': -426.1375427246094, 'KL/std': 232.90151977539062, 'logits/chosen': -0.6069827079772949, 'logits/rejected': -0.5897522568702698, 'epoch': 0.63} + 63%|██████▎ | 427/681 [18:06<11:03, 2.61s/it] 63%|██████▎ | 428/681 [18:09<11:06, 2.64s/it] {'loss': 1.1004, 'grad_norm': 31.78797721862793, 'learning_rate': 1.8405649477212697e-07, 'fcm_dpo/beta': 0.001651531783863902, 'fcm_dpo/q_t': 0.4038928151130676, 'fcm_dpo/delta': -0.019138701260089874, 'fcm_dpo/margin': 253.09681701660156, 'margin_dpo/margin_mean': 253.09683227539062, 'margin_dpo/margin_std': 371.6815185546875, 'logps/chosen': -484.37890625, 'logps/rejected': -778.120849609375, 'logps/ref_chosen': -62.63666534423828, 'logps/ref_rejected': -103.28181457519531, 'KL/chosen_KL_mean': -421.74224853515625, 'KL/rejected_KL_mean': -674.8390502929688, 'KL/mean': -548.2906494140625, 'KL/std': 312.8956298828125, 'logits/chosen': -0.5810732245445251, 'logits/rejected': -0.5857928395271301, 'epoch': 0.63} + 63%|██████▎ | 428/681 [18:09<11:06, 2.64s/it] 63%|██████▎ | 429/681 [18:12<11:01, 2.62s/it] {'loss': 1.1706, 'grad_norm': 31.24496078491211, 'learning_rate': 1.828194884925749e-07, 'fcm_dpo/beta': 0.0016507648397237062, 'fcm_dpo/q_t': 0.4247916340827942, 'fcm_dpo/delta': -0.03221222758293152, 'fcm_dpo/margin': 198.1272430419922, 'margin_dpo/margin_mean': 198.1272430419922, 'margin_dpo/margin_std': 350.78887939453125, 'logps/chosen': -516.2745971679688, 'logps/rejected': -724.9627685546875, 'logps/ref_chosen': -81.23401641845703, 'logps/ref_rejected': -91.79493713378906, 'KL/chosen_KL_mean': -435.04058837890625, 'KL/rejected_KL_mean': -633.1678466796875, 'KL/mean': -534.104248046875, 'KL/std': 291.46722412109375, 'logits/chosen': -0.5936084985733032, 'logits/rejected': -0.570778489112854, 'epoch': 0.63} + 63%|██████▎ | 429/681 [18:12<11:01, 2.62s/it] 63%|██████▎ | 430/681 [18:14<11:07, 2.66s/it] {'loss': 1.122, 'grad_norm': 36.20037078857422, 'learning_rate': 1.8158425248197928e-07, 'fcm_dpo/beta': 0.0016623124247416854, 'fcm_dpo/q_t': 0.41966137290000916, 'fcm_dpo/delta': 0.055517442524433136, 'fcm_dpo/margin': 208.2819061279297, 'margin_dpo/margin_mean': 208.2819061279297, 'margin_dpo/margin_std': 283.18389892578125, 'logps/chosen': -409.8028564453125, 'logps/rejected': -661.5872802734375, 'logps/ref_chosen': -60.920326232910156, 'logps/ref_rejected': -104.42280578613281, 'KL/chosen_KL_mean': -348.882568359375, 'KL/rejected_KL_mean': -557.1644287109375, 'KL/mean': -453.02349853515625, 'KL/std': 258.8692321777344, 'logits/chosen': -0.5428692102432251, 'logits/rejected': -0.5417746305465698, 'epoch': 0.63} + 63%|██████▎ | 430/681 [18:14<11:07, 2.66s/it] 63%|██████▎ | 431/681 [18:17<11:05, 2.66s/it] {'loss': 1.0176, 'grad_norm': 29.066301345825195, 'learning_rate': 1.8035081928995788e-07, 'fcm_dpo/beta': 0.0016387823270633817, 'fcm_dpo/q_t': 0.3862955570220947, 'fcm_dpo/delta': -0.09255114197731018, 'fcm_dpo/margin': 297.705322265625, 'margin_dpo/margin_mean': 297.705322265625, 'margin_dpo/margin_std': 304.55963134765625, 'logps/chosen': -376.17279052734375, 'logps/rejected': -709.3695678710938, 'logps/ref_chosen': -57.34874725341797, 'logps/ref_rejected': -92.84022521972656, 'KL/chosen_KL_mean': -318.82403564453125, 'KL/rejected_KL_mean': -616.529296875, 'KL/mean': -467.67669677734375, 'KL/std': 278.1539001464844, 'logits/chosen': -0.5722877383232117, 'logits/rejected': -0.5794203281402588, 'epoch': 0.63} + 63%|██████▎ | 431/681 [18:17<11:05, 2.66s/it] 63%|██████▎ | 432/681 [18:20<11:16, 2.72s/it] {'loss': 1.0323, 'grad_norm': 31.19976043701172, 'learning_rate': 1.791192214186223e-07, 'fcm_dpo/beta': 0.0016283730510622263, 'fcm_dpo/q_t': 0.3932652473449707, 'fcm_dpo/delta': -0.06209279224276543, 'fcm_dpo/margin': 281.7933654785156, 'margin_dpo/margin_mean': 281.7933654785156, 'margin_dpo/margin_std': 281.0911560058594, 'logps/chosen': -401.75726318359375, 'logps/rejected': -711.055419921875, 'logps/ref_chosen': -71.07479095458984, 'logps/ref_rejected': -98.57952880859375, 'KL/chosen_KL_mean': -330.6824645996094, 'KL/rejected_KL_mean': -612.475830078125, 'KL/mean': -471.57916259765625, 'KL/std': 277.7702941894531, 'logits/chosen': -0.5432115793228149, 'logits/rejected': -0.5331372618675232, 'epoch': 0.63} + 63%|██████▎ | 432/681 [18:20<11:16, 2.72s/it] 64%|██████▎ | 433/681 [18:22<11:07, 2.69s/it] {'loss': 1.1752, 'grad_norm': 37.97488784790039, 'learning_rate': 1.7788949132172193e-07, 'fcm_dpo/beta': 0.0016389940865337849, 'fcm_dpo/q_t': 0.4270647466182709, 'fcm_dpo/delta': 0.09798242151737213, 'fcm_dpo/margin': 185.99978637695312, 'margin_dpo/margin_mean': 185.99978637695312, 'margin_dpo/margin_std': 330.22344970703125, 'logps/chosen': -477.8890380859375, 'logps/rejected': -701.5665283203125, 'logps/ref_chosen': -58.273193359375, 'logps/ref_rejected': -95.95089721679688, 'KL/chosen_KL_mean': -419.6158447265625, 'KL/rejected_KL_mean': -605.6156005859375, 'KL/mean': -512.61572265625, 'KL/std': 285.9530029296875, 'logits/chosen': -0.6025904417037964, 'logits/rejected': -0.591471791267395, 'epoch': 0.64} + 64%|██████▎ | 433/681 [18:23<11:07, 2.69s/it] 64%|██████▎ | 434/681 [18:25<11:01, 2.68s/it] {'loss': 1.112, 'grad_norm': 20.974876403808594, 'learning_rate': 1.7666166140378853e-07, 'fcm_dpo/beta': 0.0016441468615084887, 'fcm_dpo/q_t': 0.41576558351516724, 'fcm_dpo/delta': 0.017331628128886223, 'fcm_dpo/margin': 233.06314086914062, 'margin_dpo/margin_mean': 233.06314086914062, 'margin_dpo/margin_std': 350.6734619140625, 'logps/chosen': -416.8576354980469, 'logps/rejected': -666.4456787109375, 'logps/ref_chosen': -61.97370147705078, 'logps/ref_rejected': -78.49861145019531, 'KL/chosen_KL_mean': -354.8839111328125, 'KL/rejected_KL_mean': -587.9470825195312, 'KL/mean': -471.41546630859375, 'KL/std': 279.48162841796875, 'logits/chosen': -0.5894551873207092, 'logits/rejected': -0.5889327526092529, 'epoch': 0.64} + 64%|██████▎ | 434/681 [18:25<11:01, 2.68s/it] 64%|██████▍ | 435/681 [18:27<10:26, 2.55s/it] {'loss': 1.0747, 'grad_norm': 31.517038345336914, 'learning_rate': 1.7543576401928218e-07, 'fcm_dpo/beta': 0.001645256532356143, 'fcm_dpo/q_t': 0.40314286947250366, 'fcm_dpo/delta': -0.016068164259195328, 'fcm_dpo/margin': 252.4639129638672, 'margin_dpo/margin_mean': 252.46392822265625, 'margin_dpo/margin_std': 302.1197509765625, 'logps/chosen': -391.21160888671875, 'logps/rejected': -679.7403564453125, 'logps/ref_chosen': -51.502052307128906, 'logps/ref_rejected': -87.56689453125, 'KL/chosen_KL_mean': -339.70953369140625, 'KL/rejected_KL_mean': -592.1734619140625, 'KL/mean': -465.9414978027344, 'KL/std': 277.168701171875, 'logits/chosen': -0.6623108386993408, 'logits/rejected': -0.656818151473999, 'epoch': 0.64} + 64%|██████▍ | 435/681 [18:27<10:26, 2.55s/it] 64%|██████▍ | 436/681 [18:30<10:29, 2.57s/it] {'loss': 1.1117, 'grad_norm': 41.7802619934082, 'learning_rate': 1.742118314717391e-07, 'fcm_dpo/beta': 0.0016492058057338, 'fcm_dpo/q_t': 0.4155094027519226, 'fcm_dpo/delta': 0.03687084838747978, 'fcm_dpo/margin': 220.94302368164062, 'margin_dpo/margin_mean': 220.94302368164062, 'margin_dpo/margin_std': 296.26336669921875, 'logps/chosen': -432.8529052734375, 'logps/rejected': -665.1199951171875, 'logps/ref_chosen': -71.40371704101562, 'logps/ref_rejected': -82.72775268554688, 'KL/chosen_KL_mean': -361.44921875, 'KL/rejected_KL_mean': -582.3922119140625, 'KL/mean': -471.92071533203125, 'KL/std': 250.78880310058594, 'logits/chosen': -0.604122519493103, 'logits/rejected': -0.5753868222236633, 'epoch': 0.64} + 64%|██████▍ | 436/681 [18:30<10:29, 2.57s/it] 64%|██████▍ | 437/681 [18:33<10:36, 2.61s/it] {'loss': 1.0971, 'grad_norm': 22.802751541137695, 'learning_rate': 1.7298989601292036e-07, 'fcm_dpo/beta': 0.0016615703934803605, 'fcm_dpo/q_t': 0.4112783670425415, 'fcm_dpo/delta': 0.021829720586538315, 'fcm_dpo/margin': 228.0978240966797, 'margin_dpo/margin_mean': 228.09780883789062, 'margin_dpo/margin_std': 287.695556640625, 'logps/chosen': -428.69610595703125, 'logps/rejected': -674.0932006835938, 'logps/ref_chosen': -64.7442626953125, 'logps/ref_rejected': -82.04356384277344, 'KL/chosen_KL_mean': -363.95184326171875, 'KL/rejected_KL_mean': -592.0496826171875, 'KL/mean': -478.0007629394531, 'KL/std': 241.47042846679688, 'logits/chosen': -0.6008163690567017, 'logits/rejected': -0.5768181681632996, 'epoch': 0.64} + 64%|██████▍ | 437/681 [18:33<10:36, 2.61s/it] 64%|██████▍ | 438/681 [18:35<10:19, 2.55s/it] {'loss': 1.0575, 'grad_norm': 27.77182388305664, 'learning_rate': 1.7176998984196144e-07, 'fcm_dpo/beta': 0.0016493103466928005, 'fcm_dpo/q_t': 0.39925286173820496, 'fcm_dpo/delta': -0.03368060290813446, 'fcm_dpo/margin': 261.7484436035156, 'margin_dpo/margin_mean': 261.7484436035156, 'margin_dpo/margin_std': 294.62152099609375, 'logps/chosen': -434.455810546875, 'logps/rejected': -720.2623901367188, 'logps/ref_chosen': -59.0186653137207, 'logps/ref_rejected': -83.07682800292969, 'KL/chosen_KL_mean': -375.4371337890625, 'KL/rejected_KL_mean': -637.185546875, 'KL/mean': -506.3113708496094, 'KL/std': 277.33953857421875, 'logits/chosen': -0.6229407787322998, 'logits/rejected': -0.6052076816558838, 'epoch': 0.64} + 64%|██████▍ | 438/681 [18:35<10:19, 2.55s/it] 64%|██████▍ | 439/681 [18:38<10:07, 2.51s/it] {'loss': 1.1492, 'grad_norm': 28.284347534179688, 'learning_rate': 1.7055214510452458e-07, 'fcm_dpo/beta': 0.001632218947634101, 'fcm_dpo/q_t': 0.42261505126953125, 'fcm_dpo/delta': -0.06549854576587677, 'fcm_dpo/margin': 206.4326171875, 'margin_dpo/margin_mean': 206.43260192871094, 'margin_dpo/margin_std': 320.11016845703125, 'logps/chosen': -450.8058776855469, 'logps/rejected': -687.4398803710938, 'logps/ref_chosen': -53.78407669067383, 'logps/ref_rejected': -83.98545837402344, 'KL/chosen_KL_mean': -397.02178955078125, 'KL/rejected_KL_mean': -603.4544067382812, 'KL/mean': -500.23809814453125, 'KL/std': 279.9316711425781, 'logits/chosen': -0.6147496700286865, 'logits/rejected': -0.6177977323532104, 'epoch': 0.64} + 64%|██████▍ | 439/681 [18:38<10:07, 2.51s/it] 65%|██████▍ | 440/681 [18:40<09:55, 2.47s/it] {'loss': 1.0939, 'grad_norm': 33.926116943359375, 'learning_rate': 1.6933639389195134e-07, 'fcm_dpo/beta': 0.0016363917384296656, 'fcm_dpo/q_t': 0.4104476571083069, 'fcm_dpo/delta': -0.0012616775929927826, 'fcm_dpo/margin': 245.09262084960938, 'margin_dpo/margin_mean': 245.0926513671875, 'margin_dpo/margin_std': 338.4553527832031, 'logps/chosen': -505.5780944824219, 'logps/rejected': -768.601806640625, 'logps/ref_chosen': -78.56671905517578, 'logps/ref_rejected': -96.49775695800781, 'KL/chosen_KL_mean': -427.0113525390625, 'KL/rejected_KL_mean': -672.10400390625, 'KL/mean': -549.5577392578125, 'KL/std': 337.44097900390625, 'logits/chosen': -0.6553194522857666, 'logits/rejected': -0.6481237411499023, 'epoch': 0.65} + 65%|██████▍ | 440/681 [18:40<09:55, 2.47s/it] 65%|██████▍ | 441/681 [18:43<10:05, 2.52s/it] {'loss': 1.1354, 'grad_norm': 35.49950408935547, 'learning_rate': 1.681227682404166e-07, 'fcm_dpo/beta': 0.0016381317982450128, 'fcm_dpo/q_t': 0.4130883812904358, 'fcm_dpo/delta': 0.0013750754296779633, 'fcm_dpo/margin': 243.21163940429688, 'margin_dpo/margin_mean': 243.21163940429688, 'margin_dpo/margin_std': 415.7077331542969, 'logps/chosen': -538.86083984375, 'logps/rejected': -817.7189331054688, 'logps/ref_chosen': -60.824440002441406, 'logps/ref_rejected': -96.47080993652344, 'KL/chosen_KL_mean': -478.03643798828125, 'KL/rejected_KL_mean': -721.2481689453125, 'KL/mean': -599.6422729492188, 'KL/std': 347.5352783203125, 'logits/chosen': -0.6869616508483887, 'logits/rejected': -0.6749493479728699, 'epoch': 0.65} + 65%|██████▍ | 441/681 [18:43<10:05, 2.52s/it] 65%|██████▍ | 442/681 [18:45<09:53, 2.48s/it] {'loss': 1.0672, 'grad_norm': 30.064672470092773, 'learning_rate': 1.669113001300851e-07, 'fcm_dpo/beta': 0.0016285094898194075, 'fcm_dpo/q_t': 0.39779287576675415, 'fcm_dpo/delta': -0.06479034572839737, 'fcm_dpo/margin': 283.212890625, 'margin_dpo/margin_mean': 283.212890625, 'margin_dpo/margin_std': 374.67840576171875, 'logps/chosen': -462.8663635253906, 'logps/rejected': -775.6072998046875, 'logps/ref_chosen': -47.01121520996094, 'logps/ref_rejected': -76.53926086425781, 'KL/chosen_KL_mean': -415.85516357421875, 'KL/rejected_KL_mean': -699.0679931640625, 'KL/mean': -557.4616088867188, 'KL/std': 336.6131591796875, 'logits/chosen': -0.6426968574523926, 'logits/rejected': -0.6355198621749878, 'epoch': 0.65} + 65%|██████▍ | 442/681 [18:45<09:53, 2.48s/it] 65%|██████▌ | 443/681 [18:47<09:57, 2.51s/it] {'loss': 1.2139, 'grad_norm': 37.92525863647461, 'learning_rate': 1.6570202148426815e-07, 'fcm_dpo/beta': 0.0016116888727992773, 'fcm_dpo/q_t': 0.4350769817829132, 'fcm_dpo/delta': 0.0013880077749490738, 'fcm_dpo/margin': 179.22032165527344, 'margin_dpo/margin_mean': 179.22032165527344, 'margin_dpo/margin_std': 391.09747314453125, 'logps/chosen': -543.9017333984375, 'logps/rejected': -738.5289306640625, 'logps/ref_chosen': -71.27301788330078, 'logps/ref_rejected': -86.679931640625, 'KL/chosen_KL_mean': -472.62872314453125, 'KL/rejected_KL_mean': -651.8489990234375, 'KL/mean': -562.2388305664062, 'KL/std': 327.6013488769531, 'logits/chosen': -0.6569748520851135, 'logits/rejected': -0.636266827583313, 'epoch': 0.65} + 65%|██████▌ | 443/681 [18:48<09:57, 2.51s/it] 65%|██████▌ | 444/681 [18:50<10:00, 2.53s/it] {'loss': 1.0506, 'grad_norm': 28.29939842224121, 'learning_rate': 1.6449496416858282e-07, 'fcm_dpo/beta': 0.001586769474670291, 'fcm_dpo/q_t': 0.3927891254425049, 'fcm_dpo/delta': -0.0820961445569992, 'fcm_dpo/margin': 301.04541015625, 'margin_dpo/margin_mean': 301.04541015625, 'margin_dpo/margin_std': 390.89599609375, 'logps/chosen': -518.9641723632812, 'logps/rejected': -860.05078125, 'logps/ref_chosen': -57.213706970214844, 'logps/ref_rejected': -97.25489807128906, 'KL/chosen_KL_mean': -461.75042724609375, 'KL/rejected_KL_mean': -762.7958984375, 'KL/mean': -612.273193359375, 'KL/std': 354.7071533203125, 'logits/chosen': -0.606819748878479, 'logits/rejected': -0.6156275272369385, 'epoch': 0.65} + 65%|██████▌ | 444/681 [18:50<10:00, 2.53s/it] 65%|██████▌ | 445/681 [18:53<10:20, 2.63s/it] {'loss': 1.0706, 'grad_norm': 31.61493492126465, 'learning_rate': 1.6329015999011182e-07, 'fcm_dpo/beta': 0.0015799321699887514, 'fcm_dpo/q_t': 0.4012291133403778, 'fcm_dpo/delta': -0.03441721200942993, 'fcm_dpo/margin': 274.0172119140625, 'margin_dpo/margin_mean': 274.0172424316406, 'margin_dpo/margin_std': 343.6361083984375, 'logps/chosen': -470.21807861328125, 'logps/rejected': -769.6182250976562, 'logps/ref_chosen': -67.29979705810547, 'logps/ref_rejected': -92.68267059326172, 'KL/chosen_KL_mean': -402.9183044433594, 'KL/rejected_KL_mean': -676.935546875, 'KL/mean': -539.9268798828125, 'KL/std': 278.75872802734375, 'logits/chosen': -0.6219183206558228, 'logits/rejected': -0.6123214960098267, 'epoch': 0.65} + 65%|██████▌ | 445/681 [18:53<10:20, 2.63s/it] 65%|██████▌ | 446/681 [18:56<10:15, 2.62s/it] {'loss': 1.0286, 'grad_norm': 33.535675048828125, 'learning_rate': 1.6208764069656578e-07, 'fcm_dpo/beta': 0.0015673264861106873, 'fcm_dpo/q_t': 0.3905888795852661, 'fcm_dpo/delta': -0.0700267031788826, 'fcm_dpo/margin': 297.6376037597656, 'margin_dpo/margin_mean': 297.6376037597656, 'margin_dpo/margin_std': 300.60076904296875, 'logps/chosen': -417.61981201171875, 'logps/rejected': -757.423095703125, 'logps/ref_chosen': -59.098487854003906, 'logps/ref_rejected': -101.26419067382812, 'KL/chosen_KL_mean': -358.52130126953125, 'KL/rejected_KL_mean': -656.158935546875, 'KL/mean': -507.340087890625, 'KL/std': 294.689453125, 'logits/chosen': -0.6319636106491089, 'logits/rejected': -0.6446236371994019, 'epoch': 0.65} + 65%|██████▌ | 446/681 [18:56<10:15, 2.62s/it] 66%|██████▌ | 447/681 [18:58<09:56, 2.55s/it] {'loss': 1.0422, 'grad_norm': 30.70143699645996, 'learning_rate': 1.608874379754465e-07, 'fcm_dpo/beta': 0.001533093280158937, 'fcm_dpo/q_t': 0.39436084032058716, 'fcm_dpo/delta': -0.06830260902643204, 'fcm_dpo/margin': 303.20330810546875, 'margin_dpo/margin_mean': 303.20330810546875, 'margin_dpo/margin_std': 361.6494140625, 'logps/chosen': -426.53448486328125, 'logps/rejected': -772.357177734375, 'logps/ref_chosen': -56.07533264160156, 'logps/ref_rejected': -98.69475555419922, 'KL/chosen_KL_mean': -370.45916748046875, 'KL/rejected_KL_mean': -673.6624755859375, 'KL/mean': -522.060791015625, 'KL/std': 334.0948486328125, 'logits/chosen': -0.6682947874069214, 'logits/rejected': -0.6798413395881653, 'epoch': 0.66} + 66%|██████▌ | 447/681 [18:58<09:56, 2.55s/it] 66%|██████▌ | 448/681 [19:00<09:55, 2.55s/it] {'loss': 1.0647, 'grad_norm': 32.050010681152344, 'learning_rate': 1.5968958345321177e-07, 'fcm_dpo/beta': 0.001529197907075286, 'fcm_dpo/q_t': 0.4007987380027771, 'fcm_dpo/delta': -0.03181178867816925, 'fcm_dpo/margin': 281.41815185546875, 'margin_dpo/margin_mean': 281.41815185546875, 'margin_dpo/margin_std': 337.77032470703125, 'logps/chosen': -472.7381591796875, 'logps/rejected': -796.4171142578125, 'logps/ref_chosen': -60.00384521484375, 'logps/ref_rejected': -102.26465606689453, 'KL/chosen_KL_mean': -412.73431396484375, 'KL/rejected_KL_mean': -694.1524658203125, 'KL/mean': -553.443359375, 'KL/std': 292.61798095703125, 'logits/chosen': -0.5674072504043579, 'logits/rejected': -0.5757938623428345, 'epoch': 0.66} + 66%|██████▌ | 448/681 [19:01<09:55, 2.55s/it] 66%|██████▌ | 449/681 [19:03<09:50, 2.55s/it] {'loss': 1.0779, 'grad_norm': 29.442913055419922, 'learning_rate': 1.584941086944423e-07, 'fcm_dpo/beta': 0.0015087838983163238, 'fcm_dpo/q_t': 0.4015204608440399, 'fcm_dpo/delta': -0.046950291842222214, 'fcm_dpo/margin': 294.76324462890625, 'margin_dpo/margin_mean': 294.76324462890625, 'margin_dpo/margin_std': 421.296142578125, 'logps/chosen': -472.2169189453125, 'logps/rejected': -788.0504760742188, 'logps/ref_chosen': -67.52661895751953, 'logps/ref_rejected': -88.59690856933594, 'KL/chosen_KL_mean': -404.6903076171875, 'KL/rejected_KL_mean': -699.4535522460938, 'KL/mean': -552.0719604492188, 'KL/std': 354.35406494140625, 'logits/chosen': -0.6377573013305664, 'logits/rejected': -0.6385862231254578, 'epoch': 0.66} + 66%|██████▌ | 449/681 [19:03<09:50, 2.55s/it] 66%|██████▌ | 450/681 [19:06<09:47, 2.54s/it] {'loss': 1.0229, 'grad_norm': 41.06943893432617, 'learning_rate': 1.573010452010098e-07, 'fcm_dpo/beta': 0.0014938064850866795, 'fcm_dpo/q_t': 0.3899462819099426, 'fcm_dpo/delta': -0.07037577033042908, 'fcm_dpo/margin': 312.7186584472656, 'margin_dpo/margin_mean': 312.7186584472656, 'margin_dpo/margin_std': 310.42205810546875, 'logps/chosen': -388.0813293457031, 'logps/rejected': -746.44677734375, 'logps/ref_chosen': -57.10811996459961, 'logps/ref_rejected': -102.75494384765625, 'KL/chosen_KL_mean': -330.97320556640625, 'KL/rejected_KL_mean': -643.69189453125, 'KL/mean': -487.33258056640625, 'KL/std': 313.515625, 'logits/chosen': -0.6237972974777222, 'logits/rejected': -0.6363176107406616, 'epoch': 0.66} + 66%|██████▌ | 450/681 [19:06<09:47, 2.54s/it] 66%|██████▌ | 451/681 [19:08<09:23, 2.45s/it] {'loss': 1.1564, 'grad_norm': 32.329261779785156, 'learning_rate': 1.5611042441124687e-07, 'fcm_dpo/beta': 0.0015017553232610226, 'fcm_dpo/q_t': 0.41802603006362915, 'fcm_dpo/delta': 0.052446845918893814, 'fcm_dpo/margin': 232.50131225585938, 'margin_dpo/margin_mean': 232.50131225585938, 'margin_dpo/margin_std': 410.9925537109375, 'logps/chosen': -498.35693359375, 'logps/rejected': -745.3187866210938, 'logps/ref_chosen': -58.46883010864258, 'logps/ref_rejected': -72.92941284179688, 'KL/chosen_KL_mean': -439.8880920410156, 'KL/rejected_KL_mean': -672.389404296875, 'KL/mean': -556.1387329101562, 'KL/std': 351.8367004394531, 'logits/chosen': -0.6703058481216431, 'logits/rejected': -0.6499719619750977, 'epoch': 0.66} + 66%|██████▌ | 451/681 [19:08<09:23, 2.45s/it] 66%|██████▋ | 452/681 [19:10<09:20, 2.45s/it] {'loss': 1.0545, 'grad_norm': 22.36501693725586, 'learning_rate': 1.549222776991186e-07, 'fcm_dpo/beta': 0.001496224314905703, 'fcm_dpo/q_t': 0.4016070067882538, 'fcm_dpo/delta': -0.015626225620508194, 'fcm_dpo/margin': 277.29498291015625, 'margin_dpo/margin_mean': 277.29498291015625, 'margin_dpo/margin_std': 275.4556579589844, 'logps/chosen': -357.9068908691406, 'logps/rejected': -682.582763671875, 'logps/ref_chosen': -50.39055252075195, 'logps/ref_rejected': -97.77142333984375, 'KL/chosen_KL_mean': -307.5163269042969, 'KL/rejected_KL_mean': -584.8113403320312, 'KL/mean': -446.1638488769531, 'KL/std': 270.72735595703125, 'logits/chosen': -0.5967873930931091, 'logits/rejected': -0.6146633625030518, 'epoch': 0.66} + 66%|██████▋ | 452/681 [19:10<09:20, 2.45s/it] 67%|██████▋ | 453/681 [19:13<09:08, 2.41s/it] {'loss': 1.0961, 'grad_norm': 27.039974212646484, 'learning_rate': 1.5373663637339584e-07, 'fcm_dpo/beta': 0.001497291261330247, 'fcm_dpo/q_t': 0.4115417003631592, 'fcm_dpo/delta': 0.013492653146386147, 'fcm_dpo/margin': 258.44171142578125, 'margin_dpo/margin_mean': 258.44171142578125, 'margin_dpo/margin_std': 340.037353515625, 'logps/chosen': -428.0203857421875, 'logps/rejected': -710.9547119140625, 'logps/ref_chosen': -57.71485137939453, 'logps/ref_rejected': -82.20741271972656, 'KL/chosen_KL_mean': -370.3055419921875, 'KL/rejected_KL_mean': -628.7472534179688, 'KL/mean': -499.5263671875, 'KL/std': 281.7098388671875, 'logits/chosen': -0.6106635332107544, 'logits/rejected': -0.5978103876113892, 'epoch': 0.67} + 67%|██████▋ | 453/681 [19:13<09:08, 2.41s/it] 67%|██████▋ | 454/681 [19:15<09:12, 2.44s/it] {'loss': 1.0565, 'grad_norm': 27.240726470947266, 'learning_rate': 1.5255353167683017e-07, 'fcm_dpo/beta': 0.0014888541772961617, 'fcm_dpo/q_t': 0.3986594080924988, 'fcm_dpo/delta': -0.041121020913124084, 'fcm_dpo/margin': 294.9285888671875, 'margin_dpo/margin_mean': 294.9285888671875, 'margin_dpo/margin_std': 350.4466857910156, 'logps/chosen': -501.8115234375, 'logps/rejected': -820.7452392578125, 'logps/ref_chosen': -60.945648193359375, 'logps/ref_rejected': -84.95079040527344, 'KL/chosen_KL_mean': -440.86590576171875, 'KL/rejected_KL_mean': -735.7944946289062, 'KL/mean': -588.3302001953125, 'KL/std': 319.46673583984375, 'logits/chosen': -0.6478193402290344, 'logits/rejected': -0.6373401880264282, 'epoch': 0.67} + 67%|██████▋ | 454/681 [19:15<09:12, 2.44s/it] 67%|██████▋ | 455/681 [19:17<09:07, 2.42s/it] {'loss': 1.0187, 'grad_norm': 32.356143951416016, 'learning_rate': 1.5137299478533064e-07, 'fcm_dpo/beta': 0.0014744448708370328, 'fcm_dpo/q_t': 0.3856102526187897, 'fcm_dpo/delta': -0.10616149008274078, 'fcm_dpo/margin': 339.64556884765625, 'margin_dpo/margin_mean': 339.64556884765625, 'margin_dpo/margin_std': 368.43084716796875, 'logps/chosen': -429.5153503417969, 'logps/rejected': -839.57568359375, 'logps/ref_chosen': -44.88671112060547, 'logps/ref_rejected': -115.30147552490234, 'KL/chosen_KL_mean': -384.628662109375, 'KL/rejected_KL_mean': -724.274169921875, 'KL/mean': -554.451416015625, 'KL/std': 327.93011474609375, 'logits/chosen': -0.6396904587745667, 'logits/rejected': -0.6583499908447266, 'epoch': 0.67} + 67%|██████▋ | 455/681 [19:17<09:07, 2.42s/it] 67%|██████▋ | 456/681 [19:20<09:15, 2.47s/it] {'loss': 1.0142, 'grad_norm': 29.948514938354492, 'learning_rate': 1.5019505680714232e-07, 'fcm_dpo/beta': 0.0014414741890504956, 'fcm_dpo/q_t': 0.38874322175979614, 'fcm_dpo/delta': -0.08550744503736496, 'fcm_dpo/margin': 334.0072021484375, 'margin_dpo/margin_mean': 334.0072021484375, 'margin_dpo/margin_std': 335.52606201171875, 'logps/chosen': -468.43817138671875, 'logps/rejected': -850.6264038085938, 'logps/ref_chosen': -57.036781311035156, 'logps/ref_rejected': -105.21784210205078, 'KL/chosen_KL_mean': -411.4013977050781, 'KL/rejected_KL_mean': -745.4085693359375, 'KL/mean': -578.405029296875, 'KL/std': 345.68756103515625, 'logits/chosen': -0.6326814889907837, 'logits/rejected': -0.6543838977813721, 'epoch': 0.67} + 67%|██████▋ | 456/681 [19:20<09:15, 2.47s/it] 67%|██████▋ | 457/681 [19:23<09:35, 2.57s/it] {'loss': 1.0146, 'grad_norm': 36.677040100097656, 'learning_rate': 1.4901974878202627e-07, 'fcm_dpo/beta': 0.0014121406711637974, 'fcm_dpo/q_t': 0.3872183859348297, 'fcm_dpo/delta': -0.08430389314889908, 'fcm_dpo/margin': 339.88275146484375, 'margin_dpo/margin_mean': 339.8827209472656, 'margin_dpo/margin_std': 330.81744384765625, 'logps/chosen': -465.0486145019531, 'logps/rejected': -835.79833984375, 'logps/ref_chosen': -54.24253845214844, 'logps/ref_rejected': -85.10956573486328, 'KL/chosen_KL_mean': -410.80609130859375, 'KL/rejected_KL_mean': -750.6888427734375, 'KL/mean': -580.7474365234375, 'KL/std': 341.3404541015625, 'logits/chosen': -0.6665825843811035, 'logits/rejected': -0.6668688058853149, 'epoch': 0.67} + 67%|██████▋ | 457/681 [19:23<09:35, 2.57s/it] 67%|██████▋ | 458/681 [19:25<09:28, 2.55s/it] {'loss': 1.0701, 'grad_norm': 24.763858795166016, 'learning_rate': 1.4784710168044212e-07, 'fcm_dpo/beta': 0.0013981210067868233, 'fcm_dpo/q_t': 0.40338659286499023, 'fcm_dpo/delta': -0.0246875062584877, 'fcm_dpo/margin': 302.67303466796875, 'margin_dpo/margin_mean': 302.6730651855469, 'margin_dpo/margin_std': 369.3429260253906, 'logps/chosen': -485.75799560546875, 'logps/rejected': -830.7054443359375, 'logps/ref_chosen': -55.40888214111328, 'logps/ref_rejected': -97.68325805664062, 'KL/chosen_KL_mean': -430.34912109375, 'KL/rejected_KL_mean': -733.022216796875, 'KL/mean': -581.6856689453125, 'KL/std': 319.516357421875, 'logits/chosen': -0.6812525987625122, 'logits/rejected': -0.6768727898597717, 'epoch': 0.67} + 67%|██████▋ | 458/681 [19:25<09:28, 2.55s/it] 67%|██████▋ | 459/681 [19:28<09:27, 2.56s/it] {'loss': 1.0536, 'grad_norm': 32.20987319946289, 'learning_rate': 1.466771464027316e-07, 'fcm_dpo/beta': 0.001385183772072196, 'fcm_dpo/q_t': 0.3948526680469513, 'fcm_dpo/delta': -0.0625496357679367, 'fcm_dpo/margin': 331.5897216796875, 'margin_dpo/margin_mean': 331.5897521972656, 'margin_dpo/margin_std': 406.781982421875, 'logps/chosen': -505.96185302734375, 'logps/rejected': -877.1627197265625, 'logps/ref_chosen': -46.55748748779297, 'logps/ref_rejected': -86.16854095458984, 'KL/chosen_KL_mean': -459.40435791015625, 'KL/rejected_KL_mean': -790.994140625, 'KL/mean': -625.1992797851562, 'KL/std': 361.43035888671875, 'logits/chosen': -0.6852984428405762, 'logits/rejected': -0.7049773931503296, 'epoch': 0.67} + 67%|██████▋ | 459/681 [19:28<09:27, 2.56s/it] 68%|██████▊ | 460/681 [19:31<09:32, 2.59s/it] {'loss': 1.0176, 'grad_norm': 40.046512603759766, 'learning_rate': 1.4550991377830423e-07, 'fcm_dpo/beta': 0.0013653924688696861, 'fcm_dpo/q_t': 0.3863321542739868, 'fcm_dpo/delta': -0.10034875571727753, 'fcm_dpo/margin': 362.849609375, 'margin_dpo/margin_mean': 362.849609375, 'margin_dpo/margin_std': 396.6766357421875, 'logps/chosen': -564.1296997070312, 'logps/rejected': -979.4637451171875, 'logps/ref_chosen': -51.63489532470703, 'logps/ref_rejected': -104.11935424804688, 'KL/chosen_KL_mean': -512.4947509765625, 'KL/rejected_KL_mean': -875.3443603515625, 'KL/mean': -693.9195556640625, 'KL/std': 360.5986328125, 'logits/chosen': -0.7233697772026062, 'logits/rejected': -0.7532409429550171, 'epoch': 0.68} + 68%|██████▊ | 460/681 [19:31<09:32, 2.59s/it] 68%|██████▊ | 461/681 [19:33<09:29, 2.59s/it] {'loss': 1.1035, 'grad_norm': 25.319110870361328, 'learning_rate': 1.4434543456482518e-07, 'fcm_dpo/beta': 0.0013628401793539524, 'fcm_dpo/q_t': 0.41146761178970337, 'fcm_dpo/delta': 0.008031336590647697, 'fcm_dpo/margin': 287.7176208496094, 'margin_dpo/margin_mean': 287.71759033203125, 'margin_dpo/margin_std': 409.4786071777344, 'logps/chosen': -604.1192626953125, 'logps/rejected': -923.1318359375, 'logps/ref_chosen': -55.18195724487305, 'logps/ref_rejected': -86.47689819335938, 'KL/chosen_KL_mean': -548.9373779296875, 'KL/rejected_KL_mean': -836.6549072265625, 'KL/mean': -692.796142578125, 'KL/std': 373.7737731933594, 'logits/chosen': -0.7724506855010986, 'logits/rejected': -0.7853858470916748, 'epoch': 0.68} + 68%|██████▊ | 461/681 [19:33<09:29, 2.59s/it] 68%|██████▊ | 462/681 [19:36<09:20, 2.56s/it] {'loss': 1.1705, 'grad_norm': 43.80839920043945, 'learning_rate': 1.4318373944740484e-07, 'fcm_dpo/beta': 0.0013765160692855716, 'fcm_dpo/q_t': 0.428949773311615, 'fcm_dpo/delta': 0.08537392318248749, 'fcm_dpo/margin': 230.5576171875, 'margin_dpo/margin_mean': 230.55758666992188, 'margin_dpo/margin_std': 416.346923828125, 'logps/chosen': -629.663818359375, 'logps/rejected': -869.134521484375, 'logps/ref_chosen': -69.92803192138672, 'logps/ref_rejected': -78.84111022949219, 'KL/chosen_KL_mean': -559.73583984375, 'KL/rejected_KL_mean': -790.2933959960938, 'KL/mean': -675.0146484375, 'KL/std': 367.716552734375, 'logits/chosen': -0.8070446848869324, 'logits/rejected': -0.7969012260437012, 'epoch': 0.68} + 68%|██████▊ | 462/681 [19:36<09:20, 2.56s/it] 68%|██████▊ | 463/681 [19:38<09:10, 2.53s/it] {'loss': 1.0939, 'grad_norm': 37.92364501953125, 'learning_rate': 1.4202485903778976e-07, 'fcm_dpo/beta': 0.0013845614157617092, 'fcm_dpo/q_t': 0.405579149723053, 'fcm_dpo/delta': -0.02077137678861618, 'fcm_dpo/margin': 303.08685302734375, 'margin_dpo/margin_mean': 303.08685302734375, 'margin_dpo/margin_std': 430.80828857421875, 'logps/chosen': -614.5189208984375, 'logps/rejected': -951.3563842773438, 'logps/ref_chosen': -55.27437210083008, 'logps/ref_rejected': -89.02497863769531, 'KL/chosen_KL_mean': -559.2445068359375, 'KL/rejected_KL_mean': -862.3314208984375, 'KL/mean': -710.7879638671875, 'KL/std': 379.8200378417969, 'logits/chosen': -0.7985125780105591, 'logits/rejected': -0.8062667846679688, 'epoch': 0.68} + 68%|██████▊ | 463/681 [19:38<09:10, 2.53s/it] 68%|██████▊ | 464/681 [19:40<08:51, 2.45s/it] {'loss': 0.936, 'grad_norm': 38.354400634765625, 'learning_rate': 1.4086882387355658e-07, 'fcm_dpo/beta': 0.0013179676607251167, 'fcm_dpo/q_t': 0.3569212555885315, 'fcm_dpo/delta': -0.24893316626548767, 'fcm_dpo/margin': 478.4560546875, 'margin_dpo/margin_mean': 478.4560546875, 'margin_dpo/margin_std': 467.5229187011719, 'logps/chosen': -606.620849609375, 'logps/rejected': -1136.654052734375, 'logps/ref_chosen': -50.91230010986328, 'logps/ref_rejected': -102.4893798828125, 'KL/chosen_KL_mean': -555.7085571289062, 'KL/rejected_KL_mean': -1034.1646728515625, 'KL/mean': -794.9366455078125, 'KL/std': 459.53875732421875, 'logits/chosen': -0.7893344163894653, 'logits/rejected': -0.8504258990287781, 'epoch': 0.68} + 68%|██████▊ | 464/681 [19:40<08:51, 2.45s/it] 68%|██████▊ | 465/681 [19:43<08:56, 2.49s/it] {'loss': 1.046, 'grad_norm': 38.300540924072266, 'learning_rate': 1.3971566441730714e-07, 'fcm_dpo/beta': 0.0012960683088749647, 'fcm_dpo/q_t': 0.3856911063194275, 'fcm_dpo/delta': -0.09770198166370392, 'fcm_dpo/margin': 380.36016845703125, 'margin_dpo/margin_mean': 380.36016845703125, 'margin_dpo/margin_std': 488.3748779296875, 'logps/chosen': -624.9694213867188, 'logps/rejected': -1059.15869140625, 'logps/ref_chosen': -60.116851806640625, 'logps/ref_rejected': -113.94602966308594, 'KL/chosen_KL_mean': -564.8525390625, 'KL/rejected_KL_mean': -945.2127075195312, 'KL/mean': -755.03271484375, 'KL/std': 466.654052734375, 'logits/chosen': -0.793292760848999, 'logits/rejected': -0.8128570318222046, 'epoch': 0.68} + 68%|██████▊ | 465/681 [19:43<08:56, 2.49s/it] 68%|██████▊ | 466/681 [19:46<09:03, 2.53s/it] {'loss': 1.0959, 'grad_norm': 36.897422790527344, 'learning_rate': 1.3856541105586545e-07, 'fcm_dpo/beta': 0.0012731440365314484, 'fcm_dpo/q_t': 0.40266337990760803, 'fcm_dpo/delta': -0.028695937246084213, 'fcm_dpo/margin': 335.2580261230469, 'margin_dpo/margin_mean': 335.2580261230469, 'margin_dpo/margin_std': 487.9477233886719, 'logps/chosen': -678.9547119140625, 'logps/rejected': -1051.607177734375, 'logps/ref_chosen': -52.920921325683594, 'logps/ref_rejected': -90.3154296875, 'KL/chosen_KL_mean': -626.0338134765625, 'KL/rejected_KL_mean': -961.291748046875, 'KL/mean': -793.6627197265625, 'KL/std': 443.3585205078125, 'logits/chosen': -0.8305766582489014, 'logits/rejected': -0.8335669040679932, 'epoch': 0.68} + 68%|██████▊ | 466/681 [19:46<09:03, 2.53s/it] 69%|██████▊ | 467/681 [19:48<08:58, 2.52s/it] {'loss': 1.1501, 'grad_norm': 49.63898468017578, 'learning_rate': 1.3741809409947729e-07, 'fcm_dpo/beta': 0.0012568333186209202, 'fcm_dpo/q_t': 0.403271347284317, 'fcm_dpo/delta': -0.049399569630622864, 'fcm_dpo/margin': 354.17047119140625, 'margin_dpo/margin_mean': 354.17047119140625, 'margin_dpo/margin_std': 657.70361328125, 'logps/chosen': -865.343505859375, 'logps/rejected': -1243.658447265625, 'logps/ref_chosen': -78.7158203125, 'logps/ref_rejected': -102.86019897460938, 'KL/chosen_KL_mean': -786.627685546875, 'KL/rejected_KL_mean': -1140.7982177734375, 'KL/mean': -963.7129516601562, 'KL/std': 571.1072998046875, 'logits/chosen': -0.9358654022216797, 'logits/rejected': -0.9291361570358276, 'epoch': 0.69} + 69%|██████▊ | 467/681 [19:48<08:58, 2.52s/it] 69%|██████▊ | 468/681 [19:51<09:09, 2.58s/it] {'loss': 1.017, 'grad_norm': 39.59514617919922, 'learning_rate': 1.362737437810114e-07, 'fcm_dpo/beta': 0.0012354985810816288, 'fcm_dpo/q_t': 0.38068056106567383, 'fcm_dpo/delta': -0.15800079703330994, 'fcm_dpo/margin': 444.5817565917969, 'margin_dpo/margin_mean': 444.5817565917969, 'margin_dpo/margin_std': 578.4088745117188, 'logps/chosen': -692.8939208984375, 'logps/rejected': -1168.569091796875, 'logps/ref_chosen': -69.93536376953125, 'logps/ref_rejected': -101.02880859375, 'KL/chosen_KL_mean': -622.95849609375, 'KL/rejected_KL_mean': -1067.540283203125, 'KL/mean': -845.2493896484375, 'KL/std': 515.7662963867188, 'logits/chosen': -0.9017723798751831, 'logits/rejected': -0.9113543629646301, 'epoch': 0.69} + 69%|██████▊ | 468/681 [19:51<09:09, 2.58s/it] 69%|██████▉ | 469/681 [19:54<09:22, 2.65s/it] {'loss': 1.0151, 'grad_norm': 33.920169830322266, 'learning_rate': 1.351323902551631e-07, 'fcm_dpo/beta': 0.0011932153720408678, 'fcm_dpo/q_t': 0.37997373938560486, 'fcm_dpo/delta': -0.13762570917606354, 'fcm_dpo/margin': 443.07666015625, 'margin_dpo/margin_mean': 443.07666015625, 'margin_dpo/margin_std': 517.1961669921875, 'logps/chosen': -730.501220703125, 'logps/rejected': -1210.239501953125, 'logps/ref_chosen': -68.12469482421875, 'logps/ref_rejected': -104.78640747070312, 'KL/chosen_KL_mean': -662.3765258789062, 'KL/rejected_KL_mean': -1105.453125, 'KL/mean': -883.9148559570312, 'KL/std': 464.3486022949219, 'logits/chosen': -0.9452608227729797, 'logits/rejected': -0.9607683420181274, 'epoch': 0.69} + 69%|██████▉ | 469/681 [19:54<09:22, 2.65s/it] 69%|██████▉ | 470/681 [19:56<09:15, 2.63s/it] {'loss': 1.0719, 'grad_norm': 28.70859718322754, 'learning_rate': 1.339940635976592e-07, 'fcm_dpo/beta': 0.001185485627502203, 'fcm_dpo/q_t': 0.3954838514328003, 'fcm_dpo/delta': -0.06129393354058266, 'fcm_dpo/margin': 386.737548828125, 'margin_dpo/margin_mean': 386.737548828125, 'margin_dpo/margin_std': 531.3819580078125, 'logps/chosen': -604.9050903320312, 'logps/rejected': -1030.5535888671875, 'logps/ref_chosen': -43.791927337646484, 'logps/ref_rejected': -82.70285034179688, 'KL/chosen_KL_mean': -561.1131591796875, 'KL/rejected_KL_mean': -947.8507080078125, 'KL/mean': -754.48193359375, 'KL/std': 474.6820068359375, 'logits/chosen': -0.885380744934082, 'logits/rejected': -0.8969517350196838, 'epoch': 0.69} + 69%|██████▉ | 470/681 [19:56<09:15, 2.63s/it] 69%|██████▉ | 471/681 [19:58<08:53, 2.54s/it] {'loss': 1.1346, 'grad_norm': 37.73147964477539, 'learning_rate': 1.3285879380446563e-07, 'fcm_dpo/beta': 0.0011751014972105622, 'fcm_dpo/q_t': 0.41689130663871765, 'fcm_dpo/delta': 0.022231273353099823, 'fcm_dpo/margin': 321.73797607421875, 'margin_dpo/margin_mean': 321.73797607421875, 'margin_dpo/margin_std': 533.201904296875, 'logps/chosen': -789.7672119140625, 'logps/rejected': -1131.776123046875, 'logps/ref_chosen': -63.33952331542969, 'logps/ref_rejected': -83.61048126220703, 'KL/chosen_KL_mean': -726.427734375, 'KL/rejected_KL_mean': -1048.165771484375, 'KL/mean': -887.2966918945312, 'KL/std': 499.47198486328125, 'logits/chosen': -0.9937692880630493, 'logits/rejected': -1.0016134977340698, 'epoch': 0.69} + 69%|██████▉ | 471/681 [19:59<08:53, 2.54s/it] 69%|██████▉ | 472/681 [20:01<09:05, 2.61s/it] {'loss': 1.102, 'grad_norm': 35.72392272949219, 'learning_rate': 1.317266107909975e-07, 'fcm_dpo/beta': 0.0011670588282868266, 'fcm_dpo/q_t': 0.4006522297859192, 'fcm_dpo/delta': -0.07075389474630356, 'fcm_dpo/margin': 400.17333984375, 'margin_dpo/margin_mean': 400.17333984375, 'margin_dpo/margin_std': 645.8973999023438, 'logps/chosen': -796.4598388671875, 'logps/rejected': -1230.17626953125, 'logps/ref_chosen': -83.66610717773438, 'logps/ref_rejected': -117.20919799804688, 'KL/chosen_KL_mean': -712.793701171875, 'KL/rejected_KL_mean': -1112.967041015625, 'KL/mean': -912.88037109375, 'KL/std': 599.86572265625, 'logits/chosen': -0.954893946647644, 'logits/rejected': -0.93065345287323, 'epoch': 0.69} + 69%|██████▉ | 472/681 [20:01<09:05, 2.61s/it] 69%|██████▉ | 473/681 [20:04<09:08, 2.64s/it] {'loss': 1.3781, 'grad_norm': 116.40143585205078, 'learning_rate': 1.3059754439133002e-07, 'fcm_dpo/beta': 0.0011666135396808386, 'fcm_dpo/q_t': 0.45503491163253784, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 182.16502380371094, 'margin_dpo/margin_mean': 182.16500854492188, 'margin_dpo/margin_std': 820.9163208007812, 'logps/chosen': -915.9295654296875, 'logps/rejected': -1115.744140625, 'logps/ref_chosen': -63.49696731567383, 'logps/ref_rejected': -81.14657592773438, 'KL/chosen_KL_mean': -852.4325561523438, 'KL/rejected_KL_mean': -1034.5975341796875, 'KL/mean': -943.5150146484375, 'KL/std': 626.2051391601562, 'logits/chosen': -1.0035320520401, 'logits/rejected': -0.9779636859893799, 'epoch': 0.69} + 69%|██████▉ | 473/681 [20:04<09:08, 2.64s/it] 70%|██████▉ | 474/681 [20:07<09:10, 2.66s/it] {'loss': 1.1566, 'grad_norm': 36.65470886230469, 'learning_rate': 1.2947162435741277e-07, 'fcm_dpo/beta': 0.0011568089248612523, 'fcm_dpo/q_t': 0.41386541724205017, 'fcm_dpo/delta': -0.0847577303647995, 'fcm_dpo/margin': 327.47601318359375, 'margin_dpo/margin_mean': 327.47601318359375, 'margin_dpo/margin_std': 581.762939453125, 'logps/chosen': -738.7584838867188, 'logps/rejected': -1103.7030029296875, 'logps/ref_chosen': -52.6119384765625, 'logps/ref_rejected': -90.08041381835938, 'KL/chosen_KL_mean': -686.1465454101562, 'KL/rejected_KL_mean': -1013.62255859375, 'KL/mean': -849.884521484375, 'KL/std': 536.9088134765625, 'logits/chosen': -0.944141149520874, 'logits/rejected': -0.9484027624130249, 'epoch': 0.7} + 70%|██████▉ | 474/681 [20:07<09:10, 2.66s/it] 70%|██████▉ | 475/681 [20:09<08:54, 2.60s/it] {'loss': 1.017, 'grad_norm': 42.494869232177734, 'learning_rate': 1.2834888035828596e-07, 'fcm_dpo/beta': 0.0011289971880614758, 'fcm_dpo/q_t': 0.38658300042152405, 'fcm_dpo/delta': -0.09538697451353073, 'fcm_dpo/margin': 434.46240234375, 'margin_dpo/margin_mean': 434.46240234375, 'margin_dpo/margin_std': 460.6612548828125, 'logps/chosen': -540.369140625, 'logps/rejected': -1022.3992919921875, 'logps/ref_chosen': -42.49519348144531, 'logps/ref_rejected': -90.06294250488281, 'KL/chosen_KL_mean': -497.8739013671875, 'KL/rejected_KL_mean': -932.3363037109375, 'KL/mean': -715.1051025390625, 'KL/std': 428.21868896484375, 'logits/chosen': -0.9651395082473755, 'logits/rejected': -0.9933245182037354, 'epoch': 0.7} + 70%|██████▉ | 475/681 [20:09<08:54, 2.60s/it] 70%|██████▉ | 476/681 [20:12<08:50, 2.59s/it] {'loss': 1.108, 'grad_norm': 58.33089065551758, 'learning_rate': 1.2722934197929802e-07, 'fcm_dpo/beta': 0.0011274400167167187, 'fcm_dpo/q_t': 0.4138496518135071, 'fcm_dpo/delta': 0.021099748089909554, 'fcm_dpo/margin': 336.776611328125, 'margin_dpo/margin_mean': 336.776611328125, 'margin_dpo/margin_std': 474.5893859863281, 'logps/chosen': -686.5010986328125, 'logps/rejected': -1054.03857421875, 'logps/ref_chosen': -42.94938278198242, 'logps/ref_rejected': -73.71023559570312, 'KL/chosen_KL_mean': -643.5517578125, 'KL/rejected_KL_mean': -980.328369140625, 'KL/mean': -811.9400634765625, 'KL/std': 494.214111328125, 'logits/chosen': -0.9680467844009399, 'logits/rejected': -0.985359787940979, 'epoch': 0.7} + 70%|██████▉ | 476/681 [20:12<08:50, 2.59s/it] 70%|███████ | 477/681 [20:14<08:45, 2.58s/it] {'loss': 1.138, 'grad_norm': 32.38017272949219, 'learning_rate': 1.2611303872132631e-07, 'fcm_dpo/beta': 0.0011346408864483237, 'fcm_dpo/q_t': 0.41092249751091003, 'fcm_dpo/delta': 0.0038326121866703033, 'fcm_dpo/margin': 349.0743408203125, 'margin_dpo/margin_mean': 349.0743408203125, 'margin_dpo/margin_std': 609.6284790039062, 'logps/chosen': -741.791748046875, 'logps/rejected': -1096.2308349609375, 'logps/ref_chosen': -70.77261352539062, 'logps/ref_rejected': -76.13737487792969, 'KL/chosen_KL_mean': -671.0191040039062, 'KL/rejected_KL_mean': -1020.0934448242188, 'KL/mean': -845.5562744140625, 'KL/std': 523.93212890625, 'logits/chosen': -0.9727839231491089, 'logits/rejected': -0.9398672580718994, 'epoch': 0.7} + 70%|███████ | 477/681 [20:14<08:45, 2.58s/it] 70%|███████ | 478/681 [20:17<08:48, 2.60s/it] {'loss': 1.0658, 'grad_norm': 33.31148147583008, 'learning_rate': 1.2500000000000005e-07, 'fcm_dpo/beta': 0.001124206930398941, 'fcm_dpo/q_t': 0.39735937118530273, 'fcm_dpo/delta': -0.05444270372390747, 'fcm_dpo/margin': 402.06536865234375, 'margin_dpo/margin_mean': 402.06536865234375, 'margin_dpo/margin_std': 529.6115112304688, 'logps/chosen': -585.0142822265625, 'logps/rejected': -1031.0009765625, 'logps/ref_chosen': -41.440513610839844, 'logps/ref_rejected': -85.36196899414062, 'KL/chosen_KL_mean': -543.57373046875, 'KL/rejected_KL_mean': -945.6390380859375, 'KL/mean': -744.6063842773438, 'KL/std': 450.7725830078125, 'logits/chosen': -0.8353407979011536, 'logits/rejected': -0.8606827259063721, 'epoch': 0.7} + 70%|███████ | 478/681 [20:17<08:48, 2.60s/it] 70%|███████ | 479/681 [20:19<08:41, 2.58s/it] {'loss': 1.1246, 'grad_norm': 30.347566604614258, 'learning_rate': 1.2389025514492456e-07, 'fcm_dpo/beta': 0.0011205710470676422, 'fcm_dpo/q_t': 0.40699535608291626, 'fcm_dpo/delta': -0.0337379164993763, 'fcm_dpo/margin': 385.18798828125, 'margin_dpo/margin_mean': 385.18798828125, 'margin_dpo/margin_std': 668.6539306640625, 'logps/chosen': -758.428466796875, 'logps/rejected': -1184.82470703125, 'logps/ref_chosen': -53.907920837402344, 'logps/ref_rejected': -95.1163330078125, 'KL/chosen_KL_mean': -704.5205078125, 'KL/rejected_KL_mean': -1089.7083740234375, 'KL/mean': -897.114501953125, 'KL/std': 577.3861083984375, 'logits/chosen': -0.9103279113769531, 'logits/rejected': -0.9409140348434448, 'epoch': 0.7} + 70%|███████ | 479/681 [20:19<08:41, 2.58s/it] 70%|███████ | 480/681 [20:22<08:28, 2.53s/it] {'loss': 1.1745, 'grad_norm': 53.98848342895508, 'learning_rate': 1.227838333989088e-07, 'fcm_dpo/beta': 0.0011057795491069555, 'fcm_dpo/q_t': 0.4251486659049988, 'fcm_dpo/delta': -0.04888079687952995, 'fcm_dpo/margin': 302.38421630859375, 'margin_dpo/margin_mean': 302.38421630859375, 'margin_dpo/margin_std': 548.7213134765625, 'logps/chosen': -858.31103515625, 'logps/rejected': -1184.945068359375, 'logps/ref_chosen': -58.682701110839844, 'logps/ref_rejected': -82.93248748779297, 'KL/chosen_KL_mean': -799.6283569335938, 'KL/rejected_KL_mean': -1102.0125732421875, 'KL/mean': -950.8204345703125, 'KL/std': 511.99639892578125, 'logits/chosen': -0.9347141981124878, 'logits/rejected': -0.9363481998443604, 'epoch': 0.7} + 70%|███████ | 480/681 [20:22<08:28, 2.53s/it] 71%|███████ | 481/681 [20:24<08:24, 2.52s/it] {'loss': 1.0317, 'grad_norm': 28.775257110595703, 'learning_rate': 1.2168076391719489e-07, 'fcm_dpo/beta': 0.0010841806652024388, 'fcm_dpo/q_t': 0.3871780037879944, 'fcm_dpo/delta': -0.10892824828624725, 'fcm_dpo/margin': 464.44964599609375, 'margin_dpo/margin_mean': 464.4496154785156, 'margin_dpo/margin_std': 576.0606689453125, 'logps/chosen': -721.7613525390625, 'logps/rejected': -1223.6671142578125, 'logps/ref_chosen': -54.964271545410156, 'logps/ref_rejected': -92.42044067382812, 'KL/chosen_KL_mean': -666.797119140625, 'KL/rejected_KL_mean': -1131.24658203125, 'KL/mean': -899.0219116210938, 'KL/std': 542.1533813476562, 'logits/chosen': -0.9464655518531799, 'logits/rejected': -0.9776947498321533, 'epoch': 0.71} + 71%|███████ | 481/681 [20:24<08:24, 2.52s/it] 71%|███████ | 482/681 [20:27<08:41, 2.62s/it] {'loss': 1.2821, 'grad_norm': 54.073997497558594, 'learning_rate': 1.2058107576668938e-07, 'fcm_dpo/beta': 0.0010857656598091125, 'fcm_dpo/q_t': 0.4472288489341736, 'fcm_dpo/delta': 0.08385416120290756, 'fcm_dpo/margin': 205.1912384033203, 'margin_dpo/margin_mean': 205.19125366210938, 'margin_dpo/margin_std': 625.9072265625, 'logps/chosen': -785.8701171875, 'logps/rejected': -1011.0975341796875, 'logps/ref_chosen': -67.553466796875, 'logps/ref_rejected': -87.58953857421875, 'KL/chosen_KL_mean': -718.3167114257812, 'KL/rejected_KL_mean': -923.5079345703125, 'KL/mean': -820.912353515625, 'KL/std': 538.7589111328125, 'logits/chosen': -0.8339771032333374, 'logits/rejected': -0.8247455358505249, 'epoch': 0.71} + 71%|███████ | 482/681 [20:27<08:41, 2.62s/it] 71%|███████ | 483/681 [20:30<08:38, 2.62s/it] {'loss': 1.0223, 'grad_norm': 31.249698638916016, 'learning_rate': 1.194847979251979e-07, 'fcm_dpo/beta': 0.001074553350917995, 'fcm_dpo/q_t': 0.38575249910354614, 'fcm_dpo/delta': -0.11760546267032623, 'fcm_dpo/margin': 475.93743896484375, 'margin_dpo/margin_mean': 475.9373779296875, 'margin_dpo/margin_std': 576.4466552734375, 'logps/chosen': -679.411376953125, 'logps/rejected': -1187.805908203125, 'logps/ref_chosen': -63.32981872558594, 'logps/ref_rejected': -95.78697204589844, 'KL/chosen_KL_mean': -616.08154296875, 'KL/rejected_KL_mean': -1092.01904296875, 'KL/mean': -854.05029296875, 'KL/std': 518.1444091796875, 'logits/chosen': -0.9016849994659424, 'logits/rejected': -0.9181410074234009, 'epoch': 0.71} + 71%|███████ | 483/681 [20:30<08:38, 2.62s/it] 71%|███████ | 484/681 [20:32<08:18, 2.53s/it] {'loss': 1.0546, 'grad_norm': 62.45989990234375, 'learning_rate': 1.1839195928066101e-07, 'fcm_dpo/beta': 0.0010655100923031569, 'fcm_dpo/q_t': 0.3974398374557495, 'fcm_dpo/delta': -0.05194704234600067, 'fcm_dpo/margin': 421.85662841796875, 'margin_dpo/margin_mean': 421.85662841796875, 'margin_dpo/margin_std': 506.4351806640625, 'logps/chosen': -580.262939453125, 'logps/rejected': -1027.352783203125, 'logps/ref_chosen': -59.13812255859375, 'logps/ref_rejected': -84.37144470214844, 'KL/chosen_KL_mean': -521.124755859375, 'KL/rejected_KL_mean': -942.9813842773438, 'KL/mean': -732.0531005859375, 'KL/std': 495.1884765625, 'logits/chosen': -0.8718733787536621, 'logits/rejected': -0.8974796533584595, 'epoch': 0.71} + 71%|███████ | 484/681 [20:32<08:18, 2.53s/it] 71%|███████ | 485/681 [20:35<08:12, 2.51s/it] {'loss': 1.089, 'grad_norm': 35.550392150878906, 'learning_rate': 1.1730258863039347e-07, 'fcm_dpo/beta': 0.0010573656763881445, 'fcm_dpo/q_t': 0.40495431423187256, 'fcm_dpo/delta': -0.022721393033862114, 'fcm_dpo/margin': 398.82159423828125, 'margin_dpo/margin_mean': 398.8216552734375, 'margin_dpo/margin_std': 560.9061279296875, 'logps/chosen': -597.9276123046875, 'logps/rejected': -1041.2637939453125, 'logps/ref_chosen': -58.849571228027344, 'logps/ref_rejected': -103.36408233642578, 'KL/chosen_KL_mean': -539.0780639648438, 'KL/rejected_KL_mean': -937.8997192382812, 'KL/mean': -738.4888916015625, 'KL/std': 491.9963073730469, 'logits/chosen': -0.794913649559021, 'logits/rejected': -0.8159662485122681, 'epoch': 0.71} + 71%|███████ | 485/681 [20:35<08:12, 2.51s/it] 71%|███████▏ | 486/681 [20:37<07:43, 2.38s/it] {'loss': 1.0777, 'grad_norm': 30.52185821533203, 'learning_rate': 1.1621671468032493e-07, 'fcm_dpo/beta': 0.0010410689283162355, 'fcm_dpo/q_t': 0.3951270878314972, 'fcm_dpo/delta': -0.07956840097904205, 'fcm_dpo/margin': 457.0060119628906, 'margin_dpo/margin_mean': 457.00604248046875, 'margin_dpo/margin_std': 684.0638427734375, 'logps/chosen': -677.2293701171875, 'logps/rejected': -1171.114990234375, 'logps/ref_chosen': -55.25966262817383, 'logps/ref_rejected': -92.13936614990234, 'KL/chosen_KL_mean': -621.9697265625, 'KL/rejected_KL_mean': -1078.9757080078125, 'KL/mean': -850.4727172851562, 'KL/std': 568.115478515625, 'logits/chosen': -0.8822107911109924, 'logits/rejected': -0.9047358632087708, 'epoch': 0.71} + 71%|███████▏ | 486/681 [20:37<07:43, 2.38s/it] 72%|███████▏ | 487/681 [20:39<07:57, 2.46s/it] {'loss': 1.1342, 'grad_norm': 33.21575164794922, 'learning_rate': 1.1513436604424378e-07, 'fcm_dpo/beta': 0.001048167236149311, 'fcm_dpo/q_t': 0.41704893112182617, 'fcm_dpo/delta': 0.044978074729442596, 'fcm_dpo/margin': 339.7569580078125, 'margin_dpo/margin_mean': 339.7569580078125, 'margin_dpo/margin_std': 529.3812255859375, 'logps/chosen': -727.1214599609375, 'logps/rejected': -1106.23388671875, 'logps/ref_chosen': -53.06330871582031, 'logps/ref_rejected': -92.41883087158203, 'KL/chosen_KL_mean': -674.0582275390625, 'KL/rejected_KL_mean': -1013.8150634765625, 'KL/mean': -843.9366455078125, 'KL/std': 521.2279663085938, 'logits/chosen': -0.8942869901657104, 'logits/rejected': -0.9046221971511841, 'epoch': 0.72} + 72%|███████▏ | 487/681 [20:39<07:57, 2.46s/it] 72%|███████▏ | 488/681 [20:42<08:04, 2.51s/it] {'loss': 1.0978, 'grad_norm': 34.97057342529297, 'learning_rate': 1.1405557124304335e-07, 'fcm_dpo/beta': 0.001053705345839262, 'fcm_dpo/q_t': 0.4128515124320984, 'fcm_dpo/delta': 0.02513560838997364, 'fcm_dpo/margin': 356.37518310546875, 'margin_dpo/margin_mean': 356.37518310546875, 'margin_dpo/margin_std': 445.29071044921875, 'logps/chosen': -594.98095703125, 'logps/rejected': -983.1345825195312, 'logps/ref_chosen': -52.22815704345703, 'logps/ref_rejected': -84.00656127929688, 'KL/chosen_KL_mean': -542.7528076171875, 'KL/rejected_KL_mean': -899.1280517578125, 'KL/mean': -720.9403686523438, 'KL/std': 461.8782043457031, 'logits/chosen': -0.8141319751739502, 'logits/rejected': -0.8222429752349854, 'epoch': 0.72} + 72%|███████▏ | 488/681 [20:42<08:04, 2.51s/it] 72%|███████▏ | 489/681 [20:44<07:58, 2.49s/it] {'loss': 1.1201, 'grad_norm': 27.448223114013672, 'learning_rate': 1.1298035870396985e-07, 'fcm_dpo/beta': 0.0010611966717988253, 'fcm_dpo/q_t': 0.4179996848106384, 'fcm_dpo/delta': 0.03748384118080139, 'fcm_dpo/margin': 342.54595947265625, 'margin_dpo/margin_mean': 342.5459289550781, 'margin_dpo/margin_std': 497.568359375, 'logps/chosen': -556.213134765625, 'logps/rejected': -922.1675415039062, 'logps/ref_chosen': -55.989627838134766, 'logps/ref_rejected': -79.39812469482422, 'KL/chosen_KL_mean': -500.2234802246094, 'KL/rejected_KL_mean': -842.7694091796875, 'KL/mean': -671.4964599609375, 'KL/std': 455.5792236328125, 'logits/chosen': -0.8774303197860718, 'logits/rejected': -0.8812981247901917, 'epoch': 0.72} + 72%|███████▏ | 489/681 [20:44<07:58, 2.49s/it] 72%|███████▏ | 490/681 [20:47<08:09, 2.56s/it] {'loss': 1.1353, 'grad_norm': 37.27909851074219, 'learning_rate': 1.1190875675987355e-07, 'fcm_dpo/beta': 0.0010620702523738146, 'fcm_dpo/q_t': 0.4118698239326477, 'fcm_dpo/delta': 0.0034542735666036606, 'fcm_dpo/margin': 373.4256591796875, 'margin_dpo/margin_mean': 373.4256896972656, 'margin_dpo/margin_std': 642.0191650390625, 'logps/chosen': -654.7120361328125, 'logps/rejected': -1086.180419921875, 'logps/ref_chosen': -52.36639404296875, 'logps/ref_rejected': -110.4090576171875, 'KL/chosen_KL_mean': -602.345703125, 'KL/rejected_KL_mean': -975.7713012695312, 'KL/mean': -789.0585327148438, 'KL/std': 554.9224853515625, 'logits/chosen': -0.8731991052627563, 'logits/rejected': -0.911872148513794, 'epoch': 0.72} + 72%|███████▏ | 490/681 [20:47<08:09, 2.56s/it] 72%|███████▏ | 491/681 [20:50<08:07, 2.57s/it] {'loss': 1.1945, 'grad_norm': 32.712093353271484, 'learning_rate': 1.1084079364846241e-07, 'fcm_dpo/beta': 0.0010826380457729101, 'fcm_dpo/q_t': 0.4377876818180084, 'fcm_dpo/delta': 0.12914934754371643, 'fcm_dpo/margin': 253.20947265625, 'margin_dpo/margin_mean': 253.20948791503906, 'margin_dpo/margin_std': 480.40399169921875, 'logps/chosen': -618.3865356445312, 'logps/rejected': -884.7525634765625, 'logps/ref_chosen': -60.11626434326172, 'logps/ref_rejected': -73.27278900146484, 'KL/chosen_KL_mean': -558.270263671875, 'KL/rejected_KL_mean': -811.479736328125, 'KL/mean': -684.875, 'KL/std': 470.6094970703125, 'logits/chosen': -0.9021086096763611, 'logits/rejected': -0.8983560800552368, 'epoch': 0.72} + 72%|███████▏ | 491/681 [20:50<08:07, 2.57s/it] 72%|███████▏ | 492/681 [20:52<08:07, 2.58s/it] {'loss': 1.2158, 'grad_norm': 31.351417541503906, 'learning_rate': 1.097764975115576e-07, 'fcm_dpo/beta': 0.0011053578928112984, 'fcm_dpo/q_t': 0.4370453357696533, 'fcm_dpo/delta': 0.12110729515552521, 'fcm_dpo/margin': 255.5540771484375, 'margin_dpo/margin_mean': 255.55409240722656, 'margin_dpo/margin_std': 563.4765014648438, 'logps/chosen': -632.8526611328125, 'logps/rejected': -907.0721435546875, 'logps/ref_chosen': -53.994178771972656, 'logps/ref_rejected': -72.65962219238281, 'KL/chosen_KL_mean': -578.8584594726562, 'KL/rejected_KL_mean': -834.41259765625, 'KL/mean': -706.635498046875, 'KL/std': 479.8116149902344, 'logits/chosen': -0.9315870404243469, 'logits/rejected': -0.9179561734199524, 'epoch': 0.72} + 72%|███████▏ | 492/681 [20:52<08:07, 2.58s/it] 72%|███████▏ | 493/681 [20:55<08:12, 2.62s/it] {'loss': 1.1808, 'grad_norm': 35.48196029663086, 'learning_rate': 1.0871589639435203e-07, 'fcm_dpo/beta': 0.0011120472336187959, 'fcm_dpo/q_t': 0.4283533990383148, 'fcm_dpo/delta': -0.011631077155470848, 'fcm_dpo/margin': 275.22186279296875, 'margin_dpo/margin_mean': 275.22186279296875, 'margin_dpo/margin_std': 509.6318359375, 'logps/chosen': -697.522705078125, 'logps/rejected': -984.5703125, 'logps/ref_chosen': -75.49723815917969, 'logps/ref_rejected': -87.32301330566406, 'KL/chosen_KL_mean': -622.0254516601562, 'KL/rejected_KL_mean': -897.247314453125, 'KL/mean': -759.6363525390625, 'KL/std': 523.6834106445312, 'logits/chosen': -0.9604239463806152, 'logits/rejected': -0.9354810118675232, 'epoch': 0.72} + 72%|███████▏ | 493/681 [20:55<08:12, 2.62s/it] 73%|███████▎ | 494/681 [20:58<08:10, 2.62s/it] {'loss': 1.0284, 'grad_norm': 48.854129791259766, 'learning_rate': 1.0765901824467166e-07, 'fcm_dpo/beta': 0.0010983939282596111, 'fcm_dpo/q_t': 0.3895619511604309, 'fcm_dpo/delta': -0.0822177529335022, 'fcm_dpo/margin': 435.43121337890625, 'margin_dpo/margin_mean': 435.43121337890625, 'margin_dpo/margin_std': 479.2147216796875, 'logps/chosen': -539.6531982421875, 'logps/rejected': -1019.8164672851562, 'logps/ref_chosen': -41.35926818847656, 'logps/ref_rejected': -86.09136962890625, 'KL/chosen_KL_mean': -498.29388427734375, 'KL/rejected_KL_mean': -933.72509765625, 'KL/mean': -716.009521484375, 'KL/std': 477.13525390625, 'logits/chosen': -0.8259874582290649, 'logits/rejected': -0.8612606525421143, 'epoch': 0.73} + 73%|███████▎ | 494/681 [20:58<08:10, 2.62s/it] 73%|███████▎ | 495/681 [21:00<08:18, 2.68s/it] {'loss': 1.0958, 'grad_norm': 31.173200607299805, 'learning_rate': 1.0660589091223854e-07, 'fcm_dpo/beta': 0.0010908616241067648, 'fcm_dpo/q_t': 0.4062625765800476, 'fcm_dpo/delta': -0.022133061662316322, 'fcm_dpo/margin': 386.12274169921875, 'margin_dpo/margin_mean': 386.1227722167969, 'margin_dpo/margin_std': 574.861328125, 'logps/chosen': -619.0137939453125, 'logps/rejected': -1033.02587890625, 'logps/ref_chosen': -63.53507995605469, 'logps/ref_rejected': -91.42443084716797, 'KL/chosen_KL_mean': -555.478759765625, 'KL/rejected_KL_mean': -941.6015625, 'KL/mean': -748.5401611328125, 'KL/std': 487.3724060058594, 'logits/chosen': -0.9227169752120972, 'logits/rejected': -0.9331672191619873, 'epoch': 0.73} + 73%|███████▎ | 495/681 [21:01<08:18, 2.68s/it] 73%|███████▎ | 496/681 [21:03<08:15, 2.68s/it] {'loss': 1.2315, 'grad_norm': 64.04462432861328, 'learning_rate': 1.0555654214793722e-07, 'fcm_dpo/beta': 0.0011188681237399578, 'fcm_dpo/q_t': 0.44692689180374146, 'fcm_dpo/delta': 0.17580503225326538, 'fcm_dpo/margin': 203.84727478027344, 'margin_dpo/margin_mean': 203.84725952148438, 'margin_dpo/margin_std': 444.61639404296875, 'logps/chosen': -772.530517578125, 'logps/rejected': -988.115234375, 'logps/ref_chosen': -72.5919189453125, 'logps/ref_rejected': -84.32933807373047, 'KL/chosen_KL_mean': -699.9385986328125, 'KL/rejected_KL_mean': -903.785888671875, 'KL/mean': -801.8622436523438, 'KL/std': 393.8895568847656, 'logits/chosen': -0.8876965641975403, 'logits/rejected': -0.8597399592399597, 'epoch': 0.73} + 73%|███████▎ | 496/681 [21:03<08:15, 2.68s/it] 73%|███████▎ | 497/681 [21:06<08:08, 2.65s/it] {'loss': 1.2363, 'grad_norm': 41.72929000854492, 'learning_rate': 1.0451099960308374e-07, 'fcm_dpo/beta': 0.001132933422923088, 'fcm_dpo/q_t': 0.44608232378959656, 'fcm_dpo/delta': 0.02158385142683983, 'fcm_dpo/margin': 207.31849670410156, 'margin_dpo/margin_mean': 207.31849670410156, 'margin_dpo/margin_std': 470.48712158203125, 'logps/chosen': -731.359130859375, 'logps/rejected': -956.3720703125, 'logps/ref_chosen': -58.59397506713867, 'logps/ref_rejected': -76.28836822509766, 'KL/chosen_KL_mean': -672.76513671875, 'KL/rejected_KL_mean': -880.0836791992188, 'KL/mean': -776.4244384765625, 'KL/std': 496.9170227050781, 'logits/chosen': -0.858148455619812, 'logits/rejected': -0.8439843654632568, 'epoch': 0.73} + 73%|███████▎ | 497/681 [21:06<08:08, 2.65s/it] 73%|███████▎ | 498/681 [21:08<08:10, 2.68s/it] {'loss': 1.133, 'grad_norm': 44.66307067871094, 'learning_rate': 1.0346929082869641e-07, 'fcm_dpo/beta': 0.0011333951260894537, 'fcm_dpo/q_t': 0.4110422730445862, 'fcm_dpo/delta': 0.006364853121340275, 'fcm_dpo/margin': 347.5252685546875, 'margin_dpo/margin_mean': 347.5252685546875, 'margin_dpo/margin_std': 589.1495361328125, 'logps/chosen': -718.2606201171875, 'logps/rejected': -1078.538330078125, 'logps/ref_chosen': -71.20565795898438, 'logps/ref_rejected': -83.95803833007812, 'KL/chosen_KL_mean': -647.0549926757812, 'KL/rejected_KL_mean': -994.5802001953125, 'KL/mean': -820.817626953125, 'KL/std': 524.9059448242188, 'logits/chosen': -0.9270666837692261, 'logits/rejected': -0.9164772033691406, 'epoch': 0.73} + 73%|███████▎ | 498/681 [21:09<08:10, 2.68s/it] 73%|███████▎ | 499/681 [21:11<07:58, 2.63s/it] {'loss': 1.0594, 'grad_norm': 42.19448471069336, 'learning_rate': 1.0243144327477013e-07, 'fcm_dpo/beta': 0.0011198758147656918, 'fcm_dpo/q_t': 0.3932623863220215, 'fcm_dpo/delta': -0.0776449665427208, 'fcm_dpo/margin': 423.1136169433594, 'margin_dpo/margin_mean': 423.11358642578125, 'margin_dpo/margin_std': 571.1682739257812, 'logps/chosen': -615.6781616210938, 'logps/rejected': -1088.615234375, 'logps/ref_chosen': -51.25519561767578, 'logps/ref_rejected': -101.07870483398438, 'KL/chosen_KL_mean': -564.4229736328125, 'KL/rejected_KL_mean': -987.5364990234375, 'KL/mean': -775.9797973632812, 'KL/std': 518.1436767578125, 'logits/chosen': -0.9195848107337952, 'logits/rejected': -0.955754280090332, 'epoch': 0.73} + 73%|███████▎ | 499/681 [21:11<07:58, 2.63s/it] 73%|███████▎ | 500/681 [21:13<07:45, 2.57s/it] {'loss': 1.1271, 'grad_norm': 35.08540725708008, 'learning_rate': 1.0139748428955333e-07, 'fcm_dpo/beta': 0.001116940751671791, 'fcm_dpo/q_t': 0.41066765785217285, 'fcm_dpo/delta': 0.006859854329377413, 'fcm_dpo/margin': 352.2098388671875, 'margin_dpo/margin_mean': 352.2098388671875, 'margin_dpo/margin_std': 569.4359741210938, 'logps/chosen': -728.84619140625, 'logps/rejected': -1117.9627685546875, 'logps/ref_chosen': -57.027442932128906, 'logps/ref_rejected': -93.93421173095703, 'KL/chosen_KL_mean': -671.8187255859375, 'KL/rejected_KL_mean': -1024.028564453125, 'KL/mean': -847.9236450195312, 'KL/std': 458.623046875, 'logits/chosen': -0.9325329661369324, 'logits/rejected': -0.9658868312835693, 'epoch': 0.73} + 73%|███████▎ | 500/681 [21:13<07:45, 2.57s/it] 74%|███████▎ | 501/681 [21:16<07:41, 2.56s/it] {'loss': 1.1115, 'grad_norm': 33.07366943359375, 'learning_rate': 1.0036744111882672e-07, 'fcm_dpo/beta': 0.0011179624125361443, 'fcm_dpo/q_t': 0.4073890149593353, 'fcm_dpo/delta': -0.017988204956054688, 'fcm_dpo/margin': 373.15838623046875, 'margin_dpo/margin_mean': 373.15838623046875, 'margin_dpo/margin_std': 589.1995849609375, 'logps/chosen': -626.8782348632812, 'logps/rejected': -1025.833740234375, 'logps/ref_chosen': -54.359527587890625, 'logps/ref_rejected': -80.15670013427734, 'KL/chosen_KL_mean': -572.5186767578125, 'KL/rejected_KL_mean': -945.6771240234375, 'KL/mean': -759.097900390625, 'KL/std': 483.7222900390625, 'logits/chosen': -0.8823180198669434, 'logits/rejected': -0.8721954822540283, 'epoch': 0.74} + 74%|███████▎ | 501/681 [21:16<07:41, 2.56s/it] 74%|███████▎ | 502/681 [21:19<07:40, 2.58s/it] {'loss': 1.0688, 'grad_norm': 29.106658935546875, 'learning_rate': 9.934134090518592e-08, 'fcm_dpo/beta': 0.0011112934444099665, 'fcm_dpo/q_t': 0.40389347076416016, 'fcm_dpo/delta': -0.024970781058073044, 'fcm_dpo/margin': 381.4633483886719, 'margin_dpo/margin_mean': 381.4633483886719, 'margin_dpo/margin_std': 472.6005859375, 'logps/chosen': -585.7492065429688, 'logps/rejected': -982.5608520507812, 'logps/ref_chosen': -67.60050964355469, 'logps/ref_rejected': -82.94876098632812, 'KL/chosen_KL_mean': -518.148681640625, 'KL/rejected_KL_mean': -899.612060546875, 'KL/mean': -708.8804321289062, 'KL/std': 450.12200927734375, 'logits/chosen': -0.8181363940238953, 'logits/rejected': -0.8096420764923096, 'epoch': 0.74} + 74%|███████▎ | 502/681 [21:19<07:40, 2.58s/it] 74%|███████▍ | 503/681 [21:21<07:46, 2.62s/it] {'loss': 1.1006, 'grad_norm': 28.44597816467285, 'learning_rate': 9.831921068732571e-08, 'fcm_dpo/beta': 0.001108947559259832, 'fcm_dpo/q_t': 0.41361480951309204, 'fcm_dpo/delta': 0.01953038200736046, 'fcm_dpo/margin': 343.667236328125, 'margin_dpo/margin_mean': 343.6672668457031, 'margin_dpo/margin_std': 457.1417236328125, 'logps/chosen': -578.8792724609375, 'logps/rejected': -949.9735107421875, 'logps/ref_chosen': -55.078407287597656, 'logps/ref_rejected': -82.50544738769531, 'KL/chosen_KL_mean': -523.8008422851562, 'KL/rejected_KL_mean': -867.4680786132812, 'KL/mean': -695.634521484375, 'KL/std': 425.3750305175781, 'logits/chosen': -0.8395601511001587, 'logits/rejected': -0.8324748873710632, 'epoch': 0.74} + 74%|███████▍ | 503/681 [21:21<07:46, 2.62s/it] 74%|███████▍ | 504/681 [21:24<07:45, 2.63s/it] {'loss': 1.0678, 'grad_norm': 29.1925106048584, 'learning_rate': 9.730107739932805e-08, 'fcm_dpo/beta': 0.0011051710462197661, 'fcm_dpo/q_t': 0.3969269096851349, 'fcm_dpo/delta': -0.06065092608332634, 'fcm_dpo/margin': 414.3384094238281, 'margin_dpo/margin_mean': 414.3384094238281, 'margin_dpo/margin_std': 545.068603515625, 'logps/chosen': -627.961669921875, 'logps/rejected': -1086.096435546875, 'logps/ref_chosen': -59.96575164794922, 'logps/ref_rejected': -103.76212310791016, 'KL/chosen_KL_mean': -567.9959716796875, 'KL/rejected_KL_mean': -982.3343505859375, 'KL/mean': -775.1651611328125, 'KL/std': 495.7042541503906, 'logits/chosen': -0.8878883123397827, 'logits/rejected': -0.9151204228401184, 'epoch': 0.74} + 74%|███████▍ | 504/681 [21:24<07:45, 2.63s/it] 74%|███████▍ | 505/681 [21:27<07:40, 2.61s/it] {'loss': 1.2186, 'grad_norm': 46.33066940307617, 'learning_rate': 9.628696786995188e-08, 'fcm_dpo/beta': 0.0011264740023761988, 'fcm_dpo/q_t': 0.44324439764022827, 'fcm_dpo/delta': 0.15632013976573944, 'fcm_dpo/margin': 219.66627502441406, 'margin_dpo/margin_mean': 219.666259765625, 'margin_dpo/margin_std': 454.0839538574219, 'logps/chosen': -721.1199951171875, 'logps/rejected': -953.2167358398438, 'logps/ref_chosen': -76.1549072265625, 'logps/ref_rejected': -88.58537292480469, 'KL/chosen_KL_mean': -644.965087890625, 'KL/rejected_KL_mean': -864.63134765625, 'KL/mean': -754.7982177734375, 'KL/std': 480.5470275878906, 'logits/chosen': -0.926541805267334, 'logits/rejected': -0.9026806354522705, 'epoch': 0.74} + 74%|███████▍ | 505/681 [21:27<07:40, 2.61s/it] 74%|███████▍ | 506/681 [21:29<07:26, 2.55s/it] {'loss': 1.0916, 'grad_norm': 40.91910171508789, 'learning_rate': 9.527690882192635e-08, 'fcm_dpo/beta': 0.0011272106785327196, 'fcm_dpo/q_t': 0.405214786529541, 'fcm_dpo/delta': -0.017008088529109955, 'fcm_dpo/margin': 368.968017578125, 'margin_dpo/margin_mean': 368.968017578125, 'margin_dpo/margin_std': 511.87725830078125, 'logps/chosen': -565.7027587890625, 'logps/rejected': -964.1253662109375, 'logps/ref_chosen': -48.96050262451172, 'logps/ref_rejected': -78.41505432128906, 'KL/chosen_KL_mean': -516.7422485351562, 'KL/rejected_KL_mean': -885.7102661132812, 'KL/mean': -701.2262573242188, 'KL/std': 465.8466796875, 'logits/chosen': -0.909249484539032, 'logits/rejected': -0.9271351099014282, 'epoch': 0.74} + 74%|███████▍ | 506/681 [21:29<07:26, 2.55s/it] 74%|███████▍ | 507/681 [21:32<07:29, 2.58s/it] {'loss': 1.1574, 'grad_norm': 30.438766479492188, 'learning_rate': 9.427092687124691e-08, 'fcm_dpo/beta': 0.0011373506858944893, 'fcm_dpo/q_t': 0.4222760498523712, 'fcm_dpo/delta': 0.04258999228477478, 'fcm_dpo/margin': 315.60784912109375, 'margin_dpo/margin_mean': 315.6078186035156, 'margin_dpo/margin_std': 585.120849609375, 'logps/chosen': -656.431884765625, 'logps/rejected': -1000.611083984375, 'logps/ref_chosen': -66.80149841308594, 'logps/ref_rejected': -95.37289428710938, 'KL/chosen_KL_mean': -589.63037109375, 'KL/rejected_KL_mean': -905.2382202148438, 'KL/mean': -747.434326171875, 'KL/std': 535.4652099609375, 'logits/chosen': -0.9080416560173035, 'logits/rejected': -0.9134109020233154, 'epoch': 0.74} + 74%|███████▍ | 507/681 [21:32<07:29, 2.58s/it] 75%|███████▍ | 508/681 [21:34<07:28, 2.59s/it] {'loss': 1.2159, 'grad_norm': 43.13357162475586, 'learning_rate': 9.326904852647344e-08, 'fcm_dpo/beta': 0.0011591333895921707, 'fcm_dpo/q_t': 0.43193942308425903, 'fcm_dpo/delta': 0.09055158495903015, 'fcm_dpo/margin': 269.07879638671875, 'margin_dpo/margin_mean': 269.07879638671875, 'margin_dpo/margin_std': 613.018310546875, 'logps/chosen': -714.3203735351562, 'logps/rejected': -1007.7232666015625, 'logps/ref_chosen': -71.303466796875, 'logps/ref_rejected': -95.6275405883789, 'KL/chosen_KL_mean': -643.0169067382812, 'KL/rejected_KL_mean': -912.095703125, 'KL/mean': -777.5562744140625, 'KL/std': 526.7119750976562, 'logits/chosen': -0.8901297450065613, 'logits/rejected': -0.8925095796585083, 'epoch': 0.75} + 75%|███████▍ | 508/681 [21:34<07:28, 2.59s/it] 75%|███████▍ | 509/681 [21:37<07:27, 2.60s/it] {'loss': 1.1345, 'grad_norm': 28.78981590270996, 'learning_rate': 9.227130018803195e-08, 'fcm_dpo/beta': 0.0011755790328606963, 'fcm_dpo/q_t': 0.4198834300041199, 'fcm_dpo/delta': 0.04976864904165268, 'fcm_dpo/margin': 298.8412170410156, 'margin_dpo/margin_mean': 298.8412170410156, 'margin_dpo/margin_std': 454.3382873535156, 'logps/chosen': -539.5061645507812, 'logps/rejected': -857.7847900390625, 'logps/ref_chosen': -63.81895065307617, 'logps/ref_rejected': -83.25643920898438, 'KL/chosen_KL_mean': -475.68719482421875, 'KL/rejected_KL_mean': -774.5283813476562, 'KL/mean': -625.1077880859375, 'KL/std': 375.84735107421875, 'logits/chosen': -0.8108519315719604, 'logits/rejected': -0.8077250123023987, 'epoch': 0.75} + 75%|███████▍ | 509/681 [21:37<07:27, 2.60s/it] 75%|███████▍ | 510/681 [21:40<07:30, 2.63s/it] {'loss': 1.0442, 'grad_norm': 31.50473403930664, 'learning_rate': 9.127770814751932e-08, 'fcm_dpo/beta': 0.001165606314316392, 'fcm_dpo/q_t': 0.395079642534256, 'fcm_dpo/delta': -0.05261443555355072, 'fcm_dpo/margin': 386.30255126953125, 'margin_dpo/margin_mean': 386.30255126953125, 'margin_dpo/margin_std': 427.64312744140625, 'logps/chosen': -625.7650756835938, 'logps/rejected': -1062.954345703125, 'logps/ref_chosen': -51.878448486328125, 'logps/ref_rejected': -102.7651596069336, 'KL/chosen_KL_mean': -573.8865966796875, 'KL/rejected_KL_mean': -960.1890869140625, 'KL/mean': -767.037841796875, 'KL/std': 436.06304931640625, 'logits/chosen': -0.8299954533576965, 'logits/rejected': -0.8495923280715942, 'epoch': 0.75} + 75%|███████▍ | 510/681 [21:40<07:30, 2.63s/it] 75%|███████▌ | 511/681 [21:42<07:24, 2.62s/it] {'loss': 1.1563, 'grad_norm': 36.895851135253906, 'learning_rate': 9.028829858700973e-08, 'fcm_dpo/beta': 0.0011685066856443882, 'fcm_dpo/q_t': 0.4177062213420868, 'fcm_dpo/delta': 0.03857073932886124, 'fcm_dpo/margin': 310.46112060546875, 'margin_dpo/margin_mean': 310.46112060546875, 'margin_dpo/margin_std': 569.1368408203125, 'logps/chosen': -603.4005126953125, 'logps/rejected': -946.4803466796875, 'logps/ref_chosen': -60.23811721801758, 'logps/ref_rejected': -92.85676574707031, 'KL/chosen_KL_mean': -543.1624145507812, 'KL/rejected_KL_mean': -853.62353515625, 'KL/mean': -698.3929443359375, 'KL/std': 473.2135314941406, 'logits/chosen': -0.8786974549293518, 'logits/rejected': -0.8848339319229126, 'epoch': 0.75} + 75%|███████▌ | 511/681 [21:42<07:24, 2.62s/it] 75%|███████▌ | 512/681 [21:44<07:04, 2.51s/it] {'loss': 1.0194, 'grad_norm': 52.474979400634766, 'learning_rate': 8.930309757836516e-08, 'fcm_dpo/beta': 0.0011527151800692081, 'fcm_dpo/q_t': 0.38830769062042236, 'fcm_dpo/delta': -0.08524032682180405, 'fcm_dpo/margin': 417.19671630859375, 'margin_dpo/margin_mean': 417.19671630859375, 'margin_dpo/margin_std': 433.2279052734375, 'logps/chosen': -481.12811279296875, 'logps/rejected': -925.295166015625, 'logps/ref_chosen': -54.905494689941406, 'logps/ref_rejected': -81.87586975097656, 'KL/chosen_KL_mean': -426.22259521484375, 'KL/rejected_KL_mean': -843.4193115234375, 'KL/mean': -634.8209838867188, 'KL/std': 422.7352294921875, 'logits/chosen': -0.8602747917175293, 'logits/rejected': -0.8812437057495117, 'epoch': 0.75} + 75%|███████▌ | 512/681 [21:44<07:04, 2.51s/it] 75%|███████▌ | 513/681 [21:47<07:13, 2.58s/it] {'loss': 1.1427, 'grad_norm': 44.99782943725586, 'learning_rate': 8.832213108254863e-08, 'fcm_dpo/beta': 0.0011531409109011292, 'fcm_dpo/q_t': 0.42183050513267517, 'fcm_dpo/delta': 0.05850052088499069, 'fcm_dpo/margin': 297.572509765625, 'margin_dpo/margin_mean': 297.572509765625, 'margin_dpo/margin_std': 461.5359191894531, 'logps/chosen': -599.0048828125, 'logps/rejected': -907.7233276367188, 'logps/ref_chosen': -64.91644287109375, 'logps/ref_rejected': -76.06245422363281, 'KL/chosen_KL_mean': -534.08837890625, 'KL/rejected_KL_mean': -831.660888671875, 'KL/mean': -682.8746337890625, 'KL/std': 402.69537353515625, 'logits/chosen': -0.9019182920455933, 'logits/rejected': -0.8876909017562866, 'epoch': 0.75} + 75%|███████▌ | 513/681 [21:47<07:13, 2.58s/it] 75%|███████▌ | 514/681 [21:50<07:16, 2.61s/it] {'loss': 1.1448, 'grad_norm': 27.680599212646484, 'learning_rate': 8.734542494893954e-08, 'fcm_dpo/beta': 0.0011747241951525211, 'fcm_dpo/q_t': 0.42250925302505493, 'fcm_dpo/delta': 0.05465298146009445, 'fcm_dpo/margin': 295.4927673339844, 'margin_dpo/margin_mean': 295.4927673339844, 'margin_dpo/margin_std': 489.8013610839844, 'logps/chosen': -630.3372802734375, 'logps/rejected': -930.5460205078125, 'logps/ref_chosen': -74.22957611083984, 'logps/ref_rejected': -78.945556640625, 'KL/chosen_KL_mean': -556.1077270507812, 'KL/rejected_KL_mean': -851.6004638671875, 'KL/mean': -703.8541259765625, 'KL/std': 437.5733642578125, 'logits/chosen': -0.8176130652427673, 'logits/rejected': -0.8068991899490356, 'epoch': 0.75} + 75%|███████▌ | 514/681 [21:50<07:16, 2.61s/it] 76%|███████▌ | 515/681 [21:52<07:12, 2.61s/it] {'loss': 1.2001, 'grad_norm': 43.433387756347656, 'learning_rate': 8.637300491465272e-08, 'fcm_dpo/beta': 0.0011996763059869409, 'fcm_dpo/q_t': 0.4364135265350342, 'fcm_dpo/delta': 0.13362111151218414, 'fcm_dpo/margin': 225.07615661621094, 'margin_dpo/margin_mean': 225.076171875, 'margin_dpo/margin_std': 440.53546142578125, 'logps/chosen': -525.75732421875, 'logps/rejected': -787.5296630859375, 'logps/ref_chosen': -50.40156555175781, 'logps/ref_rejected': -87.09774780273438, 'KL/chosen_KL_mean': -475.355712890625, 'KL/rejected_KL_mean': -700.431884765625, 'KL/mean': -587.893798828125, 'KL/std': 380.00994873046875, 'logits/chosen': -0.8131271600723267, 'logits/rejected': -0.8236969709396362, 'epoch': 0.76} + 76%|███████▌ | 515/681 [21:52<07:12, 2.61s/it] 76%|███████▌ | 516/681 [21:55<06:54, 2.51s/it] {'loss': 1.0691, 'grad_norm': 40.77699279785156, 'learning_rate': 8.540489660386064e-08, 'fcm_dpo/beta': 0.0012069368967786431, 'fcm_dpo/q_t': 0.4014623761177063, 'fcm_dpo/delta': -0.02384302206337452, 'fcm_dpo/margin': 350.2784118652344, 'margin_dpo/margin_mean': 350.2784118652344, 'margin_dpo/margin_std': 421.2920227050781, 'logps/chosen': -574.0386962890625, 'logps/rejected': -971.3899536132812, 'logps/ref_chosen': -64.64956665039062, 'logps/ref_rejected': -111.72237396240234, 'KL/chosen_KL_mean': -509.38916015625, 'KL/rejected_KL_mean': -859.6676025390625, 'KL/mean': -684.5283813476562, 'KL/std': 431.451416015625, 'logits/chosen': -0.9214959740638733, 'logits/rejected': -0.9501833319664001, 'epoch': 0.76} + 76%|███████▌ | 516/681 [21:55<06:54, 2.51s/it] 76%|███████▌ | 517/681 [21:57<06:51, 2.51s/it] {'loss': 1.0478, 'grad_norm': 32.40835189819336, 'learning_rate': 8.444112552711752e-08, 'fcm_dpo/beta': 0.0011841601226478815, 'fcm_dpo/q_t': 0.39262643456459045, 'fcm_dpo/delta': -0.08773398399353027, 'fcm_dpo/margin': 408.0665283203125, 'margin_dpo/margin_mean': 408.0665283203125, 'margin_dpo/margin_std': 531.6887817382812, 'logps/chosen': -606.9964599609375, 'logps/rejected': -1043.232421875, 'logps/ref_chosen': -60.913551330566406, 'logps/ref_rejected': -89.08308410644531, 'KL/chosen_KL_mean': -546.0828857421875, 'KL/rejected_KL_mean': -954.1494140625, 'KL/mean': -750.1161499023438, 'KL/std': 485.58392333984375, 'logits/chosen': -0.8471901416778564, 'logits/rejected': -0.8455414772033691, 'epoch': 0.76} + 76%|███████▌ | 517/681 [21:57<06:51, 2.51s/it] 76%|███████▌ | 518/681 [22:00<06:59, 2.57s/it] {'loss': 1.0905, 'grad_norm': 53.09996795654297, 'learning_rate': 8.348171708068747e-08, 'fcm_dpo/beta': 0.0011782585643231869, 'fcm_dpo/q_t': 0.40781712532043457, 'fcm_dpo/delta': 0.004059506580233574, 'fcm_dpo/margin': 336.003662109375, 'margin_dpo/margin_mean': 336.003662109375, 'margin_dpo/margin_std': 429.115966796875, 'logps/chosen': -548.8475341796875, 'logps/rejected': -912.7080078125, 'logps/ref_chosen': -57.45589065551758, 'logps/ref_rejected': -85.31269836425781, 'KL/chosen_KL_mean': -491.3916931152344, 'KL/rejected_KL_mean': -827.3953247070312, 'KL/mean': -659.3934936523438, 'KL/std': 386.0772705078125, 'logits/chosen': -0.8820132613182068, 'logits/rejected': -0.8964939117431641, 'epoch': 0.76} + 76%|███████▌ | 518/681 [22:00<06:59, 2.57s/it] 76%|███████▌ | 519/681 [22:03<07:12, 2.67s/it] {'loss': 1.1966, 'grad_norm': 40.137332916259766, 'learning_rate': 8.25266965458755e-08, 'fcm_dpo/beta': 0.0011857892386615276, 'fcm_dpo/q_t': 0.4360736012458801, 'fcm_dpo/delta': 0.028549687936902046, 'fcm_dpo/margin': 231.53121948242188, 'margin_dpo/margin_mean': 231.53121948242188, 'margin_dpo/margin_std': 438.93048095703125, 'logps/chosen': -581.9234008789062, 'logps/rejected': -843.83544921875, 'logps/ref_chosen': -74.06331634521484, 'logps/ref_rejected': -104.44416809082031, 'KL/chosen_KL_mean': -507.8600769042969, 'KL/rejected_KL_mean': -739.3912963867188, 'KL/mean': -623.6256713867188, 'KL/std': 352.45123291015625, 'logits/chosen': -0.8481384515762329, 'logits/rejected': -0.8306090235710144, 'epoch': 0.76} + 76%|███████▌ | 519/681 [22:03<07:12, 2.67s/it] 76%|███████▋ | 520/681 [22:06<07:09, 2.67s/it] {'loss': 1.1217, 'grad_norm': 39.273406982421875, 'learning_rate': 8.15760890883607e-08, 'fcm_dpo/beta': 0.0011898789089173079, 'fcm_dpo/q_t': 0.4156090021133423, 'fcm_dpo/delta': 0.024717746302485466, 'fcm_dpo/margin': 316.0799560546875, 'margin_dpo/margin_mean': 316.0799560546875, 'margin_dpo/margin_std': 469.7958068847656, 'logps/chosen': -613.88232421875, 'logps/rejected': -959.643798828125, 'logps/ref_chosen': -70.2998275756836, 'logps/ref_rejected': -99.98133850097656, 'KL/chosen_KL_mean': -543.58251953125, 'KL/rejected_KL_mean': -859.6624145507812, 'KL/mean': -701.6224975585938, 'KL/std': 423.9530029296875, 'logits/chosen': -0.8328167200088501, 'logits/rejected': -0.839728832244873, 'epoch': 0.76} + 76%|███████▋ | 520/681 [22:06<07:09, 2.67s/it] 77%|███████▋ | 521/681 [22:08<07:06, 2.67s/it] {'loss': 1.0873, 'grad_norm': 35.94675827026367, 'learning_rate': 8.062991975753378e-08, 'fcm_dpo/beta': 0.0012013925006613135, 'fcm_dpo/q_t': 0.4064163863658905, 'fcm_dpo/delta': -0.010350905358791351, 'fcm_dpo/margin': 340.51385498046875, 'margin_dpo/margin_mean': 340.51385498046875, 'margin_dpo/margin_std': 440.299560546875, 'logps/chosen': -543.9345703125, 'logps/rejected': -909.5860595703125, 'logps/ref_chosen': -58.14292526245117, 'logps/ref_rejected': -83.28060913085938, 'KL/chosen_KL_mean': -485.7915954589844, 'KL/rejected_KL_mean': -826.305419921875, 'KL/mean': -656.0485229492188, 'KL/std': 436.52398681640625, 'logits/chosen': -0.8887852430343628, 'logits/rejected': -0.8937211036682129, 'epoch': 0.77} + 77%|███████▋ | 521/681 [22:08<07:06, 2.67s/it] 77%|███████▋ | 522/681 [22:11<07:01, 2.65s/it] {'loss': 1.1338, 'grad_norm': 31.800031661987305, 'learning_rate': 7.968821348583643e-08, 'fcm_dpo/beta': 0.0011995111126452684, 'fcm_dpo/q_t': 0.41692230105400085, 'fcm_dpo/delta': 0.03616529330611229, 'fcm_dpo/margin': 304.40625, 'margin_dpo/margin_mean': 304.40625, 'margin_dpo/margin_std': 485.10552978515625, 'logps/chosen': -604.60888671875, 'logps/rejected': -928.4814453125, 'logps/ref_chosen': -46.54766845703125, 'logps/ref_rejected': -66.01388549804688, 'KL/chosen_KL_mean': -558.0612182617188, 'KL/rejected_KL_mean': -862.467529296875, 'KL/mean': -710.2643432617188, 'KL/std': 458.29718017578125, 'logits/chosen': -0.887365460395813, 'logits/rejected': -0.8906110525131226, 'epoch': 0.77} + 77%|███████▋ | 522/681 [22:11<07:01, 2.65s/it] 77%|███████▋ | 523/681 [22:13<07:01, 2.67s/it] {'loss': 1.1345, 'grad_norm': 40.38140106201172, 'learning_rate': 7.875099508810484e-08, 'fcm_dpo/beta': 0.0012012626975774765, 'fcm_dpo/q_t': 0.4126874804496765, 'fcm_dpo/delta': 0.006936301477253437, 'fcm_dpo/margin': 327.4243469238281, 'margin_dpo/margin_mean': 327.42437744140625, 'margin_dpo/margin_std': 560.2000732421875, 'logps/chosen': -651.356201171875, 'logps/rejected': -1000.7723999023438, 'logps/ref_chosen': -61.76960372924805, 'logps/ref_rejected': -83.76141357421875, 'KL/chosen_KL_mean': -589.586669921875, 'KL/rejected_KL_mean': -917.010986328125, 'KL/mean': -753.298828125, 'KL/std': 527.0751342773438, 'logits/chosen': -0.954033613204956, 'logits/rejected': -0.9616571068763733, 'epoch': 0.77} + 77%|███████▋ | 523/681 [22:14<07:01, 2.67s/it] 77%|███████▋ | 524/681 [22:16<06:57, 2.66s/it] {'loss': 1.1083, 'grad_norm': 41.24837112426758, 'learning_rate': 7.781828926091535e-08, 'fcm_dpo/beta': 0.001195290358737111, 'fcm_dpo/q_t': 0.40466296672821045, 'fcm_dpo/delta': -0.008701588958501816, 'fcm_dpo/margin': 341.17156982421875, 'margin_dpo/margin_mean': 341.17156982421875, 'margin_dpo/margin_std': 499.772705078125, 'logps/chosen': -648.994140625, 'logps/rejected': -993.3956298828125, 'logps/ref_chosen': -78.0720443725586, 'logps/ref_rejected': -81.30198669433594, 'KL/chosen_KL_mean': -570.922119140625, 'KL/rejected_KL_mean': -912.0936279296875, 'KL/mean': -741.5078735351562, 'KL/std': 479.9468994140625, 'logits/chosen': -0.9870351552963257, 'logits/rejected': -0.9809165000915527, 'epoch': 0.77} + 77%|███████▋ | 524/681 [22:16<06:57, 2.66s/it] 77%|███████▋ | 525/681 [22:19<06:56, 2.67s/it] {'loss': 1.0245, 'grad_norm': 30.923978805541992, 'learning_rate': 7.689012058193384e-08, 'fcm_dpo/beta': 0.001174594508484006, 'fcm_dpo/q_t': 0.384994238615036, 'fcm_dpo/delta': -0.12164277583360672, 'fcm_dpo/margin': 438.11553955078125, 'margin_dpo/margin_mean': 438.11553955078125, 'margin_dpo/margin_std': 533.79833984375, 'logps/chosen': -630.6170654296875, 'logps/rejected': -1117.9576416015625, 'logps/ref_chosen': -50.827857971191406, 'logps/ref_rejected': -100.05294036865234, 'KL/chosen_KL_mean': -579.7892456054688, 'KL/rejected_KL_mean': -1017.9046630859375, 'KL/mean': -798.8469848632812, 'KL/std': 511.00323486328125, 'logits/chosen': -0.9067383408546448, 'logits/rejected': -0.943909764289856, 'epoch': 0.77} + 77%|███████▋ | 525/681 [22:19<06:56, 2.67s/it] 77%|███████▋ | 526/681 [22:21<06:47, 2.63s/it] {'loss': 1.0538, 'grad_norm': 29.568998336791992, 'learning_rate': 7.596651350926836e-08, 'fcm_dpo/beta': 0.0011603353777900338, 'fcm_dpo/q_t': 0.38871896266937256, 'fcm_dpo/delta': -0.0920577421784401, 'fcm_dpo/margin': 420.246337890625, 'margin_dpo/margin_mean': 420.246337890625, 'margin_dpo/margin_std': 552.4700317382812, 'logps/chosen': -676.84423828125, 'logps/rejected': -1120.2327880859375, 'logps/ref_chosen': -63.167236328125, 'logps/ref_rejected': -86.30934143066406, 'KL/chosen_KL_mean': -613.677001953125, 'KL/rejected_KL_mean': -1033.9234619140625, 'KL/mean': -823.80029296875, 'KL/std': 496.4564208984375, 'logits/chosen': -0.9136035442352295, 'logits/rejected': -0.9113898873329163, 'epoch': 0.77} + 77%|███████▋ | 526/681 [22:21<06:47, 2.63s/it] 77%|███████▋ | 527/681 [22:24<06:40, 2.60s/it] {'loss': 1.1347, 'grad_norm': 32.31246566772461, 'learning_rate': 7.504749238082414e-08, 'fcm_dpo/beta': 0.0011582564329728484, 'fcm_dpo/q_t': 0.4208937883377075, 'fcm_dpo/delta': 0.062111612409353256, 'fcm_dpo/margin': 293.5635070800781, 'margin_dpo/margin_mean': 293.5634765625, 'margin_dpo/margin_std': 429.7330322265625, 'logps/chosen': -698.6761474609375, 'logps/rejected': -999.4535522460938, 'logps/ref_chosen': -71.12867736816406, 'logps/ref_rejected': -78.3425521850586, 'KL/chosen_KL_mean': -627.5474853515625, 'KL/rejected_KL_mean': -921.1109619140625, 'KL/mean': -774.3292236328125, 'KL/std': 518.6869506835938, 'logits/chosen': -1.083193063735962, 'logits/rejected': -1.0532429218292236, 'epoch': 0.77} + 77%|███████▋ | 527/681 [22:24<06:40, 2.60s/it] 78%|███████▊ | 528/681 [22:27<06:42, 2.63s/it] {'loss': 1.1168, 'grad_norm': 51.520660400390625, 'learning_rate': 7.413308141366254e-08, 'fcm_dpo/beta': 0.0011612444650381804, 'fcm_dpo/q_t': 0.40898245573043823, 'fcm_dpo/delta': -0.01449208240956068, 'fcm_dpo/margin': 356.39044189453125, 'margin_dpo/margin_mean': 356.39044189453125, 'margin_dpo/margin_std': 576.7999267578125, 'logps/chosen': -714.2769165039062, 'logps/rejected': -1096.488037109375, 'logps/ref_chosen': -68.0894546508789, 'logps/ref_rejected': -93.91006469726562, 'KL/chosen_KL_mean': -646.1875, 'KL/rejected_KL_mean': -1002.5779418945312, 'KL/mean': -824.3826904296875, 'KL/std': 493.030029296875, 'logits/chosen': -1.0223352909088135, 'logits/rejected': -1.0113223791122437, 'epoch': 0.78} + 78%|███████▊ | 528/681 [22:27<06:42, 2.63s/it] 78%|███████▊ | 529/681 [22:29<06:46, 2.68s/it] {'loss': 1.2374, 'grad_norm': 43.96234893798828, 'learning_rate': 7.322330470336313e-08, 'fcm_dpo/beta': 0.0011665602214634418, 'fcm_dpo/q_t': 0.4364190697669983, 'fcm_dpo/delta': 0.02131509780883789, 'fcm_dpo/margin': 237.14752197265625, 'margin_dpo/margin_mean': 237.14752197265625, 'margin_dpo/margin_std': 574.8782348632812, 'logps/chosen': -818.3095703125, 'logps/rejected': -1089.0911865234375, 'logps/ref_chosen': -55.57495880126953, 'logps/ref_rejected': -89.20909118652344, 'KL/chosen_KL_mean': -762.7345581054688, 'KL/rejected_KL_mean': -999.882080078125, 'KL/mean': -881.308349609375, 'KL/std': 446.8496398925781, 'logits/chosen': -1.0255260467529297, 'logits/rejected': -1.0359970331192017, 'epoch': 0.78} + 78%|███████▊ | 529/681 [22:29<06:46, 2.68s/it] 78%|███████▊ | 530/681 [22:32<06:49, 2.71s/it] {'loss': 1.1248, 'grad_norm': 53.588645935058594, 'learning_rate': 7.231818622338822e-08, 'fcm_dpo/beta': 0.0011577388504520059, 'fcm_dpo/q_t': 0.4014556407928467, 'fcm_dpo/delta': -0.05902961269021034, 'fcm_dpo/margin': 394.1950988769531, 'margin_dpo/margin_mean': 394.1950988769531, 'margin_dpo/margin_std': 704.854736328125, 'logps/chosen': -716.2905883789062, 'logps/rejected': -1150.1688232421875, 'logps/ref_chosen': -47.601417541503906, 'logps/ref_rejected': -87.2845230102539, 'KL/chosen_KL_mean': -668.689208984375, 'KL/rejected_KL_mean': -1062.88427734375, 'KL/mean': -865.7867431640625, 'KL/std': 562.298095703125, 'logits/chosen': -0.9486408829689026, 'logits/rejected': -0.9478579759597778, 'epoch': 0.78} + 78%|███████▊ | 530/681 [22:32<06:49, 2.71s/it] 78%|███████▊ | 531/681 [22:34<06:27, 2.58s/it] {'loss': 1.1178, 'grad_norm': 36.676822662353516, 'learning_rate': 7.141774982445147e-08, 'fcm_dpo/beta': 0.0011553821386769414, 'fcm_dpo/q_t': 0.4106895327568054, 'fcm_dpo/delta': -0.0027520228177309036, 'fcm_dpo/margin': 348.384521484375, 'margin_dpo/margin_mean': 348.3844909667969, 'margin_dpo/margin_std': 555.5078735351562, 'logps/chosen': -782.0972900390625, 'logps/rejected': -1145.841796875, 'logps/ref_chosen': -55.246063232421875, 'logps/ref_rejected': -70.60598754882812, 'KL/chosen_KL_mean': -726.8511962890625, 'KL/rejected_KL_mean': -1075.2357177734375, 'KL/mean': -901.04345703125, 'KL/std': 581.962158203125, 'logits/chosen': -1.0601121187210083, 'logits/rejected': -1.0491019487380981, 'epoch': 0.78} + 78%|███████▊ | 531/681 [22:34<06:27, 2.58s/it] 78%|███████▊ | 532/681 [22:37<06:30, 2.62s/it] {'loss': 1.1519, 'grad_norm': 76.43359375, 'learning_rate': 7.052201923388953e-08, 'fcm_dpo/beta': 0.0011431981110945344, 'fcm_dpo/q_t': 0.41110938787460327, 'fcm_dpo/delta': -0.0051701366901397705, 'fcm_dpo/margin': 353.33392333984375, 'margin_dpo/margin_mean': 353.33392333984375, 'margin_dpo/margin_std': 646.3972778320312, 'logps/chosen': -803.6498413085938, 'logps/rejected': -1173.2890625, 'logps/ref_chosen': -70.28601837158203, 'logps/ref_rejected': -86.5913314819336, 'KL/chosen_KL_mean': -733.3638305664062, 'KL/rejected_KL_mean': -1086.69775390625, 'KL/mean': -910.0308227539062, 'KL/std': 548.9955444335938, 'logits/chosen': -0.986026406288147, 'logits/rejected': -0.962155282497406, 'epoch': 0.78} + 78%|███████▊ | 532/681 [22:37<06:30, 2.62s/it] 78%|███████▊ | 533/681 [22:40<06:25, 2.60s/it] {'loss': 1.2054, 'grad_norm': 54.19781494140625, 'learning_rate': 6.963101805503646e-08, 'fcm_dpo/beta': 0.00114994659088552, 'fcm_dpo/q_t': 0.4316534399986267, 'fcm_dpo/delta': -0.009539761580526829, 'fcm_dpo/margin': 262.97039794921875, 'margin_dpo/margin_mean': 262.97039794921875, 'margin_dpo/margin_std': 563.1723022460938, 'logps/chosen': -718.072509765625, 'logps/rejected': -992.77587890625, 'logps/ref_chosen': -64.8551025390625, 'logps/ref_rejected': -76.58805847167969, 'KL/chosen_KL_mean': -653.2174072265625, 'KL/rejected_KL_mean': -916.1878051757812, 'KL/mean': -784.70263671875, 'KL/std': 470.7712707519531, 'logits/chosen': -0.9915690422058105, 'logits/rejected': -0.9716913104057312, 'epoch': 0.78} + 78%|███████▊ | 533/681 [22:40<06:25, 2.60s/it] 78%|███████▊ | 534/681 [22:42<06:22, 2.60s/it] {'loss': 1.1061, 'grad_norm': 37.44085693359375, 'learning_rate': 6.874476976660184e-08, 'fcm_dpo/beta': 0.001137340790592134, 'fcm_dpo/q_t': 0.40718841552734375, 'fcm_dpo/delta': -0.017138652503490448, 'fcm_dpo/margin': 365.05377197265625, 'margin_dpo/margin_mean': 365.0538024902344, 'margin_dpo/margin_std': 543.7841796875, 'logps/chosen': -758.4595947265625, 'logps/rejected': -1141.9375, 'logps/ref_chosen': -60.119388580322266, 'logps/ref_rejected': -78.54347229003906, 'KL/chosen_KL_mean': -698.3402099609375, 'KL/rejected_KL_mean': -1063.39404296875, 'KL/mean': -880.8671875, 'KL/std': 534.1531982421875, 'logits/chosen': -0.9751067757606506, 'logits/rejected': -0.9717357158660889, 'epoch': 0.78} + 78%|███████▊ | 534/681 [22:42<06:22, 2.60s/it] 79%|███████▊ | 535/681 [22:45<06:19, 2.60s/it] {'loss': 1.0564, 'grad_norm': 29.454992294311523, 'learning_rate': 6.786329772205246e-08, 'fcm_dpo/beta': 0.0011387758422642946, 'fcm_dpo/q_t': 0.39370042085647583, 'fcm_dpo/delta': -0.0821937620639801, 'fcm_dpo/margin': 419.6590576171875, 'margin_dpo/margin_mean': 419.6590576171875, 'margin_dpo/margin_std': 548.3145141601562, 'logps/chosen': -638.4017333984375, 'logps/rejected': -1100.0382080078125, 'logps/ref_chosen': -54.330238342285156, 'logps/ref_rejected': -96.30763244628906, 'KL/chosen_KL_mean': -584.0714721679688, 'KL/rejected_KL_mean': -1003.7305908203125, 'KL/mean': -793.9010620117188, 'KL/std': 514.93896484375, 'logits/chosen': -0.9155275821685791, 'logits/rejected': -0.9193699359893799, 'epoch': 0.79} + 79%|███████▊ | 535/681 [22:45<06:19, 2.60s/it] 79%|███████▊ | 536/681 [22:48<06:19, 2.62s/it] {'loss': 1.0258, 'grad_norm': 38.40432357788086, 'learning_rate': 6.698662514899638e-08, 'fcm_dpo/beta': 0.0011007413268089294, 'fcm_dpo/q_t': 0.3827175498008728, 'fcm_dpo/delta': -0.15751913189888, 'fcm_dpo/margin': 498.69720458984375, 'margin_dpo/margin_mean': 498.6971740722656, 'margin_dpo/margin_std': 687.29541015625, 'logps/chosen': -575.4672241210938, 'logps/rejected': -1116.181640625, 'logps/ref_chosen': -47.08053207397461, 'logps/ref_rejected': -89.09783935546875, 'KL/chosen_KL_mean': -528.38671875, 'KL/rejected_KL_mean': -1027.083740234375, 'KL/mean': -777.7352905273438, 'KL/std': 573.330322265625, 'logits/chosen': -0.9019815325737, 'logits/rejected': -0.933282732963562, 'epoch': 0.79} + 79%|███████▊ | 536/681 [22:48<06:19, 2.62s/it] 79%|███████▉ | 537/681 [22:50<06:13, 2.60s/it] {'loss': 1.1392, 'grad_norm': 42.51823425292969, 'learning_rate': 6.611477514857114e-08, 'fcm_dpo/beta': 0.0011007563443854451, 'fcm_dpo/q_t': 0.4137336313724518, 'fcm_dpo/delta': 0.021880976855754852, 'fcm_dpo/margin': 343.6505126953125, 'margin_dpo/margin_mean': 343.6505126953125, 'margin_dpo/margin_std': 572.0556640625, 'logps/chosen': -610.2826538085938, 'logps/rejected': -966.6240234375, 'logps/ref_chosen': -57.747467041015625, 'logps/ref_rejected': -70.43838500976562, 'KL/chosen_KL_mean': -552.53515625, 'KL/rejected_KL_mean': -896.1856689453125, 'KL/mean': -724.3604736328125, 'KL/std': 467.387939453125, 'logits/chosen': -0.9375029802322388, 'logits/rejected': -0.9250655770301819, 'epoch': 0.79} + 79%|███████▉ | 537/681 [22:50<06:13, 2.60s/it] 79%|███████▉ | 538/681 [22:53<06:12, 2.61s/it] {'loss': 1.0884, 'grad_norm': 32.99623489379883, 'learning_rate': 6.524777069483525e-08, 'fcm_dpo/beta': 0.0010912481229752302, 'fcm_dpo/q_t': 0.40652403235435486, 'fcm_dpo/delta': -0.011391473934054375, 'fcm_dpo/margin': 376.39959716796875, 'margin_dpo/margin_mean': 376.3995666503906, 'margin_dpo/margin_std': 509.0475158691406, 'logps/chosen': -741.4945678710938, 'logps/rejected': -1135.706298828125, 'logps/ref_chosen': -66.41594696044922, 'logps/ref_rejected': -84.22808837890625, 'KL/chosen_KL_mean': -675.07861328125, 'KL/rejected_KL_mean': -1051.478271484375, 'KL/mean': -863.2783813476562, 'KL/std': 501.026123046875, 'logits/chosen': -0.9146217107772827, 'logits/rejected': -0.9003403186798096, 'epoch': 0.79} + 79%|███████▉ | 538/681 [22:53<06:12, 2.61s/it] 79%|███████▉ | 539/681 [22:55<06:14, 2.63s/it] {'loss': 1.0922, 'grad_norm': 36.090091705322266, 'learning_rate': 6.438563463416221e-08, 'fcm_dpo/beta': 0.0010967530542984605, 'fcm_dpo/q_t': 0.4094918370246887, 'fcm_dpo/delta': 0.004480024799704552, 'fcm_dpo/margin': 360.6948547363281, 'margin_dpo/margin_mean': 360.6948547363281, 'margin_dpo/margin_std': 470.793212890625, 'logps/chosen': -635.21337890625, 'logps/rejected': -1029.269287109375, 'logps/ref_chosen': -58.492855072021484, 'logps/ref_rejected': -91.85395050048828, 'KL/chosen_KL_mean': -576.7205810546875, 'KL/rejected_KL_mean': -937.4154052734375, 'KL/mean': -757.0679931640625, 'KL/std': 426.46710205078125, 'logits/chosen': -0.9449999332427979, 'logits/rejected': -0.9356608390808105, 'epoch': 0.79} + 79%|███████▉ | 539/681 [22:55<06:14, 2.63s/it] 79%|███████▉ | 540/681 [22:58<06:09, 2.62s/it] {'loss': 1.052, 'grad_norm': 34.88587951660156, 'learning_rate': 6.352838968463919e-08, 'fcm_dpo/beta': 0.001079935347661376, 'fcm_dpo/q_t': 0.3896099925041199, 'fcm_dpo/delta': -0.1131967157125473, 'fcm_dpo/margin': 470.0218811035156, 'margin_dpo/margin_mean': 470.0218505859375, 'margin_dpo/margin_std': 641.974853515625, 'logps/chosen': -648.696044921875, 'logps/rejected': -1171.665283203125, 'logps/ref_chosen': -63.482513427734375, 'logps/ref_rejected': -116.42999267578125, 'KL/chosen_KL_mean': -585.2135620117188, 'KL/rejected_KL_mean': -1055.2353515625, 'KL/mean': -820.2244262695312, 'KL/std': 539.0792236328125, 'logits/chosen': -0.9021656513214111, 'logits/rejected': -0.9275361895561218, 'epoch': 0.79} + 79%|███████▉ | 540/681 [22:58<06:09, 2.62s/it] 79%|███████▉ | 541/681 [23:00<05:57, 2.55s/it] {'loss': 1.2295, 'grad_norm': 53.53697204589844, 'learning_rate': 6.267605843546767e-08, 'fcm_dpo/beta': 0.0010710853384807706, 'fcm_dpo/q_t': 0.4407821297645569, 'fcm_dpo/delta': 0.004636428784579039, 'fcm_dpo/margin': 243.10484313964844, 'margin_dpo/margin_mean': 243.10482788085938, 'margin_dpo/margin_std': 556.22802734375, 'logps/chosen': -769.9769897460938, 'logps/rejected': -1038.0751953125, 'logps/ref_chosen': -78.28036499023438, 'logps/ref_rejected': -103.273681640625, 'KL/chosen_KL_mean': -691.6966552734375, 'KL/rejected_KL_mean': -934.8014526367188, 'KL/mean': -813.2490234375, 'KL/std': 476.3492431640625, 'logits/chosen': -1.006117343902588, 'logits/rejected': -1.0002844333648682, 'epoch': 0.79} + 79%|███████▉ | 541/681 [23:00<05:57, 2.55s/it] 80%|███████▉ | 542/681 [23:03<06:05, 2.63s/it] {'loss': 1.0568, 'grad_norm': 52.26215362548828, 'learning_rate': 6.182866334636888e-08, 'fcm_dpo/beta': 0.001048381207510829, 'fcm_dpo/q_t': 0.3913062810897827, 'fcm_dpo/delta': -0.09220831096172333, 'fcm_dpo/margin': 463.4676208496094, 'margin_dpo/margin_mean': 463.46759033203125, 'margin_dpo/margin_std': 619.2740478515625, 'logps/chosen': -643.865234375, 'logps/rejected': -1146.3228759765625, 'logps/ref_chosen': -57.48497009277344, 'logps/ref_rejected': -96.47506713867188, 'KL/chosen_KL_mean': -586.3802490234375, 'KL/rejected_KL_mean': -1049.8477783203125, 'KL/mean': -818.114013671875, 'KL/std': 528.82470703125, 'logits/chosen': -0.9827414751052856, 'logits/rejected': -1.0148510932922363, 'epoch': 0.8} + 80%|███████▉ | 542/681 [23:03<06:05, 2.63s/it] 80%|███████▉ | 543/681 [23:06<05:59, 2.61s/it] {'loss': 1.2008, 'grad_norm': 37.28949737548828, 'learning_rate': 6.098622674699147e-08, 'fcm_dpo/beta': 0.0010581112001091242, 'fcm_dpo/q_t': 0.4326293468475342, 'fcm_dpo/delta': 0.05605652183294296, 'fcm_dpo/margin': 326.9117431640625, 'margin_dpo/margin_mean': 326.91180419921875, 'margin_dpo/margin_std': 747.7957763671875, 'logps/chosen': -732.422607421875, 'logps/rejected': -1104.3157958984375, 'logps/ref_chosen': -60.61750793457031, 'logps/ref_rejected': -105.59896850585938, 'KL/chosen_KL_mean': -671.8050537109375, 'KL/rejected_KL_mean': -998.716796875, 'KL/mean': -835.260986328125, 'KL/std': 636.306640625, 'logits/chosen': -0.9363719820976257, 'logits/rejected': -0.9664100408554077, 'epoch': 0.8} + 80%|███████▉ | 543/681 [23:06<05:59, 2.61s/it] 80%|███████▉ | 544/681 [23:08<05:54, 2.59s/it] {'loss': 1.1033, 'grad_norm': 34.128662109375, 'learning_rate': 6.01487708363232e-08, 'fcm_dpo/beta': 0.0010604651179164648, 'fcm_dpo/q_t': 0.4069485068321228, 'fcm_dpo/delta': -0.011842611245810986, 'fcm_dpo/margin': 387.8731994628906, 'margin_dpo/margin_mean': 387.8731994628906, 'margin_dpo/margin_std': 581.0994262695312, 'logps/chosen': -731.4625244140625, 'logps/rejected': -1160.648193359375, 'logps/ref_chosen': -59.642303466796875, 'logps/ref_rejected': -100.95469665527344, 'KL/chosen_KL_mean': -671.8202514648438, 'KL/rejected_KL_mean': -1059.6934814453125, 'KL/mean': -865.7568359375, 'KL/std': 506.9651794433594, 'logits/chosen': -0.9132235050201416, 'logits/rejected': -0.9313616752624512, 'epoch': 0.8} + 80%|███████▉ | 544/681 [23:08<05:54, 2.59s/it] 80%|████████ | 545/681 [23:11<05:49, 2.57s/it] {'loss': 1.0465, 'grad_norm': 40.978694915771484, 'learning_rate': 5.9316317682106294e-08, 'fcm_dpo/beta': 0.001048812409862876, 'fcm_dpo/q_t': 0.39258188009262085, 'fcm_dpo/delta': -0.0808180570602417, 'fcm_dpo/margin': 454.80206298828125, 'margin_dpo/margin_mean': 454.80206298828125, 'margin_dpo/margin_std': 570.3173828125, 'logps/chosen': -678.5577392578125, 'logps/rejected': -1161.619140625, 'logps/ref_chosen': -67.64859771728516, 'logps/ref_rejected': -95.90800476074219, 'KL/chosen_KL_mean': -610.9091796875, 'KL/rejected_KL_mean': -1065.711181640625, 'KL/mean': -838.3101806640625, 'KL/std': 501.88525390625, 'logits/chosen': -0.8520915508270264, 'logits/rejected': -0.8862226009368896, 'epoch': 0.8} + 80%|████████ | 545/681 [23:11<05:49, 2.57s/it] 80%|████████ | 546/681 [23:13<05:46, 2.56s/it] {'loss': 1.1611, 'grad_norm': 33.9541015625, 'learning_rate': 5.848888922025552e-08, 'fcm_dpo/beta': 0.0010577274952083826, 'fcm_dpo/q_t': 0.42593374848365784, 'fcm_dpo/delta': 0.08934411406517029, 'fcm_dpo/margin': 296.3592529296875, 'margin_dpo/margin_mean': 296.3592529296875, 'margin_dpo/margin_std': 485.8372802734375, 'logps/chosen': -638.8780517578125, 'logps/rejected': -966.3592529296875, 'logps/ref_chosen': -50.744232177734375, 'logps/ref_rejected': -81.86622619628906, 'KL/chosen_KL_mean': -588.1337890625, 'KL/rejected_KL_mean': -884.4930419921875, 'KL/mean': -736.3134155273438, 'KL/std': 446.0101013183594, 'logits/chosen': -0.9163818359375, 'logits/rejected': -0.9050056338310242, 'epoch': 0.8} + 80%|████████ | 546/681 [23:13<05:46, 2.56s/it] 80%|████████ | 547/681 [23:16<05:50, 2.61s/it] {'loss': 1.0896, 'grad_norm': 40.24742126464844, 'learning_rate': 5.7666507254280265e-08, 'fcm_dpo/beta': 0.0010618357919156551, 'fcm_dpo/q_t': 0.4060874581336975, 'fcm_dpo/delta': -0.016077794134616852, 'fcm_dpo/margin': 391.194091796875, 'margin_dpo/margin_mean': 391.194091796875, 'margin_dpo/margin_std': 540.9904174804688, 'logps/chosen': -663.6678466796875, 'logps/rejected': -1071.9356689453125, 'logps/ref_chosen': -73.6877212524414, 'logps/ref_rejected': -90.76136779785156, 'KL/chosen_KL_mean': -589.9801635742188, 'KL/rejected_KL_mean': -981.17431640625, 'KL/mean': -785.5772094726562, 'KL/std': 491.4225769042969, 'logits/chosen': -0.8674280643463135, 'logits/rejected': -0.881703794002533, 'epoch': 0.8} + 80%|████████ | 547/681 [23:16<05:50, 2.61s/it] 80%|████████ | 548/681 [23:19<05:43, 2.58s/it] {'loss': 1.1266, 'grad_norm': 31.893949508666992, 'learning_rate': 5.684919345471029e-08, 'fcm_dpo/beta': 0.00106256443541497, 'fcm_dpo/q_t': 0.41794323921203613, 'fcm_dpo/delta': 0.021664846688508987, 'fcm_dpo/margin': 356.8335266113281, 'margin_dpo/margin_mean': 356.8335266113281, 'margin_dpo/margin_std': 595.5147094726562, 'logps/chosen': -687.3170166015625, 'logps/rejected': -1073.022216796875, 'logps/ref_chosen': -65.24634552001953, 'logps/ref_rejected': -94.11807250976562, 'KL/chosen_KL_mean': -622.0706787109375, 'KL/rejected_KL_mean': -978.9041748046875, 'KL/mean': -800.4874267578125, 'KL/std': 532.3212280273438, 'logits/chosen': -0.9592008590698242, 'logits/rejected': -0.9574205875396729, 'epoch': 0.8} + 80%|████████ | 548/681 [23:19<05:43, 2.58s/it] 81%|████████ | 549/681 [23:21<05:37, 2.56s/it] {'loss': 1.1934, 'grad_norm': 56.424461364746094, 'learning_rate': 5.603696935852426e-08, 'fcm_dpo/beta': 0.0010651289485394955, 'fcm_dpo/q_t': 0.4341329336166382, 'fcm_dpo/delta': 0.011987905949354172, 'fcm_dpo/margin': 274.5236511230469, 'margin_dpo/margin_mean': 274.52362060546875, 'margin_dpo/margin_std': 537.4019775390625, 'logps/chosen': -697.5167236328125, 'logps/rejected': -996.7384033203125, 'logps/ref_chosen': -49.21235656738281, 'logps/ref_rejected': -73.91031646728516, 'KL/chosen_KL_mean': -648.3043823242188, 'KL/rejected_KL_mean': -922.8280639648438, 'KL/mean': -785.566162109375, 'KL/std': 443.6226501464844, 'logits/chosen': -0.9499194622039795, 'logits/rejected': -0.94138503074646, 'epoch': 0.81} + 81%|████████ | 549/681 [23:21<05:37, 2.56s/it] 81%|████████ | 550/681 [23:24<05:37, 2.57s/it] {'loss': 1.1307, 'grad_norm': 34.55923843383789, 'learning_rate': 5.5229856368582376e-08, 'fcm_dpo/beta': 0.0010721642756834626, 'fcm_dpo/q_t': 0.41808733344078064, 'fcm_dpo/delta': 0.04264108091592789, 'fcm_dpo/margin': 334.7535095214844, 'margin_dpo/margin_mean': 334.7535400390625, 'margin_dpo/margin_std': 521.0529174804688, 'logps/chosen': -705.7626953125, 'logps/rejected': -1078.8350830078125, 'logps/ref_chosen': -56.80695343017578, 'logps/ref_rejected': -95.12580871582031, 'KL/chosen_KL_mean': -648.9557495117188, 'KL/rejected_KL_mean': -983.709228515625, 'KL/mean': -816.33251953125, 'KL/std': 498.76806640625, 'logits/chosen': -0.898378849029541, 'logits/rejected': -0.9233511686325073, 'epoch': 0.81} + 81%|████████ | 550/681 [23:24<05:37, 2.57s/it] 81%|████████ | 551/681 [23:26<05:29, 2.53s/it] {'loss': 0.9675, 'grad_norm': 50.58546447753906, 'learning_rate': 5.4427875753062734e-08, 'fcm_dpo/beta': 0.0010456846794113517, 'fcm_dpo/q_t': 0.37006914615631104, 'fcm_dpo/delta': -0.17770320177078247, 'fcm_dpo/margin': 542.4798583984375, 'margin_dpo/margin_mean': 542.4798583984375, 'margin_dpo/margin_std': 536.1229858398438, 'logps/chosen': -619.71337890625, 'logps/rejected': -1214.759765625, 'logps/ref_chosen': -59.10633087158203, 'logps/ref_rejected': -111.67280578613281, 'KL/chosen_KL_mean': -560.6070556640625, 'KL/rejected_KL_mean': -1103.0869140625, 'KL/mean': -831.8469848632812, 'KL/std': 532.996337890625, 'logits/chosen': -0.8850421905517578, 'logits/rejected': -0.9424214363098145, 'epoch': 0.81} + 81%|████████ | 551/681 [23:26<05:29, 2.53s/it] 81%|████████ | 552/681 [23:28<05:17, 2.46s/it] {'loss': 0.9761, 'grad_norm': 51.7211799621582, 'learning_rate': 5.363104864490034e-08, 'fcm_dpo/beta': 0.0009956832509487867, 'fcm_dpo/q_t': 0.36827754974365234, 'fcm_dpo/delta': -0.21994295716285706, 'fcm_dpo/margin': 605.8824462890625, 'margin_dpo/margin_mean': 605.8823852539062, 'margin_dpo/margin_std': 696.2559814453125, 'logps/chosen': -608.7723999023438, 'logps/rejected': -1256.8623046875, 'logps/ref_chosen': -62.35459899902344, 'logps/ref_rejected': -104.56210327148438, 'KL/chosen_KL_mean': -546.4177856445312, 'KL/rejected_KL_mean': -1152.300048828125, 'KL/mean': -849.3590087890625, 'KL/std': 611.6688232421875, 'logits/chosen': -0.9471904039382935, 'logits/rejected': -0.9889096021652222, 'epoch': 0.81} + 81%|████████ | 552/681 [23:29<05:17, 2.46s/it] 81%|████████ | 553/681 [23:31<05:22, 2.52s/it] {'loss': 1.1778, 'grad_norm': 25.391754150390625, 'learning_rate': 5.2839396041230415e-08, 'fcm_dpo/beta': 0.0010049683041870594, 'fcm_dpo/q_t': 0.4325829744338989, 'fcm_dpo/delta': 0.0984039306640625, 'fcm_dpo/margin': 303.19573974609375, 'margin_dpo/margin_mean': 303.1957702636719, 'margin_dpo/margin_std': 562.311279296875, 'logps/chosen': -695.291748046875, 'logps/rejected': -1028.325927734375, 'logps/ref_chosen': -68.25881958007812, 'logps/ref_rejected': -98.0971450805664, 'KL/chosen_KL_mean': -627.032958984375, 'KL/rejected_KL_mean': -930.2286987304688, 'KL/mean': -778.630859375, 'KL/std': 506.2027587890625, 'logits/chosen': -0.9180362224578857, 'logits/rejected': -0.9116028547286987, 'epoch': 0.81} + 81%|████████ | 553/681 [23:31<05:22, 2.52s/it] 81%|████████▏ | 554/681 [23:34<05:28, 2.58s/it] {'loss': 1.1142, 'grad_norm': 55.65289306640625, 'learning_rate': 5.205293880283551e-08, 'fcm_dpo/beta': 0.001014210982248187, 'fcm_dpo/q_t': 0.40570682287216187, 'fcm_dpo/delta': -0.034694697707891464, 'fcm_dpo/margin': 426.5704040527344, 'margin_dpo/margin_mean': 426.5704345703125, 'margin_dpo/margin_std': 692.4230346679688, 'logps/chosen': -709.7548217773438, 'logps/rejected': -1158.1602783203125, 'logps/ref_chosen': -67.94767761230469, 'logps/ref_rejected': -89.78272247314453, 'KL/chosen_KL_mean': -641.80712890625, 'KL/rejected_KL_mean': -1068.3775634765625, 'KL/mean': -855.0924072265625, 'KL/std': 544.1128540039062, 'logits/chosen': -0.9369876980781555, 'logits/rejected': -0.9231326580047607, 'epoch': 0.81} + 81%|████████▏ | 554/681 [23:34<05:28, 2.58s/it] 81%|████████▏ | 555/681 [23:36<05:19, 2.54s/it] {'loss': 1.0769, 'grad_norm': 36.912017822265625, 'learning_rate': 5.127169765359515e-08, 'fcm_dpo/beta': 0.0009933705441653728, 'fcm_dpo/q_t': 0.3950349688529968, 'fcm_dpo/delta': -0.09272074699401855, 'fcm_dpo/margin': 491.5249938964844, 'margin_dpo/margin_mean': 491.5250244140625, 'margin_dpo/margin_std': 748.8701171875, 'logps/chosen': -710.472900390625, 'logps/rejected': -1257.146728515625, 'logps/ref_chosen': -53.33049011230469, 'logps/ref_rejected': -108.47937774658203, 'KL/chosen_KL_mean': -657.1424560546875, 'KL/rejected_KL_mean': -1148.66748046875, 'KL/mean': -902.9049072265625, 'KL/std': 580.0269165039062, 'logits/chosen': -0.9536832571029663, 'logits/rejected': -1.00516676902771, 'epoch': 0.81} + 81%|████████▏ | 555/681 [23:36<05:19, 2.54s/it] 82%|████████▏ | 556/681 [23:39<05:23, 2.59s/it] {'loss': 1.1549, 'grad_norm': 37.66986083984375, 'learning_rate': 5.049569317994012e-08, 'fcm_dpo/beta': 0.0010029294062405825, 'fcm_dpo/q_t': 0.43013256788253784, 'fcm_dpo/delta': 0.10188616812229156, 'fcm_dpo/margin': 300.2801513671875, 'margin_dpo/margin_mean': 300.2801513671875, 'margin_dpo/margin_std': 445.45648193359375, 'logps/chosen': -690.203369140625, 'logps/rejected': -1033.179443359375, 'logps/ref_chosen': -58.64447021484375, 'logps/ref_rejected': -101.34040832519531, 'KL/chosen_KL_mean': -631.5589599609375, 'KL/rejected_KL_mean': -931.8389892578125, 'KL/mean': -781.698974609375, 'KL/std': 445.9133605957031, 'logits/chosen': -0.9434751272201538, 'logits/rejected': -0.9371851086616516, 'epoch': 0.82} + 82%|████████▏ | 556/681 [23:39<05:23, 2.59s/it] 82%|████████▏ | 557/681 [23:42<05:22, 2.60s/it] {'loss': 1.107, 'grad_norm': 59.12196731567383, 'learning_rate': 4.9724945830310144e-08, 'fcm_dpo/beta': 0.00100015162024647, 'fcm_dpo/q_t': 0.4047701060771942, 'fcm_dpo/delta': -0.03522220626473427, 'fcm_dpo/margin': 433.4267883300781, 'margin_dpo/margin_mean': 433.4267883300781, 'margin_dpo/margin_std': 684.0028076171875, 'logps/chosen': -749.1619873046875, 'logps/rejected': -1224.687744140625, 'logps/ref_chosen': -67.84066009521484, 'logps/ref_rejected': -109.93965911865234, 'KL/chosen_KL_mean': -681.3212890625, 'KL/rejected_KL_mean': -1114.748046875, 'KL/mean': -898.03466796875, 'KL/std': 606.8817749023438, 'logits/chosen': -0.974113404750824, 'logits/rejected': -1.007190227508545, 'epoch': 0.82} + 82%|████████▏ | 557/681 [23:42<05:22, 2.60s/it] 82%|████████▏ | 558/681 [23:44<05:17, 2.58s/it] {'loss': 0.9754, 'grad_norm': 32.32392120361328, 'learning_rate': 4.8959475914614554e-08, 'fcm_dpo/beta': 0.0009742493275552988, 'fcm_dpo/q_t': 0.36719027161598206, 'fcm_dpo/delta': -0.18480078876018524, 'fcm_dpo/margin': 589.447265625, 'margin_dpo/margin_mean': 589.447265625, 'margin_dpo/margin_std': 615.0665283203125, 'logps/chosen': -667.9453125, 'logps/rejected': -1297.185302734375, 'logps/ref_chosen': -62.36824035644531, 'logps/ref_rejected': -102.16102600097656, 'KL/chosen_KL_mean': -605.5770263671875, 'KL/rejected_KL_mean': -1195.0242919921875, 'KL/mean': -900.3006591796875, 'KL/std': 560.5341186523438, 'logits/chosen': -0.9915221929550171, 'logits/rejected': -1.0101500749588013, 'epoch': 0.82} + 82%|████████▏ | 558/681 [23:44<05:17, 2.58s/it] 82%|████████▏ | 559/681 [23:47<05:14, 2.58s/it] {'loss': 1.0501, 'grad_norm': 28.45130157470703, 'learning_rate': 4.8199303603697614e-08, 'fcm_dpo/beta': 0.0009549415553919971, 'fcm_dpo/q_t': 0.3929804563522339, 'fcm_dpo/delta': -0.0784287303686142, 'fcm_dpo/margin': 497.1776428222656, 'margin_dpo/margin_mean': 497.17767333984375, 'margin_dpo/margin_std': 634.4929809570312, 'logps/chosen': -760.2862548828125, 'logps/rejected': -1290.15380859375, 'logps/ref_chosen': -60.752323150634766, 'logps/ref_rejected': -93.44229125976562, 'KL/chosen_KL_mean': -699.533935546875, 'KL/rejected_KL_mean': -1196.711669921875, 'KL/mean': -948.122802734375, 'KL/std': 582.9520263671875, 'logits/chosen': -1.1178151369094849, 'logits/rejected': -1.1277766227722168, 'epoch': 0.82} + 82%|████████▏ | 559/681 [23:47<05:14, 2.58s/it] 82%|████████▏ | 560/681 [23:50<05:18, 2.63s/it] {'loss': 1.1422, 'grad_norm': 31.18995475769043, 'learning_rate': 4.7444448928806615e-08, 'fcm_dpo/beta': 0.0009533166885375977, 'fcm_dpo/q_t': 0.42201805114746094, 'fcm_dpo/delta': 0.06086999550461769, 'fcm_dpo/margin': 357.6928405761719, 'margin_dpo/margin_mean': 357.69287109375, 'margin_dpo/margin_std': 554.28125, 'logps/chosen': -684.1489868164062, 'logps/rejected': -1063.729248046875, 'logps/ref_chosen': -58.10382080078125, 'logps/ref_rejected': -79.99122619628906, 'KL/chosen_KL_mean': -626.045166015625, 'KL/rejected_KL_mean': -983.7379760742188, 'KL/mean': -804.8916015625, 'KL/std': 515.1402587890625, 'logits/chosen': -0.8733669519424438, 'logits/rejected': -0.856816828250885, 'epoch': 0.82} + 82%|████████▏ | 560/681 [23:50<05:18, 2.63s/it] 82%|████████▏ | 561/681 [23:52<05:09, 2.58s/it] {'loss': 1.208, 'grad_norm': 41.6833610534668, 'learning_rate': 4.669493178106432e-08, 'fcm_dpo/beta': 0.0009805468143895268, 'fcm_dpo/q_t': 0.43384015560150146, 'fcm_dpo/delta': 0.11781884729862213, 'fcm_dpo/margin': 290.80755615234375, 'margin_dpo/margin_mean': 290.8075256347656, 'margin_dpo/margin_std': 622.505615234375, 'logps/chosen': -785.6019287109375, 'logps/rejected': -1124.565185546875, 'logps/ref_chosen': -50.912879943847656, 'logps/ref_rejected': -99.06856536865234, 'KL/chosen_KL_mean': -734.6890258789062, 'KL/rejected_KL_mean': -1025.49658203125, 'KL/mean': -880.0928344726562, 'KL/std': 505.81610107421875, 'logits/chosen': -1.0193910598754883, 'logits/rejected': -1.037698745727539, 'epoch': 0.82} + 82%|████████▏ | 561/681 [23:52<05:09, 2.58s/it] 83%|████████▎ | 562/681 [23:55<05:10, 2.61s/it] {'loss': 1.1004, 'grad_norm': 40.292320251464844, 'learning_rate': 4.5950771910944596e-08, 'fcm_dpo/beta': 0.0009777405066415668, 'fcm_dpo/q_t': 0.40837323665618896, 'fcm_dpo/delta': -0.007167506963014603, 'fcm_dpo/margin': 415.52349853515625, 'margin_dpo/margin_mean': 415.52349853515625, 'margin_dpo/margin_std': 602.928955078125, 'logps/chosen': -749.496337890625, 'logps/rejected': -1202.09814453125, 'logps/ref_chosen': -59.46440124511719, 'logps/ref_rejected': -96.54266357421875, 'KL/chosen_KL_mean': -690.031982421875, 'KL/rejected_KL_mean': -1105.555419921875, 'KL/mean': -897.793701171875, 'KL/std': 561.4027099609375, 'logits/chosen': -1.024315357208252, 'logits/rejected': -1.0398998260498047, 'epoch': 0.83} + 83%|████████▎ | 562/681 [23:55<05:10, 2.61s/it] 83%|████████▎ | 563/681 [23:57<05:02, 2.56s/it] {'loss': 1.2111, 'grad_norm': 40.284427642822266, 'learning_rate': 4.521198892775202e-08, 'fcm_dpo/beta': 0.0009785356232896447, 'fcm_dpo/q_t': 0.4237578213214874, 'fcm_dpo/delta': -0.04188579320907593, 'fcm_dpo/margin': 322.9136657714844, 'margin_dpo/margin_mean': 322.9136657714844, 'margin_dpo/margin_std': 696.208251953125, 'logps/chosen': -812.5322265625, 'logps/rejected': -1169.405517578125, 'logps/ref_chosen': -60.60819625854492, 'logps/ref_rejected': -94.56770324707031, 'KL/chosen_KL_mean': -751.924072265625, 'KL/rejected_KL_mean': -1074.837890625, 'KL/mean': -913.3809204101562, 'KL/std': 594.7562255859375, 'logits/chosen': -1.0153368711471558, 'logits/rejected': -1.0244905948638916, 'epoch': 0.83} + 83%|████████▎ | 563/681 [23:57<05:02, 2.56s/it] 83%|████████▎ | 564/681 [24:00<04:56, 2.54s/it] {'loss': 1.0964, 'grad_norm': 40.814979553222656, 'learning_rate': 4.447860229910544e-08, 'fcm_dpo/beta': 0.0009786732262000442, 'fcm_dpo/q_t': 0.41156822443008423, 'fcm_dpo/delta': 0.01432707067579031, 'fcm_dpo/margin': 394.514404296875, 'margin_dpo/margin_mean': 394.514404296875, 'margin_dpo/margin_std': 508.09417724609375, 'logps/chosen': -760.5452880859375, 'logps/rejected': -1174.029541015625, 'logps/ref_chosen': -74.26837921142578, 'logps/ref_rejected': -93.23818969726562, 'KL/chosen_KL_mean': -686.2769775390625, 'KL/rejected_KL_mean': -1080.7913818359375, 'KL/mean': -883.5341796875, 'KL/std': 529.4177856445312, 'logits/chosen': -1.0610636472702026, 'logits/rejected': -1.053609013557434, 'epoch': 0.83} + 83%|████████▎ | 564/681 [24:00<04:56, 2.54s/it] 83%|████████▎ | 565/681 [24:02<04:57, 2.56s/it] {'loss': 1.127, 'grad_norm': 42.958003997802734, 'learning_rate': 4.375063135042445e-08, 'fcm_dpo/beta': 0.0009748205775395036, 'fcm_dpo/q_t': 0.41027140617370605, 'fcm_dpo/delta': -0.015806902199983597, 'fcm_dpo/margin': 425.871826171875, 'margin_dpo/margin_mean': 425.871826171875, 'margin_dpo/margin_std': 730.54736328125, 'logps/chosen': -785.214111328125, 'logps/rejected': -1227.844970703125, 'logps/ref_chosen': -69.0199203491211, 'logps/ref_rejected': -85.7789306640625, 'KL/chosen_KL_mean': -716.1942138671875, 'KL/rejected_KL_mean': -1142.0660400390625, 'KL/mean': -929.130126953125, 'KL/std': 609.8633422851562, 'logits/chosen': -0.9631332159042358, 'logits/rejected': -0.9660812616348267, 'epoch': 0.83} + 83%|████████▎ | 565/681 [24:02<04:57, 2.56s/it] 83%|████████▎ | 566/681 [24:05<05:00, 2.61s/it] {'loss': 1.0997, 'grad_norm': 32.570377349853516, 'learning_rate': 4.3028095264420525e-08, 'fcm_dpo/beta': 0.000970390741713345, 'fcm_dpo/q_t': 0.3978080153465271, 'fcm_dpo/delta': -0.06614132225513458, 'fcm_dpo/margin': 476.88720703125, 'margin_dpo/margin_mean': 476.88720703125, 'margin_dpo/margin_std': 747.5025634765625, 'logps/chosen': -773.6256103515625, 'logps/rejected': -1287.8367919921875, 'logps/ref_chosen': -66.5453109741211, 'logps/ref_rejected': -103.86932373046875, 'KL/chosen_KL_mean': -707.080322265625, 'KL/rejected_KL_mean': -1183.967529296875, 'KL/mean': -945.5238647460938, 'KL/std': 644.3311767578125, 'logits/chosen': -1.0035604238510132, 'logits/rejected': -1.0268689393997192, 'epoch': 0.83} + 83%|████████▎ | 566/681 [24:05<05:00, 2.61s/it] 83%|████████▎ | 567/681 [24:07<04:53, 2.58s/it] {'loss': 1.1145, 'grad_norm': 42.263118743896484, 'learning_rate': 4.231101308059165e-08, 'fcm_dpo/beta': 0.000967850093729794, 'fcm_dpo/q_t': 0.4172418713569641, 'fcm_dpo/delta': 0.048623181879520416, 'fcm_dpo/margin': 364.84228515625, 'margin_dpo/margin_mean': 364.8423156738281, 'margin_dpo/margin_std': 478.48101806640625, 'logps/chosen': -707.1751708984375, 'logps/rejected': -1104.5302734375, 'logps/ref_chosen': -52.85829544067383, 'logps/ref_rejected': -85.37095642089844, 'KL/chosen_KL_mean': -654.31689453125, 'KL/rejected_KL_mean': -1019.1592407226562, 'KL/mean': -836.738037109375, 'KL/std': 439.9637756347656, 'logits/chosen': -1.077162504196167, 'logits/rejected': -1.0844173431396484, 'epoch': 0.83} + 83%|████████▎ | 567/681 [24:07<04:53, 2.58s/it] 83%|████████▎ | 568/681 [24:10<04:45, 2.53s/it] {'loss': 1.0371, 'grad_norm': 30.015487670898438, 'learning_rate': 4.1599403694720145e-08, 'fcm_dpo/beta': 0.0009604596998542547, 'fcm_dpo/q_t': 0.3923587203025818, 'fcm_dpo/delta': -0.06550342589616776, 'fcm_dpo/margin': 481.43841552734375, 'margin_dpo/margin_mean': 481.43841552734375, 'margin_dpo/margin_std': 534.50341796875, 'logps/chosen': -694.0675048828125, 'logps/rejected': -1219.40576171875, 'logps/ref_chosen': -45.1923828125, 'logps/ref_rejected': -89.09236907958984, 'KL/chosen_KL_mean': -648.8751220703125, 'KL/rejected_KL_mean': -1130.3134765625, 'KL/mean': -889.5942993164062, 'KL/std': 514.7445068359375, 'logits/chosen': -1.0153778791427612, 'logits/rejected': -1.0532267093658447, 'epoch': 0.83} + 83%|████████▎ | 568/681 [24:10<04:45, 2.53s/it] 84%|████████▎ | 569/681 [24:12<04:46, 2.56s/it] {'loss': 1.134, 'grad_norm': 49.2674446105957, 'learning_rate': 4.089328585837512e-08, 'fcm_dpo/beta': 0.0009647671831771731, 'fcm_dpo/q_t': 0.410520076751709, 'fcm_dpo/delta': 0.00415463000535965, 'fcm_dpo/margin': 409.6417236328125, 'margin_dpo/margin_mean': 409.6417236328125, 'margin_dpo/margin_std': 681.7449951171875, 'logps/chosen': -808.2116088867188, 'logps/rejected': -1233.236083984375, 'logps/ref_chosen': -63.72056198120117, 'logps/ref_rejected': -79.10325622558594, 'KL/chosen_KL_mean': -744.4910278320312, 'KL/rejected_KL_mean': -1154.1328125, 'KL/mean': -949.3118896484375, 'KL/std': 672.5451049804688, 'logits/chosen': -1.025818109512329, 'logits/rejected': -1.0322705507278442, 'epoch': 0.84} + 84%|████████▎ | 569/681 [24:12<04:46, 2.56s/it] 84%|████████▎ | 570/681 [24:15<04:45, 2.57s/it] {'loss': 1.1329, 'grad_norm': 29.400341033935547, 'learning_rate': 4.019267817841834e-08, 'fcm_dpo/beta': 0.0009631971479393542, 'fcm_dpo/q_t': 0.4191049039363861, 'fcm_dpo/delta': 0.040986284613609314, 'fcm_dpo/margin': 374.2774963378906, 'margin_dpo/margin_mean': 374.2774963378906, 'margin_dpo/margin_std': 585.3759155273438, 'logps/chosen': -749.0067749023438, 'logps/rejected': -1143.8115234375, 'logps/ref_chosen': -61.61454391479492, 'logps/ref_rejected': -82.14186096191406, 'KL/chosen_KL_mean': -687.3922119140625, 'KL/rejected_KL_mean': -1061.669677734375, 'KL/mean': -874.5309448242188, 'KL/std': 541.8402709960938, 'logits/chosen': -1.1264129877090454, 'logits/rejected': -1.1218767166137695, 'epoch': 0.84} + 84%|████████▎ | 570/681 [24:15<04:45, 2.57s/it] 84%|████████▍ | 571/681 [24:18<04:43, 2.58s/it] {'loss': 1.1022, 'grad_norm': 41.382713317871094, 'learning_rate': 3.9497599116513705e-08, 'fcm_dpo/beta': 0.0009610787965357304, 'fcm_dpo/q_t': 0.40491753816604614, 'fcm_dpo/delta': -0.02597730979323387, 'fcm_dpo/margin': 441.94012451171875, 'margin_dpo/margin_mean': 441.94012451171875, 'margin_dpo/margin_std': 674.6705322265625, 'logps/chosen': -769.42578125, 'logps/rejected': -1249.648681640625, 'logps/ref_chosen': -53.05406188964844, 'logps/ref_rejected': -91.33682250976562, 'KL/chosen_KL_mean': -716.3717041015625, 'KL/rejected_KL_mean': -1158.3118896484375, 'KL/mean': -937.341796875, 'KL/std': 546.9036865234375, 'logits/chosen': -1.0316365957260132, 'logits/rejected': -1.0433616638183594, 'epoch': 0.84} + 84%|████████▍ | 571/681 [24:18<04:43, 2.58s/it] 84%|████████▍ | 572/681 [24:20<04:34, 2.52s/it] {'loss': 1.1089, 'grad_norm': 28.851036071777344, 'learning_rate': 3.880806698864086e-08, 'fcm_dpo/beta': 0.000953345384914428, 'fcm_dpo/q_t': 0.4049929678440094, 'fcm_dpo/delta': -0.04224724695086479, 'fcm_dpo/margin': 461.7014465332031, 'margin_dpo/margin_mean': 461.7014465332031, 'margin_dpo/margin_std': 761.991943359375, 'logps/chosen': -801.095947265625, 'logps/rejected': -1297.89501953125, 'logps/ref_chosen': -48.45928955078125, 'logps/ref_rejected': -83.55703735351562, 'KL/chosen_KL_mean': -752.6365966796875, 'KL/rejected_KL_mean': -1214.3380126953125, 'KL/mean': -983.4873046875, 'KL/std': 640.98876953125, 'logits/chosen': -1.074343204498291, 'logits/rejected': -1.1045624017715454, 'epoch': 0.84} + 84%|████████▍ | 572/681 [24:20<04:34, 2.52s/it] 84%|████████▍ | 573/681 [24:22<04:23, 2.44s/it] {'loss': 1.108, 'grad_norm': 25.593852996826172, 'learning_rate': 3.812409996461275e-08, 'fcm_dpo/beta': 0.0009574309224262834, 'fcm_dpo/q_t': 0.4144536852836609, 'fcm_dpo/delta': 0.020482124760746956, 'fcm_dpo/margin': 397.17266845703125, 'margin_dpo/margin_mean': 397.17266845703125, 'margin_dpo/margin_std': 568.3775634765625, 'logps/chosen': -785.7216796875, 'logps/rejected': -1216.5966796875, 'logps/ref_chosen': -51.62262725830078, 'logps/ref_rejected': -85.32499694824219, 'KL/chosen_KL_mean': -734.09912109375, 'KL/rejected_KL_mean': -1131.271728515625, 'KL/mean': -932.6853637695312, 'KL/std': 566.801513671875, 'logits/chosen': -1.0817201137542725, 'logits/rejected': -1.092029333114624, 'epoch': 0.84} + 84%|████████▍ | 573/681 [24:22<04:23, 2.44s/it] 84%|████████▍ | 574/681 [24:25<04:30, 2.52s/it] {'loss': 1.0865, 'grad_norm': 34.41420364379883, 'learning_rate': 3.74457160675965e-08, 'fcm_dpo/beta': 0.0009572736453264952, 'fcm_dpo/q_t': 0.40490391850471497, 'fcm_dpo/delta': -0.017637627199292183, 'fcm_dpo/margin': 435.4447021484375, 'margin_dpo/margin_mean': 435.4447021484375, 'margin_dpo/margin_std': 589.404296875, 'logps/chosen': -717.9173583984375, 'logps/rejected': -1195.1240234375, 'logps/ref_chosen': -51.04446029663086, 'logps/ref_rejected': -92.80640411376953, 'KL/chosen_KL_mean': -666.8729248046875, 'KL/rejected_KL_mean': -1102.317626953125, 'KL/mean': -884.59521484375, 'KL/std': 510.1865234375, 'logits/chosen': -1.0834131240844727, 'logits/rejected': -1.1097838878631592, 'epoch': 0.84} + 84%|████████▍ | 574/681 [24:25<04:30, 2.52s/it] 84%|████████▍ | 575/681 [24:28<04:33, 2.58s/it] {'loss': 1.1358, 'grad_norm': 41.16646957397461, 'learning_rate': 3.677293317363864e-08, 'fcm_dpo/beta': 0.000948374392464757, 'fcm_dpo/q_t': 0.4119390845298767, 'fcm_dpo/delta': 0.011579148471355438, 'fcm_dpo/margin': 408.832763671875, 'margin_dpo/margin_mean': 408.8327941894531, 'margin_dpo/margin_std': 670.8654174804688, 'logps/chosen': -819.1287841796875, 'logps/rejected': -1251.5576171875, 'logps/ref_chosen': -71.7901382446289, 'logps/ref_rejected': -95.38619995117188, 'KL/chosen_KL_mean': -747.3386840820312, 'KL/rejected_KL_mean': -1156.17138671875, 'KL/mean': -951.7550659179688, 'KL/std': 525.8989868164062, 'logits/chosen': -0.9548108577728271, 'logits/rejected': -0.960533618927002, 'epoch': 0.84} + 84%|████████▍ | 575/681 [24:28<04:33, 2.58s/it] 85%|████████▍ | 576/681 [24:30<04:24, 2.52s/it] {'loss': 1.1832, 'grad_norm': 33.871307373046875, 'learning_rate': 3.6105769011194224e-08, 'fcm_dpo/beta': 0.0009677187772467732, 'fcm_dpo/q_t': 0.4320542812347412, 'fcm_dpo/delta': 0.10300955176353455, 'fcm_dpo/margin': 310.19610595703125, 'margin_dpo/margin_mean': 310.1961364746094, 'margin_dpo/margin_std': 572.8515014648438, 'logps/chosen': -786.4808349609375, 'logps/rejected': -1143.168212890625, 'logps/ref_chosen': -54.262962341308594, 'logps/ref_rejected': -100.75428009033203, 'KL/chosen_KL_mean': -732.2178955078125, 'KL/rejected_KL_mean': -1042.4139404296875, 'KL/mean': -887.31591796875, 'KL/std': 495.1169128417969, 'logits/chosen': -1.1345970630645752, 'logits/rejected': -1.1627776622772217, 'epoch': 0.85} + 85%|████████▍ | 576/681 [24:30<04:24, 2.52s/it] 85%|████████▍ | 577/681 [24:33<04:19, 2.49s/it] {'loss': 1.1172, 'grad_norm': 26.741127014160156, 'learning_rate': 3.5444241160659304e-08, 'fcm_dpo/beta': 0.0009760315297171474, 'fcm_dpo/q_t': 0.4116186499595642, 'fcm_dpo/delta': 0.008000888861715794, 'fcm_dpo/margin': 401.9121398925781, 'margin_dpo/margin_mean': 401.912109375, 'margin_dpo/margin_std': 588.481201171875, 'logps/chosen': -706.4434814453125, 'logps/rejected': -1130.5166015625, 'logps/ref_chosen': -61.909706115722656, 'logps/ref_rejected': -84.07069396972656, 'KL/chosen_KL_mean': -644.5338134765625, 'KL/rejected_KL_mean': -1046.4459228515625, 'KL/mean': -845.4898681640625, 'KL/std': 548.8078002929688, 'logits/chosen': -1.0345063209533691, 'logits/rejected': -1.0221607685089111, 'epoch': 0.85} + 85%|████████▍ | 577/681 [24:33<04:19, 2.49s/it] 85%|████████▍ | 578/681 [24:35<04:20, 2.53s/it] {'loss': 1.0675, 'grad_norm': 35.5023078918457, 'learning_rate': 3.478836705390808e-08, 'fcm_dpo/beta': 0.0009675461915321648, 'fcm_dpo/q_t': 0.40107935667037964, 'fcm_dpo/delta': -0.033737167716026306, 'fcm_dpo/margin': 445.94696044921875, 'margin_dpo/margin_mean': 445.94696044921875, 'margin_dpo/margin_std': 543.1402587890625, 'logps/chosen': -690.57861328125, 'logps/rejected': -1170.6981201171875, 'logps/ref_chosen': -49.26368713378906, 'logps/ref_rejected': -83.4362564086914, 'KL/chosen_KL_mean': -641.31494140625, 'KL/rejected_KL_mean': -1087.2618408203125, 'KL/mean': -864.2884521484375, 'KL/std': 547.829833984375, 'logits/chosen': -0.9391261339187622, 'logits/rejected': -0.9697315692901611, 'epoch': 0.85} + 85%|████████▍ | 578/681 [24:35<04:20, 2.53s/it] 85%|████████▌ | 579/681 [24:38<04:25, 2.60s/it] {'loss': 1.2205, 'grad_norm': 53.29568099975586, 'learning_rate': 3.41381639738331e-08, 'fcm_dpo/beta': 0.000979724689386785, 'fcm_dpo/q_t': 0.4389345049858093, 'fcm_dpo/delta': 0.04294705390930176, 'fcm_dpo/margin': 268.92694091796875, 'margin_dpo/margin_mean': 268.92694091796875, 'margin_dpo/margin_std': 589.2688598632812, 'logps/chosen': -798.509765625, 'logps/rejected': -1103.33837890625, 'logps/ref_chosen': -58.88581848144531, 'logps/ref_rejected': -94.78762817382812, 'KL/chosen_KL_mean': -739.6239013671875, 'KL/rejected_KL_mean': -1008.5507202148438, 'KL/mean': -874.08740234375, 'KL/std': 546.8580932617188, 'logits/chosen': -0.9918534755706787, 'logits/rejected': -0.9905188083648682, 'epoch': 0.85} + 85%|████████▌ | 579/681 [24:38<04:25, 2.60s/it] 85%|████████▌ | 580/681 [24:41<04:23, 2.61s/it] {'loss': 1.0525, 'grad_norm': 30.890201568603516, 'learning_rate': 3.349364905389032e-08, 'fcm_dpo/beta': 0.0009632025612518191, 'fcm_dpo/q_t': 0.3941301107406616, 'fcm_dpo/delta': -0.08945266157388687, 'fcm_dpo/margin': 503.046875, 'margin_dpo/margin_mean': 503.046875, 'margin_dpo/margin_std': 688.0292358398438, 'logps/chosen': -588.8133544921875, 'logps/rejected': -1124.911865234375, 'logps/ref_chosen': -48.70683670043945, 'logps/ref_rejected': -81.7583999633789, 'KL/chosen_KL_mean': -540.1065673828125, 'KL/rejected_KL_mean': -1043.1533203125, 'KL/mean': -791.6300048828125, 'KL/std': 603.1106567382812, 'logits/chosen': -0.8625004291534424, 'logits/rejected': -0.8941880464553833, 'epoch': 0.85} + 85%|████████▌ | 580/681 [24:41<04:23, 2.61s/it] 85%|████████▌ | 581/681 [24:43<04:16, 2.57s/it] {'loss': 1.1562, 'grad_norm': 40.021026611328125, 'learning_rate': 3.285483927764726e-08, 'fcm_dpo/beta': 0.0009719936642795801, 'fcm_dpo/q_t': 0.42395222187042236, 'fcm_dpo/delta': 0.05880071595311165, 'fcm_dpo/margin': 353.0070495605469, 'margin_dpo/margin_mean': 353.0070495605469, 'margin_dpo/margin_std': 629.962158203125, 'logps/chosen': -791.2696533203125, 'logps/rejected': -1173.7901611328125, 'logps/ref_chosen': -62.22235107421875, 'logps/ref_rejected': -91.73568725585938, 'KL/chosen_KL_mean': -729.04736328125, 'KL/rejected_KL_mean': -1082.054443359375, 'KL/mean': -905.5509033203125, 'KL/std': 574.6087036132812, 'logits/chosen': -1.0945156812667847, 'logits/rejected': -1.103161334991455, 'epoch': 0.85} + 85%|████████▌ | 581/681 [24:43<04:16, 2.57s/it] 85%|████████▌ | 582/681 [24:46<04:15, 2.58s/it] {'loss': 1.1133, 'grad_norm': 30.24727439880371, 'learning_rate': 3.222175147833556e-08, 'fcm_dpo/beta': 0.0009659301722422242, 'fcm_dpo/q_t': 0.4117897152900696, 'fcm_dpo/delta': -0.08082351088523865, 'fcm_dpo/margin': 392.1502990722656, 'margin_dpo/margin_mean': 392.1502990722656, 'margin_dpo/margin_std': 531.6431884765625, 'logps/chosen': -697.55078125, 'logps/rejected': -1141.5419921875, 'logps/ref_chosen': -58.228660583496094, 'logps/ref_rejected': -110.06959533691406, 'KL/chosen_KL_mean': -639.3220825195312, 'KL/rejected_KL_mean': -1031.472412109375, 'KL/mean': -835.397216796875, 'KL/std': 496.10870361328125, 'logits/chosen': -1.0156798362731934, 'logits/rejected': -1.039165735244751, 'epoch': 0.85} + 85%|████████▌ | 582/681 [24:46<04:15, 2.58s/it] 86%|████████▌ | 583/681 [24:48<04:15, 2.61s/it] {'loss': 1.2647, 'grad_norm': 62.559593200683594, 'learning_rate': 3.159440233840763e-08, 'fcm_dpo/beta': 0.0009584878571331501, 'fcm_dpo/q_t': 0.44876495003700256, 'fcm_dpo/delta': 0.0019017525482922792, 'fcm_dpo/margin': 238.00784301757812, 'margin_dpo/margin_mean': 238.00784301757812, 'margin_dpo/margin_std': 635.57080078125, 'logps/chosen': -785.408447265625, 'logps/rejected': -1054.957275390625, 'logps/ref_chosen': -56.86286163330078, 'logps/ref_rejected': -88.4039306640625, 'KL/chosen_KL_mean': -728.5455322265625, 'KL/rejected_KL_mean': -966.553466796875, 'KL/mean': -847.5494384765625, 'KL/std': 537.0501098632812, 'logits/chosen': -0.9803950190544128, 'logits/rejected': -0.9772003293037415, 'epoch': 0.86} + 86%|████████▌ | 583/681 [24:48<04:15, 2.61s/it] 86%|████████▌ | 584/681 [24:51<04:07, 2.55s/it] {'loss': 1.0459, 'grad_norm': 34.74457931518555, 'learning_rate': 3.0972808389096635e-08, 'fcm_dpo/beta': 0.0009496349957771599, 'fcm_dpo/q_t': 0.39483463764190674, 'fcm_dpo/delta': -0.0657280907034874, 'fcm_dpo/margin': 487.2535400390625, 'margin_dpo/margin_mean': 487.2535400390625, 'margin_dpo/margin_std': 577.2208862304688, 'logps/chosen': -682.844970703125, 'logps/rejected': -1210.833984375, 'logps/ref_chosen': -56.90068054199219, 'logps/ref_rejected': -97.63606262207031, 'KL/chosen_KL_mean': -625.9443359375, 'KL/rejected_KL_mean': -1113.19775390625, 'KL/mean': -869.571044921875, 'KL/std': 543.7821044921875, 'logits/chosen': -1.0310046672821045, 'logits/rejected': -1.0454175472259521, 'epoch': 0.86} + 86%|████████▌ | 584/681 [24:51<04:07, 2.55s/it] 86%|████████▌ | 585/681 [24:53<04:03, 2.54s/it] {'loss': 1.1158, 'grad_norm': 30.642621994018555, 'learning_rate': 3.035698600998121e-08, 'fcm_dpo/beta': 0.0009398453403264284, 'fcm_dpo/q_t': 0.4046263098716736, 'fcm_dpo/delta': -0.02655157260596752, 'fcm_dpo/margin': 452.44818115234375, 'margin_dpo/margin_mean': 452.44818115234375, 'margin_dpo/margin_std': 744.0001220703125, 'logps/chosen': -783.9991455078125, 'logps/rejected': -1259.6429443359375, 'logps/ref_chosen': -60.973968505859375, 'logps/ref_rejected': -84.16952514648438, 'KL/chosen_KL_mean': -723.0252075195312, 'KL/rejected_KL_mean': -1175.473388671875, 'KL/mean': -949.249267578125, 'KL/std': 627.5142822265625, 'logits/chosen': -1.0402522087097168, 'logits/rejected': -1.065436840057373, 'epoch': 0.86} + 86%|████████▌ | 585/681 [24:53<04:03, 2.54s/it] 86%|████████▌ | 586/681 [24:56<04:00, 2.53s/it] {'loss': 1.1994, 'grad_norm': 30.652240753173828, 'learning_rate': 2.974695142855388e-08, 'fcm_dpo/beta': 0.0009562649065628648, 'fcm_dpo/q_t': 0.4345587491989136, 'fcm_dpo/delta': 0.1183374673128128, 'fcm_dpo/margin': 298.2107238769531, 'margin_dpo/margin_mean': 298.2107238769531, 'margin_dpo/margin_std': 598.6849365234375, 'logps/chosen': -815.96337890625, 'logps/rejected': -1149.12109375, 'logps/ref_chosen': -56.85559844970703, 'logps/ref_rejected': -91.80261993408203, 'KL/chosen_KL_mean': -759.1077880859375, 'KL/rejected_KL_mean': -1057.318359375, 'KL/mean': -908.213134765625, 'KL/std': 538.911865234375, 'logits/chosen': -1.0284502506256104, 'logits/rejected': -1.0488755702972412, 'epoch': 0.86} + 86%|████████▌ | 586/681 [24:56<04:00, 2.53s/it] 86%|████████▌ | 587/681 [24:58<04:00, 2.56s/it] {'loss': 1.0913, 'grad_norm': 45.825714111328125, 'learning_rate': 2.9142720719793122e-08, 'fcm_dpo/beta': 0.0009647482074797153, 'fcm_dpo/q_t': 0.40699630975723267, 'fcm_dpo/delta': -0.006192212924361229, 'fcm_dpo/margin': 420.68768310546875, 'margin_dpo/margin_mean': 420.68768310546875, 'margin_dpo/margin_std': 569.4257202148438, 'logps/chosen': -572.8347778320312, 'logps/rejected': -1031.4547119140625, 'logps/ref_chosen': -44.69159698486328, 'logps/ref_rejected': -82.62385559082031, 'KL/chosen_KL_mean': -528.1431884765625, 'KL/rejected_KL_mean': -948.830810546875, 'KL/mean': -738.4869995117188, 'KL/std': 569.4769897460938, 'logits/chosen': -1.0657103061676025, 'logits/rejected': -1.09328293800354, 'epoch': 0.86} + 86%|████████▌ | 587/681 [24:58<04:00, 2.56s/it] 86%|████████▋ | 588/681 [25:01<03:51, 2.49s/it] {'loss': 1.1351, 'grad_norm': 26.90322494506836, 'learning_rate': 2.8544309805740018e-08, 'fcm_dpo/beta': 0.0009668685379438102, 'fcm_dpo/q_t': 0.42055660486221313, 'fcm_dpo/delta': 0.05999944359064102, 'fcm_dpo/margin': 353.66156005859375, 'margin_dpo/margin_mean': 353.66156005859375, 'margin_dpo/margin_std': 518.2518310546875, 'logps/chosen': -737.3505249023438, 'logps/rejected': -1148.087158203125, 'logps/ref_chosen': -50.29494857788086, 'logps/ref_rejected': -107.36988067626953, 'KL/chosen_KL_mean': -687.0555419921875, 'KL/rejected_KL_mean': -1040.7171630859375, 'KL/mean': -863.8863525390625, 'KL/std': 487.860107421875, 'logits/chosen': -1.039747714996338, 'logits/rejected': -1.066466212272644, 'epoch': 0.86} + 86%|████████▋ | 588/681 [25:01<03:51, 2.49s/it] 86%|████████▋ | 589/681 [25:03<03:49, 2.49s/it] {'loss': 1.058, 'grad_norm': 27.10498046875, 'learning_rate': 2.7951734455078786e-08, 'fcm_dpo/beta': 0.0009669238934293389, 'fcm_dpo/q_t': 0.39669230580329895, 'fcm_dpo/delta': -0.055137749761343, 'fcm_dpo/margin': 468.15966796875, 'margin_dpo/margin_mean': 468.1596984863281, 'margin_dpo/margin_std': 587.922607421875, 'logps/chosen': -757.5223999023438, 'logps/rejected': -1277.407470703125, 'logps/ref_chosen': -59.929908752441406, 'logps/ref_rejected': -111.65534973144531, 'KL/chosen_KL_mean': -697.592529296875, 'KL/rejected_KL_mean': -1165.752197265625, 'KL/mean': -931.67236328125, 'KL/std': 557.1414184570312, 'logits/chosen': -1.0181684494018555, 'logits/rejected': -1.0344040393829346, 'epoch': 0.86} + 86%|████████▋ | 589/681 [25:03<03:49, 2.49s/it] 87%|████████▋ | 590/681 [25:06<03:44, 2.46s/it] {'loss': 1.0661, 'grad_norm': 27.430288314819336, 'learning_rate': 2.736501028272095e-08, 'fcm_dpo/beta': 0.0009563218918628991, 'fcm_dpo/q_t': 0.40053310990333557, 'fcm_dpo/delta': -0.039607785642147064, 'fcm_dpo/margin': 457.78570556640625, 'margin_dpo/margin_mean': 457.78570556640625, 'margin_dpo/margin_std': 581.5162353515625, 'logps/chosen': -653.663818359375, 'logps/rejected': -1161.7025146484375, 'logps/ref_chosen': -55.80979537963867, 'logps/ref_rejected': -106.06282043457031, 'KL/chosen_KL_mean': -597.85400390625, 'KL/rejected_KL_mean': -1055.6396484375, 'KL/mean': -826.746826171875, 'KL/std': 536.9008178710938, 'logits/chosen': -0.9699843525886536, 'logits/rejected': -0.9988424777984619, 'epoch': 0.87} + 87%|████████▋ | 590/681 [25:06<03:44, 2.46s/it] 87%|████████▋ | 591/681 [25:08<03:33, 2.37s/it] {'loss': 1.0991, 'grad_norm': 31.656789779663086, 'learning_rate': 2.678415274939408e-08, 'fcm_dpo/beta': 0.0009555625729262829, 'fcm_dpo/q_t': 0.4076194763183594, 'fcm_dpo/delta': 0.0009453542297706008, 'fcm_dpo/margin': 417.65093994140625, 'margin_dpo/margin_mean': 417.65093994140625, 'margin_dpo/margin_std': 582.1824951171875, 'logps/chosen': -716.8046875, 'logps/rejected': -1162.0013427734375, 'logps/ref_chosen': -56.24061965942383, 'logps/ref_rejected': -83.78629302978516, 'KL/chosen_KL_mean': -660.5640869140625, 'KL/rejected_KL_mean': -1078.215087890625, 'KL/mean': -869.3895874023438, 'KL/std': 518.4139404296875, 'logits/chosen': -1.0377655029296875, 'logits/rejected': -1.0326879024505615, 'epoch': 0.87} + 87%|████████▋ | 591/681 [25:08<03:33, 2.37s/it] 87%|████████▋ | 592/681 [25:10<03:37, 2.44s/it] {'loss': 1.1662, 'grad_norm': 32.95262908935547, 'learning_rate': 2.6209177161234442e-08, 'fcm_dpo/beta': 0.0009605808882042766, 'fcm_dpo/q_t': 0.41771793365478516, 'fcm_dpo/delta': 0.03677193447947502, 'fcm_dpo/margin': 379.5429992675781, 'margin_dpo/margin_mean': 379.54296875, 'margin_dpo/margin_std': 707.2916870117188, 'logps/chosen': -739.423095703125, 'logps/rejected': -1146.7587890625, 'logps/ref_chosen': -47.94025421142578, 'logps/ref_rejected': -75.73287963867188, 'KL/chosen_KL_mean': -691.4828491210938, 'KL/rejected_KL_mean': -1071.02587890625, 'KL/mean': -881.25439453125, 'KL/std': 527.1260375976562, 'logits/chosen': -1.0429775714874268, 'logits/rejected': -1.0484647750854492, 'epoch': 0.87} + 87%|████████▋ | 592/681 [25:10<03:37, 2.44s/it] 87%|████████▋ | 593/681 [25:13<03:38, 2.49s/it] {'loss': 1.2117, 'grad_norm': 49.979095458984375, 'learning_rate': 2.564009866938349e-08, 'fcm_dpo/beta': 0.0009675570763647556, 'fcm_dpo/q_t': 0.4363827705383301, 'fcm_dpo/delta': 0.024251248687505722, 'fcm_dpo/margin': 292.6455383300781, 'margin_dpo/margin_mean': 292.6455383300781, 'margin_dpo/margin_std': 629.4410400390625, 'logps/chosen': -701.6932373046875, 'logps/rejected': -1006.555908203125, 'logps/ref_chosen': -48.690757751464844, 'logps/ref_rejected': -60.90800094604492, 'KL/chosen_KL_mean': -653.00244140625, 'KL/rejected_KL_mean': -945.64794921875, 'KL/mean': -799.3251953125, 'KL/std': 570.3946533203125, 'logits/chosen': -0.8947024345397949, 'logits/rejected': -0.8848444819450378, 'epoch': 0.87} + 87%|████████▋ | 593/681 [25:13<03:38, 2.49s/it] 87%|████████▋ | 594/681 [25:15<03:36, 2.49s/it] {'loss': 1.1434, 'grad_norm': 37.234134674072266, 'learning_rate': 2.5076932269588708e-08, 'fcm_dpo/beta': 0.0009798401733860373, 'fcm_dpo/q_t': 0.41849082708358765, 'fcm_dpo/delta': 0.03994458168745041, 'fcm_dpo/margin': 367.78497314453125, 'margin_dpo/margin_mean': 367.7850036621094, 'margin_dpo/margin_std': 599.2181396484375, 'logps/chosen': -700.6382446289062, 'logps/rejected': -1099.5880126953125, 'logps/ref_chosen': -54.93488693237305, 'logps/ref_rejected': -86.09967803955078, 'KL/chosen_KL_mean': -645.703369140625, 'KL/rejected_KL_mean': -1013.4883422851562, 'KL/mean': -829.5958251953125, 'KL/std': 561.0531616210938, 'logits/chosen': -1.017820954322815, 'logits/rejected': -1.0110870599746704, 'epoch': 0.87} + 87%|████████▋ | 594/681 [25:15<03:36, 2.49s/it] 87%|████████▋ | 595/681 [25:18<03:36, 2.52s/it] {'loss': 1.1, 'grad_norm': 41.969970703125, 'learning_rate': 2.451969280180849e-08, 'fcm_dpo/beta': 0.0009743094560690224, 'fcm_dpo/q_t': 0.41215771436691284, 'fcm_dpo/delta': 0.006154121831059456, 'fcm_dpo/margin': 404.4147033691406, 'margin_dpo/margin_mean': 404.41473388671875, 'margin_dpo/margin_std': 568.5491333007812, 'logps/chosen': -640.972412109375, 'logps/rejected': -1076.593994140625, 'logps/ref_chosen': -49.4204216003418, 'logps/ref_rejected': -80.62731170654297, 'KL/chosen_KL_mean': -591.552001953125, 'KL/rejected_KL_mean': -995.966796875, 'KL/mean': -793.7593994140625, 'KL/std': 504.74029541015625, 'logits/chosen': -1.0084481239318848, 'logits/rejected': -1.0286178588867188, 'epoch': 0.87} + 87%|████████▋ | 595/681 [25:18<03:36, 2.52s/it] 88%|████████▊ | 596/681 [25:21<03:36, 2.55s/it] {'loss': 1.1997, 'grad_norm': 55.20982360839844, 'learning_rate': 2.396839494982103e-08, 'fcm_dpo/beta': 0.000993602559901774, 'fcm_dpo/q_t': 0.43519163131713867, 'fcm_dpo/delta': 0.10849238932132721, 'fcm_dpo/margin': 296.4541015625, 'margin_dpo/margin_mean': 296.4541015625, 'margin_dpo/margin_std': 616.732421875, 'logps/chosen': -743.546630859375, 'logps/rejected': -1060.300048828125, 'logps/ref_chosen': -59.791683197021484, 'logps/ref_rejected': -80.09111785888672, 'KL/chosen_KL_mean': -683.7548828125, 'KL/rejected_KL_mean': -980.2089233398438, 'KL/mean': -831.98193359375, 'KL/std': 516.1655883789062, 'logits/chosen': -0.9886128306388855, 'logits/rejected': -0.9605743885040283, 'epoch': 0.88} + 88%|████████▊ | 596/681 [25:21<03:36, 2.55s/it] 88%|████████▊ | 597/681 [25:23<03:35, 2.56s/it] {'loss': 1.0574, 'grad_norm': 28.543447494506836, 'learning_rate': 2.3423053240837514e-08, 'fcm_dpo/beta': 0.0009732224280014634, 'fcm_dpo/q_t': 0.3910676836967468, 'fcm_dpo/delta': -0.10066782683134079, 'fcm_dpo/margin': 506.2997131347656, 'margin_dpo/margin_mean': 506.2997131347656, 'margin_dpo/margin_std': 674.1318359375, 'logps/chosen': -720.4802856445312, 'logps/rejected': -1270.212890625, 'logps/ref_chosen': -57.26078796386719, 'logps/ref_rejected': -100.6937255859375, 'KL/chosen_KL_mean': -663.219482421875, 'KL/rejected_KL_mean': -1169.5191650390625, 'KL/mean': -916.369384765625, 'KL/std': 611.7984619140625, 'logits/chosen': -0.9715480208396912, 'logits/rejected': -1.0194578170776367, 'epoch': 0.88} + 88%|████████▊ | 597/681 [25:23<03:35, 2.56s/it] 88%|████████▊ | 598/681 [25:26<03:27, 2.50s/it] {'loss': 1.1157, 'grad_norm': 44.60902404785156, 'learning_rate': 2.2883682045119062e-08, 'fcm_dpo/beta': 0.0009721480309963226, 'fcm_dpo/q_t': 0.4108089506626129, 'fcm_dpo/delta': 0.012626536190509796, 'fcm_dpo/margin': 397.7920227050781, 'margin_dpo/margin_mean': 397.79205322265625, 'margin_dpo/margin_std': 574.5714721679688, 'logps/chosen': -713.7061767578125, 'logps/rejected': -1148.423583984375, 'logps/ref_chosen': -52.51850509643555, 'logps/ref_rejected': -89.44385528564453, 'KL/chosen_KL_mean': -661.1876831054688, 'KL/rejected_KL_mean': -1058.979736328125, 'KL/mean': -860.0836791992188, 'KL/std': 523.8168334960938, 'logits/chosen': -0.9934415817260742, 'logits/rejected': -1.0047008991241455, 'epoch': 0.88} + 88%|████████▊ | 598/681 [25:26<03:27, 2.50s/it] 88%|████████▊ | 599/681 [25:28<03:30, 2.56s/it] {'loss': 1.1461, 'grad_norm': 29.521316528320312, 'learning_rate': 2.2350295575598367e-08, 'fcm_dpo/beta': 0.0009692448657006025, 'fcm_dpo/q_t': 0.42119812965393066, 'fcm_dpo/delta': -0.05783551558852196, 'fcm_dpo/margin': 336.578857421875, 'margin_dpo/margin_mean': 336.578857421875, 'margin_dpo/margin_std': 485.81683349609375, 'logps/chosen': -730.460693359375, 'logps/rejected': -1100.2154541015625, 'logps/ref_chosen': -49.802677154541016, 'logps/ref_rejected': -82.978515625, 'KL/chosen_KL_mean': -680.6580810546875, 'KL/rejected_KL_mean': -1017.2368774414062, 'KL/mean': -848.9474487304688, 'KL/std': 514.0728149414062, 'logits/chosen': -0.9816111326217651, 'logits/rejected': -0.9900449514389038, 'epoch': 0.88} + 88%|████████▊ | 599/681 [25:28<03:30, 2.56s/it] 88%|████████▊ | 600/681 [25:31<03:33, 2.63s/it] {'loss': 1.1791, 'grad_norm': 27.683170318603516, 'learning_rate': 2.1822907887504932e-08, 'fcm_dpo/beta': 0.0009796018712222576, 'fcm_dpo/q_t': 0.42857182025909424, 'fcm_dpo/delta': 0.08520510792732239, 'fcm_dpo/margin': 324.20037841796875, 'margin_dpo/margin_mean': 324.20037841796875, 'margin_dpo/margin_std': 615.3521118164062, 'logps/chosen': -805.2385864257812, 'logps/rejected': -1148.4605712890625, 'logps/ref_chosen': -66.43487548828125, 'logps/ref_rejected': -85.45649719238281, 'KL/chosen_KL_mean': -738.8037109375, 'KL/rejected_KL_mean': -1063.004150390625, 'KL/mean': -900.9039306640625, 'KL/std': 517.9166870117188, 'logits/chosen': -1.0804599523544312, 'logits/rejected': -1.078963279724121, 'epoch': 0.88} + 88%|████████▊ | 600/681 [25:31<03:33, 2.63s/it] 88%|████████▊ | 601/681 [25:33<03:23, 2.55s/it] {'loss': 1.0895, 'grad_norm': 35.56853103637695, 'learning_rate': 2.1301532877994742e-08, 'fcm_dpo/beta': 0.0009836689569056034, 'fcm_dpo/q_t': 0.4059777557849884, 'fcm_dpo/delta': -0.004790919832885265, 'fcm_dpo/margin': 411.2587890625, 'margin_dpo/margin_mean': 411.2587890625, 'margin_dpo/margin_std': 544.517822265625, 'logps/chosen': -793.01171875, 'logps/rejected': -1239.8277587890625, 'logps/ref_chosen': -59.13361358642578, 'logps/ref_rejected': -94.69093322753906, 'KL/chosen_KL_mean': -733.8780517578125, 'KL/rejected_KL_mean': -1145.13671875, 'KL/mean': -939.5074462890625, 'KL/std': 553.6764526367188, 'logits/chosen': -0.9942201972007751, 'logits/rejected': -1.0163451433181763, 'epoch': 0.88} + 88%|████████▊ | 601/681 [25:33<03:23, 2.55s/it] 88%|████████▊ | 602/681 [25:36<03:20, 2.54s/it] {'loss': 1.0188, 'grad_norm': 64.79503631591797, 'learning_rate': 2.0786184285784298e-08, 'fcm_dpo/beta': 0.0009767541196197271, 'fcm_dpo/q_t': 0.3871699869632721, 'fcm_dpo/delta': -0.09598802030086517, 'fcm_dpo/margin': 502.8282775878906, 'margin_dpo/margin_mean': 502.8282775878906, 'margin_dpo/margin_std': 533.3328857421875, 'logps/chosen': -557.080078125, 'logps/rejected': -1098.9833984375, 'logps/ref_chosen': -48.59352111816406, 'logps/ref_rejected': -87.6685562133789, 'KL/chosen_KL_mean': -508.48651123046875, 'KL/rejected_KL_mean': -1011.3148803710938, 'KL/mean': -759.9006958007812, 'KL/std': 511.07110595703125, 'logits/chosen': -1.037233591079712, 'logits/rejected': -1.0741159915924072, 'epoch': 0.88} + 88%|████████▊ | 602/681 [25:36<03:20, 2.54s/it] 89%|████████▊ | 603/681 [25:38<03:15, 2.50s/it] {'loss': 1.0902, 'grad_norm': 37.1284065246582, 'learning_rate': 2.0276875690788204e-08, 'fcm_dpo/beta': 0.0009601364727132022, 'fcm_dpo/q_t': 0.4039689302444458, 'fcm_dpo/delta': -0.03442168980836868, 'fcm_dpo/margin': 450.83575439453125, 'margin_dpo/margin_mean': 450.83575439453125, 'margin_dpo/margin_std': 660.8984375, 'logps/chosen': -712.3211059570312, 'logps/rejected': -1193.06787109375, 'logps/ref_chosen': -70.41461944580078, 'logps/ref_rejected': -100.32559967041016, 'KL/chosen_KL_mean': -641.906494140625, 'KL/rejected_KL_mean': -1092.7423095703125, 'KL/mean': -867.3243408203125, 'KL/std': 565.2948608398438, 'logits/chosen': -1.0195807218551636, 'logits/rejected': -1.0123958587646484, 'epoch': 0.89} + 89%|████████▊ | 603/681 [25:38<03:15, 2.50s/it] 89%|████████▊ | 604/681 [25:41<03:19, 2.60s/it] {'loss': 1.0613, 'grad_norm': 41.052913665771484, 'learning_rate': 1.977362051376158e-08, 'fcm_dpo/beta': 0.0009481116430833936, 'fcm_dpo/q_t': 0.39480096101760864, 'fcm_dpo/delta': -0.07910436391830444, 'fcm_dpo/margin': 501.3348693847656, 'margin_dpo/margin_mean': 501.33489990234375, 'margin_dpo/margin_std': 684.626708984375, 'logps/chosen': -680.0690307617188, 'logps/rejected': -1226.80029296875, 'logps/ref_chosen': -46.45808029174805, 'logps/ref_rejected': -91.8544921875, 'KL/chosen_KL_mean': -633.6109619140625, 'KL/rejected_KL_mean': -1134.94580078125, 'KL/mean': -884.2783813476562, 'KL/std': 557.851318359375, 'logits/chosen': -1.0104937553405762, 'logits/rejected': -1.0476266145706177, 'epoch': 0.89} + 89%|████████▊ | 604/681 [25:41<03:19, 2.60s/it] 89%|████████▉ | 605/681 [25:44<03:18, 2.61s/it] {'loss': 1.1459, 'grad_norm': 32.279541015625, 'learning_rate': 1.9276432015946446e-08, 'fcm_dpo/beta': 0.0009504579938948154, 'fcm_dpo/q_t': 0.4232047498226166, 'fcm_dpo/delta': 0.05476874113082886, 'fcm_dpo/margin': 365.2569580078125, 'margin_dpo/margin_mean': 365.2569885253906, 'margin_dpo/margin_std': 619.3516845703125, 'logps/chosen': -720.2533569335938, 'logps/rejected': -1121.56591796875, 'logps/ref_chosen': -66.24933624267578, 'logps/ref_rejected': -102.30496978759766, 'KL/chosen_KL_mean': -654.0040283203125, 'KL/rejected_KL_mean': -1019.260986328125, 'KL/mean': -836.632568359375, 'KL/std': 520.555908203125, 'logits/chosen': -0.9521446228027344, 'logits/rejected': -0.9669671654701233, 'epoch': 0.89} + 89%|████████▉ | 605/681 [25:44<03:18, 2.61s/it] 89%|████████▉ | 606/681 [25:46<03:11, 2.55s/it] {'loss': 1.105, 'grad_norm': 27.56992530822754, 'learning_rate': 1.8785323298722093e-08, 'fcm_dpo/beta': 0.0009606323437765241, 'fcm_dpo/q_t': 0.41221147775650024, 'fcm_dpo/delta': 0.014511629939079285, 'fcm_dpo/margin': 401.52313232421875, 'margin_dpo/margin_mean': 401.52313232421875, 'margin_dpo/margin_std': 560.071533203125, 'logps/chosen': -719.1483764648438, 'logps/rejected': -1164.223876953125, 'logps/ref_chosen': -54.819122314453125, 'logps/ref_rejected': -98.37146759033203, 'KL/chosen_KL_mean': -664.3292236328125, 'KL/rejected_KL_mean': -1065.852294921875, 'KL/mean': -865.0908203125, 'KL/std': 528.2071533203125, 'logits/chosen': -0.9551470875740051, 'logits/rejected': -0.9689816236495972, 'epoch': 0.89} + 89%|████████▉ | 606/681 [25:46<03:11, 2.55s/it] 89%|████████▉ | 607/681 [25:49<03:10, 2.58s/it] {'loss': 1.1528, 'grad_norm': 26.594741821289062, 'learning_rate': 1.8300307303259904e-08, 'fcm_dpo/beta': 0.0009705802076496184, 'fcm_dpo/q_t': 0.42473822832107544, 'fcm_dpo/delta': 0.0699785053730011, 'fcm_dpo/margin': 342.0454406738281, 'margin_dpo/margin_mean': 342.0454406738281, 'margin_dpo/margin_std': 565.7429809570312, 'logps/chosen': -743.32958984375, 'logps/rejected': -1107.068115234375, 'logps/ref_chosen': -58.08403778076172, 'logps/ref_rejected': -79.777099609375, 'KL/chosen_KL_mean': -685.24560546875, 'KL/rejected_KL_mean': -1027.291015625, 'KL/mean': -856.268310546875, 'KL/std': 536.4832763671875, 'logits/chosen': -0.9825999140739441, 'logits/rejected': -0.9769987463951111, 'epoch': 0.89} + 89%|████████▉ | 607/681 [25:49<03:10, 2.58s/it] 89%|████████▉ | 608/681 [25:51<03:04, 2.52s/it] {'loss': 1.0893, 'grad_norm': 30.251602172851562, 'learning_rate': 1.7821396810182437e-08, 'fcm_dpo/beta': 0.0009749716846272349, 'fcm_dpo/q_t': 0.4095849096775055, 'fcm_dpo/delta': 0.009674161672592163, 'fcm_dpo/margin': 400.5929260253906, 'margin_dpo/margin_mean': 400.5929260253906, 'margin_dpo/margin_std': 500.50164794921875, 'logps/chosen': -671.3092041015625, 'logps/rejected': -1109.224609375, 'logps/ref_chosen': -57.450836181640625, 'logps/ref_rejected': -94.77339172363281, 'KL/chosen_KL_mean': -613.8583374023438, 'KL/rejected_KL_mean': -1014.4512939453125, 'KL/mean': -814.1548461914062, 'KL/std': 486.152099609375, 'logits/chosen': -1.0167486667633057, 'logits/rejected': -1.030979871749878, 'epoch': 0.89} + 89%|████████▉ | 608/681 [25:51<03:04, 2.52s/it] 89%|████████▉ | 609/681 [25:54<02:57, 2.46s/it] {'loss': 1.0712, 'grad_norm': 33.4229736328125, 'learning_rate': 1.7348604439226617e-08, 'fcm_dpo/beta': 0.0009604240767657757, 'fcm_dpo/q_t': 0.3968961536884308, 'fcm_dpo/delta': -0.09091140329837799, 'fcm_dpo/margin': 506.5679016113281, 'margin_dpo/margin_mean': 506.56793212890625, 'margin_dpo/margin_std': 784.350830078125, 'logps/chosen': -698.8172607421875, 'logps/rejected': -1235.3958740234375, 'logps/ref_chosen': -58.805355072021484, 'logps/ref_rejected': -88.81600952148438, 'KL/chosen_KL_mean': -640.0119018554688, 'KL/rejected_KL_mean': -1146.579833984375, 'KL/mean': -893.2958984375, 'KL/std': 658.148193359375, 'logits/chosen': -1.0928289890289307, 'logits/rejected': -1.119450569152832, 'epoch': 0.89} + 89%|████████▉ | 609/681 [25:54<02:57, 2.46s/it] 90%|████████▉ | 610/681 [25:56<02:52, 2.43s/it] {'loss': 1.1736, 'grad_norm': 41.30014419555664, 'learning_rate': 1.6881942648911074e-08, 'fcm_dpo/beta': 0.0009702660609036684, 'fcm_dpo/q_t': 0.42758795619010925, 'fcm_dpo/delta': 0.09187015891075134, 'fcm_dpo/margin': 320.3234558105469, 'margin_dpo/margin_mean': 320.3234558105469, 'margin_dpo/margin_std': 581.2178955078125, 'logps/chosen': -692.6092529296875, 'logps/rejected': -1030.64306640625, 'logps/ref_chosen': -65.69503784179688, 'logps/ref_rejected': -83.40538787841797, 'KL/chosen_KL_mean': -626.9141845703125, 'KL/rejected_KL_mean': -947.2376708984375, 'KL/mean': -787.075927734375, 'KL/std': 506.7711486816406, 'logits/chosen': -0.9679499864578247, 'logits/rejected': -0.9463798999786377, 'epoch': 0.9} + 90%|████████▉ | 610/681 [25:56<02:52, 2.43s/it] 90%|████████▉ | 611/681 [25:58<02:46, 2.38s/it] {'loss': 1.0465, 'grad_norm': 30.76310920715332, 'learning_rate': 1.6421423736208e-08, 'fcm_dpo/beta': 0.0009553628042340279, 'fcm_dpo/q_t': 0.3883088231086731, 'fcm_dpo/delta': -0.12215965986251831, 'fcm_dpo/margin': 539.90380859375, 'margin_dpo/margin_mean': 539.90380859375, 'margin_dpo/margin_std': 748.833251953125, 'logps/chosen': -722.7176513671875, 'logps/rejected': -1296.35302734375, 'logps/ref_chosen': -52.59946823120117, 'logps/ref_rejected': -86.33099365234375, 'KL/chosen_KL_mean': -670.1181640625, 'KL/rejected_KL_mean': -1210.02197265625, 'KL/mean': -940.070068359375, 'KL/std': 670.6271362304688, 'logits/chosen': -1.0270860195159912, 'logits/rejected': -1.0751309394836426, 'epoch': 0.9} + 90%|████████▉ | 611/681 [25:58<02:46, 2.38s/it] 90%|████████▉ | 612/681 [26:01<02:44, 2.39s/it] {'loss': 1.0998, 'grad_norm': 27.917463302612305, 'learning_rate': 1.5967059836219042e-08, 'fcm_dpo/beta': 0.0009539818856865168, 'fcm_dpo/q_t': 0.410278856754303, 'fcm_dpo/delta': 0.010405594483017921, 'fcm_dpo/margin': 408.61627197265625, 'margin_dpo/margin_mean': 408.61627197265625, 'margin_dpo/margin_std': 555.7114868164062, 'logps/chosen': -759.0435791015625, 'logps/rejected': -1196.6485595703125, 'logps/ref_chosen': -59.32372283935547, 'logps/ref_rejected': -88.31239318847656, 'KL/chosen_KL_mean': -699.7198486328125, 'KL/rejected_KL_mean': -1108.336181640625, 'KL/mean': -904.0280151367188, 'KL/std': 525.3094482421875, 'logits/chosen': -1.0200650691986084, 'logits/rejected': -1.0243524312973022, 'epoch': 0.9} + 90%|████████▉ | 612/681 [26:01<02:44, 2.39s/it] 90%|█████████ | 613/681 [26:03<02:45, 2.43s/it] {'loss': 1.0397, 'grad_norm': 32.273929595947266, 'learning_rate': 1.551886292185553e-08, 'fcm_dpo/beta': 0.000938057666644454, 'fcm_dpo/q_t': 0.39130979776382446, 'fcm_dpo/delta': -0.07989558577537537, 'fcm_dpo/margin': 507.1754150390625, 'margin_dpo/margin_mean': 507.1754150390625, 'margin_dpo/margin_std': 607.134033203125, 'logps/chosen': -688.6175537109375, 'logps/rejected': -1241.1705322265625, 'logps/ref_chosen': -59.72996520996094, 'logps/ref_rejected': -105.10752868652344, 'KL/chosen_KL_mean': -628.8876342773438, 'KL/rejected_KL_mean': -1136.06298828125, 'KL/mean': -882.475341796875, 'KL/std': 600.5887451171875, 'logits/chosen': -1.0217537879943848, 'logits/rejected': -1.0764918327331543, 'epoch': 0.9} + 90%|█████████ | 613/681 [26:03<02:45, 2.43s/it] 90%|█████████ | 614/681 [26:06<02:44, 2.45s/it] {'loss': 1.0728, 'grad_norm': 43.64583206176758, 'learning_rate': 1.507684480352292e-08, 'fcm_dpo/beta': 0.0009298705263063312, 'fcm_dpo/q_t': 0.39804306626319885, 'fcm_dpo/delta': -0.057572945952415466, 'fcm_dpo/margin': 489.3064270019531, 'margin_dpo/margin_mean': 489.30645751953125, 'margin_dpo/margin_std': 680.9266357421875, 'logps/chosen': -749.4744262695312, 'logps/rejected': -1290.521240234375, 'logps/ref_chosen': -52.93898010253906, 'logps/ref_rejected': -104.67938232421875, 'KL/chosen_KL_mean': -696.5354614257812, 'KL/rejected_KL_mean': -1185.8419189453125, 'KL/mean': -941.188720703125, 'KL/std': 581.6685180664062, 'logits/chosen': -1.0031187534332275, 'logits/rejected': -1.0775550603866577, 'epoch': 0.9} + 90%|█████████ | 614/681 [26:06<02:44, 2.45s/it] 90%|█████████ | 615/681 [26:08<02:48, 2.55s/it] {'loss': 1.1233, 'grad_norm': 26.297256469726562, 'learning_rate': 1.4641017128809801e-08, 'fcm_dpo/beta': 0.0009312764159403741, 'fcm_dpo/q_t': 0.412952184677124, 'fcm_dpo/delta': 0.017031406983733177, 'fcm_dpo/margin': 411.66131591796875, 'margin_dpo/margin_mean': 411.6612854003906, 'margin_dpo/margin_std': 651.5823974609375, 'logps/chosen': -722.175048828125, 'logps/rejected': -1163.196533203125, 'logps/ref_chosen': -65.81727600097656, 'logps/ref_rejected': -95.17749786376953, 'KL/chosen_KL_mean': -656.3577880859375, 'KL/rejected_KL_mean': -1068.01904296875, 'KL/mean': -862.1884155273438, 'KL/std': 608.759033203125, 'logits/chosen': -0.9910403490066528, 'logits/rejected': -1.014068365097046, 'epoch': 0.9} + 90%|█████████ | 615/681 [26:08<02:48, 2.55s/it] 90%|█████████ | 616/681 [26:11<02:49, 2.61s/it] {'loss': 1.1659, 'grad_norm': 34.39405822753906, 'learning_rate': 1.4211391382180637e-08, 'fcm_dpo/beta': 0.0009417695691809058, 'fcm_dpo/q_t': 0.4281091094017029, 'fcm_dpo/delta': 0.08782128244638443, 'fcm_dpo/margin': 334.37677001953125, 'margin_dpo/margin_mean': 334.37677001953125, 'margin_dpo/margin_std': 575.5633544921875, 'logps/chosen': -839.8283081054688, 'logps/rejected': -1183.772705078125, 'logps/ref_chosen': -65.13285827636719, 'logps/ref_rejected': -74.70050048828125, 'KL/chosen_KL_mean': -774.6954345703125, 'KL/rejected_KL_mean': -1109.0721435546875, 'KL/mean': -941.8838500976562, 'KL/std': 509.840576171875, 'logits/chosen': -1.0712862014770508, 'logits/rejected': -1.0602033138275146, 'epoch': 0.9} + 90%|█████████ | 616/681 [26:11<02:49, 2.61s/it] 91%|█████████ | 617/681 [26:14<02:46, 2.61s/it] {'loss': 1.2385, 'grad_norm': 53.658565521240234, 'learning_rate': 1.378797888467345e-08, 'fcm_dpo/beta': 0.0009709987789392471, 'fcm_dpo/q_t': 0.44701701402664185, 'fcm_dpo/delta': 0.16959968209266663, 'fcm_dpo/margin': 241.1470489501953, 'margin_dpo/margin_mean': 241.14703369140625, 'margin_dpo/margin_std': 569.4755859375, 'logps/chosen': -795.143798828125, 'logps/rejected': -1037.51953125, 'logps/ref_chosen': -63.005550384521484, 'logps/ref_rejected': -64.234130859375, 'KL/chosen_KL_mean': -732.1382446289062, 'KL/rejected_KL_mean': -973.285400390625, 'KL/mean': -852.7117919921875, 'KL/std': 490.1700439453125, 'logits/chosen': -0.9488894939422607, 'logits/rejected': -0.9184377789497375, 'epoch': 0.91} + 91%|█████████ | 617/681 [26:14<02:46, 2.61s/it] 91%|█████████ | 618/681 [26:16<02:44, 2.61s/it] {'loss': 1.0849, 'grad_norm': 40.38612747192383, 'learning_rate': 1.3370790793601371e-08, 'fcm_dpo/beta': 0.0009685006225481629, 'fcm_dpo/q_t': 0.3915684223175049, 'fcm_dpo/delta': -0.11168282479047775, 'fcm_dpo/margin': 522.3434448242188, 'margin_dpo/margin_mean': 522.3434448242188, 'margin_dpo/margin_std': 827.5892333984375, 'logps/chosen': -830.9668579101562, 'logps/rejected': -1378.3623046875, 'logps/ref_chosen': -67.10134887695312, 'logps/ref_rejected': -92.15340423583984, 'KL/chosen_KL_mean': -763.865478515625, 'KL/rejected_KL_mean': -1286.208984375, 'KL/mean': -1025.0372314453125, 'KL/std': 651.3458251953125, 'logits/chosen': -1.0099037885665894, 'logits/rejected': -1.0500290393829346, 'epoch': 0.91} + 91%|█████████ | 618/681 [26:16<02:44, 2.61s/it] 91%|█████████ | 619/681 [26:19<02:41, 2.60s/it] {'loss': 1.1707, 'grad_norm': 49.994224548339844, 'learning_rate': 1.2959838102258535e-08, 'fcm_dpo/beta': 0.0009594704024493694, 'fcm_dpo/q_t': 0.42001599073410034, 'fcm_dpo/delta': 0.016789617016911507, 'fcm_dpo/margin': 400.0561828613281, 'margin_dpo/margin_mean': 400.0561828613281, 'margin_dpo/margin_std': 802.4068603515625, 'logps/chosen': -807.3140869140625, 'logps/rejected': -1244.577392578125, 'logps/ref_chosen': -55.978233337402344, 'logps/ref_rejected': -93.1854019165039, 'KL/chosen_KL_mean': -751.3358154296875, 'KL/rejected_KL_mean': -1151.3919677734375, 'KL/mean': -951.3638916015625, 'KL/std': 605.0140380859375, 'logits/chosen': -0.9973533153533936, 'logits/rejected': -1.0120331048965454, 'epoch': 0.91} + 91%|█████████ | 619/681 [26:19<02:41, 2.60s/it] 91%|█████████ | 620/681 [26:21<02:36, 2.57s/it] {'loss': 1.1326, 'grad_norm': 34.35021209716797, 'learning_rate': 1.2555131639630567e-08, 'fcm_dpo/beta': 0.000961203477345407, 'fcm_dpo/q_t': 0.4174911379814148, 'fcm_dpo/delta': 0.035247065126895905, 'fcm_dpo/margin': 380.6529846191406, 'margin_dpo/margin_mean': 380.6529846191406, 'margin_dpo/margin_std': 605.3323974609375, 'logps/chosen': -751.6961059570312, 'logps/rejected': -1150.96240234375, 'logps/ref_chosen': -59.79750061035156, 'logps/ref_rejected': -78.41075134277344, 'KL/chosen_KL_mean': -691.8986206054688, 'KL/rejected_KL_mean': -1072.551513671875, 'KL/mean': -882.22509765625, 'KL/std': 538.706787109375, 'logits/chosen': -1.0476036071777344, 'logits/rejected': -1.0572441816329956, 'epoch': 0.91} + 91%|█████████ | 620/681 [26:22<02:36, 2.57s/it] 91%|█████████ | 621/681 [26:24<02:34, 2.57s/it] {'loss': 1.033, 'grad_norm': 39.83711624145508, 'learning_rate': 1.2156682070109086e-08, 'fcm_dpo/beta': 0.0009547668742015958, 'fcm_dpo/q_t': 0.3814903199672699, 'fcm_dpo/delta': -0.13341151177883148, 'fcm_dpo/margin': 551.19140625, 'margin_dpo/margin_mean': 551.19140625, 'margin_dpo/margin_std': 702.85986328125, 'logps/chosen': -719.3101196289062, 'logps/rejected': -1304.937255859375, 'logps/ref_chosen': -53.93375778198242, 'logps/ref_rejected': -88.36951446533203, 'KL/chosen_KL_mean': -665.3763427734375, 'KL/rejected_KL_mean': -1216.567626953125, 'KL/mean': -940.9721069335938, 'KL/std': 632.556884765625, 'logits/chosen': -1.0708098411560059, 'logits/rejected': -1.1203954219818115, 'epoch': 0.91} + 91%|█████████ | 621/681 [26:24<02:34, 2.57s/it] 91%|█████████▏| 622/681 [26:27<02:31, 2.57s/it] {'loss': 1.1242, 'grad_norm': 30.787620544433594, 'learning_rate': 1.1764499893210878e-08, 'fcm_dpo/beta': 0.0009404352167621255, 'fcm_dpo/q_t': 0.4160274565219879, 'fcm_dpo/delta': 0.0270434208214283, 'fcm_dpo/margin': 397.30218505859375, 'margin_dpo/margin_mean': 397.30218505859375, 'margin_dpo/margin_std': 618.2388916015625, 'logps/chosen': -718.8143310546875, 'logps/rejected': -1141.349365234375, 'logps/ref_chosen': -60.28582000732422, 'logps/ref_rejected': -85.51873779296875, 'KL/chosen_KL_mean': -658.5285034179688, 'KL/rejected_KL_mean': -1055.8306884765625, 'KL/mean': -857.1796264648438, 'KL/std': 493.7017822265625, 'logits/chosen': -0.9209400415420532, 'logits/rejected': -0.9098290205001831, 'epoch': 0.91} + 91%|█████████▏| 622/681 [26:27<02:31, 2.57s/it] 91%|█████████▏| 623/681 [26:29<02:21, 2.44s/it] {'loss': 1.1853, 'grad_norm': 33.62225341796875, 'learning_rate': 1.1378595443300998e-08, 'fcm_dpo/beta': 0.0009606959065422416, 'fcm_dpo/q_t': 0.4334968328475952, 'fcm_dpo/delta': 0.10166360437870026, 'fcm_dpo/margin': 313.7361755371094, 'margin_dpo/margin_mean': 313.7362060546875, 'margin_dpo/margin_std': 610.1846923828125, 'logps/chosen': -786.423583984375, 'logps/rejected': -1121.0859375, 'logps/ref_chosen': -64.1569595336914, 'logps/ref_rejected': -85.08304595947266, 'KL/chosen_KL_mean': -722.2666015625, 'KL/rejected_KL_mean': -1036.0028076171875, 'KL/mean': -879.1347045898438, 'KL/std': 512.3851318359375, 'logits/chosen': -1.0529344081878662, 'logits/rejected': -1.0536704063415527, 'epoch': 0.91} + 91%|█████████▏| 623/681 [26:29<02:21, 2.44s/it] 92%|█████████▏| 624/681 [26:31<02:20, 2.46s/it] {'loss': 1.0585, 'grad_norm': 39.309574127197266, 'learning_rate': 1.0998978889320582e-08, 'fcm_dpo/beta': 0.0009594388538971543, 'fcm_dpo/q_t': 0.3950585722923279, 'fcm_dpo/delta': -0.05099187046289444, 'fcm_dpo/margin': 467.7041015625, 'margin_dpo/margin_mean': 467.7041015625, 'margin_dpo/margin_std': 573.2549438476562, 'logps/chosen': -782.1905517578125, 'logps/rejected': -1275.1080322265625, 'logps/ref_chosen': -71.91862487792969, 'logps/ref_rejected': -97.13203430175781, 'KL/chosen_KL_mean': -710.2718505859375, 'KL/rejected_KL_mean': -1177.9759521484375, 'KL/mean': -944.1239013671875, 'KL/std': 529.1799926757812, 'logits/chosen': -1.0544100999832153, 'logits/rejected': -1.0604016780853271, 'epoch': 0.92} + 92%|█████████▏| 624/681 [26:31<02:20, 2.46s/it] 92%|█████████▏| 625/681 [26:34<02:20, 2.51s/it] {'loss': 1.0137, 'grad_norm': 58.24129867553711, 'learning_rate': 1.0625660234518913e-08, 'fcm_dpo/beta': 0.0009445177856832743, 'fcm_dpo/q_t': 0.38663381338119507, 'fcm_dpo/delta': -0.10063250362873077, 'fcm_dpo/margin': 524.8773193359375, 'margin_dpo/margin_mean': 524.8773193359375, 'margin_dpo/margin_std': 560.148193359375, 'logps/chosen': -734.79296875, 'logps/rejected': -1287.4185791015625, 'logps/ref_chosen': -58.342071533203125, 'logps/ref_rejected': -86.09038543701172, 'KL/chosen_KL_mean': -676.450927734375, 'KL/rejected_KL_mean': -1201.328125, 'KL/mean': -938.8895263671875, 'KL/std': 594.9174194335938, 'logits/chosen': -1.006543755531311, 'logits/rejected': -1.0352264642715454, 'epoch': 0.92} + 92%|█████████▏| 625/681 [26:34<02:20, 2.51s/it] 92%|█████████▏| 626/681 [26:37<02:23, 2.61s/it] {'loss': 1.2195, 'grad_norm': 32.43234634399414, 'learning_rate': 1.0258649316189721e-08, 'fcm_dpo/beta': 0.0009557833545841277, 'fcm_dpo/q_t': 0.4362901449203491, 'fcm_dpo/delta': 0.1316283643245697, 'fcm_dpo/margin': 284.48504638671875, 'margin_dpo/margin_mean': 284.48504638671875, 'margin_dpo/margin_std': 614.84765625, 'logps/chosen': -901.575927734375, 'logps/rejected': -1210.136962890625, 'logps/ref_chosen': -75.11260986328125, 'logps/ref_rejected': -99.188720703125, 'KL/chosen_KL_mean': -826.4632568359375, 'KL/rejected_KL_mean': -1110.9483642578125, 'KL/mean': -968.705810546875, 'KL/std': 636.0460815429688, 'logits/chosen': -0.9522177577018738, 'logits/rejected': -0.9446706771850586, 'epoch': 0.92} + 92%|█████████▏| 626/681 [26:37<02:23, 2.61s/it] 92%|█████████▏| 627/681 [26:40<02:23, 2.66s/it] {'loss': 1.0279, 'grad_norm': 24.69363784790039, 'learning_rate': 9.897955805412e-09, 'fcm_dpo/beta': 0.000945016392506659, 'fcm_dpo/q_t': 0.3842451572418213, 'fcm_dpo/delta': -0.162668839097023, 'fcm_dpo/margin': 586.0703125, 'margin_dpo/margin_mean': 586.0703125, 'margin_dpo/margin_std': 790.52783203125, 'logps/chosen': -620.2424926757812, 'logps/rejected': -1265.32421875, 'logps/ref_chosen': -47.74314880371094, 'logps/ref_rejected': -106.75448608398438, 'KL/chosen_KL_mean': -572.4993286132812, 'KL/rejected_KL_mean': -1158.569580078125, 'KL/mean': -865.5345458984375, 'KL/std': 690.389892578125, 'logits/chosen': -0.8964744806289673, 'logits/rejected': -0.9713860154151917, 'epoch': 0.92} + 92%|█████████▏| 627/681 [26:40<02:23, 2.66s/it] 92%|█████████▏| 628/681 [26:42<02:18, 2.62s/it] {'loss': 1.1039, 'grad_norm': 28.13714599609375, 'learning_rate': 9.543589206795238e-09, 'fcm_dpo/beta': 0.0009279233636334538, 'fcm_dpo/q_t': 0.40770232677459717, 'fcm_dpo/delta': -0.011030579917132854, 'fcm_dpo/margin': 442.3868408203125, 'margin_dpo/margin_mean': 442.3868408203125, 'margin_dpo/margin_std': 658.2325439453125, 'logps/chosen': -801.66943359375, 'logps/rejected': -1285.427978515625, 'logps/ref_chosen': -60.182945251464844, 'logps/ref_rejected': -101.55467224121094, 'KL/chosen_KL_mean': -741.4865112304688, 'KL/rejected_KL_mean': -1183.873291015625, 'KL/mean': -962.6798706054688, 'KL/std': 572.581787109375, 'logits/chosen': -1.0725154876708984, 'logits/rejected': -1.0900166034698486, 'epoch': 0.92} + 92%|█████████▏| 628/681 [26:42<02:18, 2.62s/it] 92%|█████████▏| 629/681 [26:45<02:15, 2.61s/it] {'loss': 1.1046, 'grad_norm': 34.70335006713867, 'learning_rate': 9.19555885822887e-09, 'fcm_dpo/beta': 0.0009324135025963187, 'fcm_dpo/q_t': 0.4115867018699646, 'fcm_dpo/delta': 0.02201123535633087, 'fcm_dpo/margin': 406.26995849609375, 'margin_dpo/margin_mean': 406.26995849609375, 'margin_dpo/margin_std': 546.7474365234375, 'logps/chosen': -797.7471923828125, 'logps/rejected': -1231.457275390625, 'logps/ref_chosen': -64.21354675292969, 'logps/ref_rejected': -91.65367126464844, 'KL/chosen_KL_mean': -733.53369140625, 'KL/rejected_KL_mean': -1139.8037109375, 'KL/mean': -936.6685791015625, 'KL/std': 552.74169921875, 'logits/chosen': -1.0210623741149902, 'logits/rejected': -1.0353336334228516, 'epoch': 0.92} + 92%|█████████▏| 629/681 [26:45<02:15, 2.61s/it] 93%|█████████▎| 630/681 [26:47<02:13, 2.62s/it] {'loss': 1.2718, 'grad_norm': 52.160728454589844, 'learning_rate': 8.85387393063622e-09, 'fcm_dpo/beta': 0.0009379271068610251, 'fcm_dpo/q_t': 0.45478296279907227, 'fcm_dpo/delta': 0.051444362848997116, 'fcm_dpo/margin': 232.05682373046875, 'margin_dpo/margin_mean': 232.05682373046875, 'margin_dpo/margin_std': 672.6798095703125, 'logps/chosen': -713.111083984375, 'logps/rejected': -969.4752197265625, 'logps/ref_chosen': -59.29100036621094, 'logps/ref_rejected': -83.59829711914062, 'KL/chosen_KL_mean': -653.820068359375, 'KL/rejected_KL_mean': -885.8768920898438, 'KL/mean': -769.8485107421875, 'KL/std': 560.4996337890625, 'logits/chosen': -1.0045530796051025, 'logits/rejected': -0.9794071912765503, 'epoch': 0.93} + 93%|█████████▎| 630/681 [26:47<02:13, 2.62s/it] 93%|█████████▎| 631/681 [26:50<02:08, 2.58s/it] {'loss': 1.1567, 'grad_norm': 34.219573974609375, 'learning_rate': 8.518543427732949e-09, 'fcm_dpo/beta': 0.0009518619626760483, 'fcm_dpo/q_t': 0.42036306858062744, 'fcm_dpo/delta': 0.05409633368253708, 'fcm_dpo/margin': 365.2381286621094, 'margin_dpo/margin_mean': 365.2381286621094, 'margin_dpo/margin_std': 643.0179443359375, 'logps/chosen': -851.8079833984375, 'logps/rejected': -1238.5440673828125, 'logps/ref_chosen': -59.45360565185547, 'logps/ref_rejected': -80.95156860351562, 'KL/chosen_KL_mean': -792.3543701171875, 'KL/rejected_KL_mean': -1157.592529296875, 'KL/mean': -974.973388671875, 'KL/std': 561.6439208984375, 'logits/chosen': -1.0938163995742798, 'logits/rejected': -1.1026105880737305, 'epoch': 0.93} + 93%|█████████▎| 631/681 [26:50<02:08, 2.58s/it] 93%|█████████▎| 632/681 [26:52<02:02, 2.50s/it] {'loss': 1.1352, 'grad_norm': 43.099708557128906, 'learning_rate': 8.189576185789637e-09, 'fcm_dpo/beta': 0.0009528810624033213, 'fcm_dpo/q_t': 0.4152371883392334, 'fcm_dpo/delta': 0.03032829239964485, 'fcm_dpo/margin': 388.82421875, 'margin_dpo/margin_mean': 388.82421875, 'margin_dpo/margin_std': 626.7635498046875, 'logps/chosen': -744.080078125, 'logps/rejected': -1157.712890625, 'logps/ref_chosen': -61.35155487060547, 'logps/ref_rejected': -86.16017150878906, 'KL/chosen_KL_mean': -682.728515625, 'KL/rejected_KL_mean': -1071.552734375, 'KL/mean': -877.140625, 'KL/std': 512.9617919921875, 'logits/chosen': -1.0412629842758179, 'logits/rejected': -1.0417115688323975, 'epoch': 0.93} + 93%|█████████▎| 632/681 [26:52<02:02, 2.50s/it] 93%|█████████▎| 633/681 [26:55<01:58, 2.48s/it] {'loss': 1.2016, 'grad_norm': 47.98980712890625, 'learning_rate': 7.866980873399015e-09, 'fcm_dpo/beta': 0.0009792209602892399, 'fcm_dpo/q_t': 0.43543291091918945, 'fcm_dpo/delta': 0.12146103382110596, 'fcm_dpo/margin': 287.41412353515625, 'margin_dpo/margin_mean': 287.41412353515625, 'margin_dpo/margin_std': 580.196533203125, 'logps/chosen': -804.729248046875, 'logps/rejected': -1126.4490966796875, 'logps/ref_chosen': -57.27816390991211, 'logps/ref_rejected': -91.58395385742188, 'KL/chosen_KL_mean': -747.4510498046875, 'KL/rejected_KL_mean': -1034.8651123046875, 'KL/mean': -891.1580810546875, 'KL/std': 504.61749267578125, 'logits/chosen': -1.0630054473876953, 'logits/rejected': -1.0733153820037842, 'epoch': 0.93} + 93%|█████████▎| 633/681 [26:55<01:58, 2.48s/it] 93%|█████████▎| 634/681 [26:57<01:58, 2.52s/it] {'loss': 1.2382, 'grad_norm': 35.45087814331055, 'learning_rate': 7.550765991247654e-09, 'fcm_dpo/beta': 0.0009960609022527933, 'fcm_dpo/q_t': 0.44638699293136597, 'fcm_dpo/delta': 0.06913906335830688, 'fcm_dpo/margin': 242.82333374023438, 'margin_dpo/margin_mean': 242.82333374023438, 'margin_dpo/margin_std': 585.6327514648438, 'logps/chosen': -922.45068359375, 'logps/rejected': -1205.78076171875, 'logps/ref_chosen': -66.61896514892578, 'logps/ref_rejected': -107.12564849853516, 'KL/chosen_KL_mean': -855.8317260742188, 'KL/rejected_KL_mean': -1098.655029296875, 'KL/mean': -977.2432861328125, 'KL/std': 611.3326416015625, 'logits/chosen': -0.9742704033851624, 'logits/rejected': -0.9662094116210938, 'epoch': 0.93} + 93%|█████████▎| 634/681 [26:57<01:58, 2.52s/it] 93%|█████████▎| 635/681 [27:00<01:55, 2.51s/it] {'loss': 1.1545, 'grad_norm': 38.723793029785156, 'learning_rate': 7.240939871891699e-09, 'fcm_dpo/beta': 0.0010028297547250986, 'fcm_dpo/q_t': 0.42215800285339355, 'fcm_dpo/delta': 0.04947870969772339, 'fcm_dpo/margin': 351.2921142578125, 'margin_dpo/margin_mean': 351.2921142578125, 'margin_dpo/margin_std': 629.5093994140625, 'logps/chosen': -815.1234741210938, 'logps/rejected': -1174.96044921875, 'logps/ref_chosen': -73.95551300048828, 'logps/ref_rejected': -82.50045776367188, 'KL/chosen_KL_mean': -741.16796875, 'KL/rejected_KL_mean': -1092.4599609375, 'KL/mean': -916.81396484375, 'KL/std': 625.1411743164062, 'logits/chosen': -1.0590343475341797, 'logits/rejected': -1.0402554273605347, 'epoch': 0.93} + 93%|█████████▎| 635/681 [27:00<01:55, 2.51s/it] 93%|█████████▎| 636/681 [27:02<01:55, 2.56s/it] {'loss': 1.0923, 'grad_norm': 27.428804397583008, 'learning_rate': 6.937510679537628e-09, 'fcm_dpo/beta': 0.0010084551759064198, 'fcm_dpo/q_t': 0.40682026743888855, 'fcm_dpo/delta': -0.03108617290854454, 'fcm_dpo/margin': 425.3899841308594, 'margin_dpo/margin_mean': 425.3899841308594, 'margin_dpo/margin_std': 645.3447265625, 'logps/chosen': -754.7973022460938, 'logps/rejected': -1202.537109375, 'logps/ref_chosen': -59.628910064697266, 'logps/ref_rejected': -81.97883605957031, 'KL/chosen_KL_mean': -695.1683959960938, 'KL/rejected_KL_mean': -1120.558349609375, 'KL/mean': -907.8634033203125, 'KL/std': 626.8406372070312, 'logits/chosen': -0.974394679069519, 'logits/rejected': -0.9779649972915649, 'epoch': 0.93} + 93%|█████████▎| 636/681 [27:02<01:55, 2.56s/it] 94%|█████████▎| 637/681 [27:05<01:53, 2.58s/it] {'loss': 1.0719, 'grad_norm': 29.097070693969727, 'learning_rate': 6.640486409826785e-09, 'fcm_dpo/beta': 0.0009865246247500181, 'fcm_dpo/q_t': 0.3993714153766632, 'fcm_dpo/delta': -0.0564747154712677, 'fcm_dpo/margin': 459.3734436035156, 'margin_dpo/margin_mean': 459.3734130859375, 'margin_dpo/margin_std': 634.1755981445312, 'logps/chosen': -762.1837158203125, 'logps/rejected': -1270.3095703125, 'logps/ref_chosen': -49.652687072753906, 'logps/ref_rejected': -98.40513610839844, 'KL/chosen_KL_mean': -712.531005859375, 'KL/rejected_KL_mean': -1171.904541015625, 'KL/mean': -942.2177124023438, 'KL/std': 613.010009765625, 'logits/chosen': -1.068098545074463, 'logits/rejected': -1.115422248840332, 'epoch': 0.94} + 94%|█████████▎| 637/681 [27:05<01:53, 2.58s/it] 94%|█████████▎| 638/681 [27:08<01:54, 2.67s/it] {'loss': 1.1675, 'grad_norm': 43.48149490356445, 'learning_rate': 6.349874889624962e-09, 'fcm_dpo/beta': 0.0009809336625039577, 'fcm_dpo/q_t': 0.41367873549461365, 'fcm_dpo/delta': -0.07716827094554901, 'fcm_dpo/margin': 358.84271240234375, 'margin_dpo/margin_mean': 358.84271240234375, 'margin_dpo/margin_std': 657.916748046875, 'logps/chosen': -734.0889892578125, 'logps/rejected': -1114.0765380859375, 'logps/ref_chosen': -58.156639099121094, 'logps/ref_rejected': -79.3014907836914, 'KL/chosen_KL_mean': -675.932373046875, 'KL/rejected_KL_mean': -1034.775146484375, 'KL/mean': -855.3536987304688, 'KL/std': 574.1265869140625, 'logits/chosen': -0.966257631778717, 'logits/rejected': -0.9517063498497009, 'epoch': 0.94} + 94%|█████████▎| 638/681 [27:08<01:54, 2.67s/it] 94%|█████████▍| 639/681 [27:10<01:52, 2.67s/it] {'loss': 1.3435, 'grad_norm': 106.04683685302734, 'learning_rate': 6.065683776815933e-09, 'fcm_dpo/beta': 0.0009733641054481268, 'fcm_dpo/q_t': 0.4636532962322235, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 165.8230743408203, 'margin_dpo/margin_mean': 165.82305908203125, 'margin_dpo/margin_std': 706.1915893554688, 'logps/chosen': -1007.2169189453125, 'logps/rejected': -1174.99169921875, 'logps/ref_chosen': -72.32319641113281, 'logps/ref_rejected': -74.2749252319336, 'KL/chosen_KL_mean': -934.8937377929688, 'KL/rejected_KL_mean': -1100.716796875, 'KL/mean': -1017.8052368164062, 'KL/std': 551.710205078125, 'logits/chosen': -0.9813928604125977, 'logits/rejected': -0.9263367652893066, 'epoch': 0.94} + 94%|█████████▍| 639/681 [27:10<01:52, 2.67s/it] 94%|█████████▍| 640/681 [27:13<01:48, 2.65s/it] {'loss': 1.0338, 'grad_norm': 39.72331237792969, 'learning_rate': 5.7879205600998296e-09, 'fcm_dpo/beta': 0.0009570815600454807, 'fcm_dpo/q_t': 0.38426363468170166, 'fcm_dpo/delta': -0.14221924543380737, 'fcm_dpo/margin': 558.8187255859375, 'margin_dpo/margin_mean': 558.8187255859375, 'margin_dpo/margin_std': 753.1952514648438, 'logps/chosen': -771.5770263671875, 'logps/rejected': -1382.8614501953125, 'logps/ref_chosen': -56.13436508178711, 'logps/ref_rejected': -108.60014343261719, 'KL/chosen_KL_mean': -715.442626953125, 'KL/rejected_KL_mean': -1274.2613525390625, 'KL/mean': -994.8519897460938, 'KL/std': 640.503173828125, 'logits/chosen': -0.9474629163742065, 'logits/rejected': -0.9777064919471741, 'epoch': 0.94} + 94%|█████████▍| 640/681 [27:13<01:48, 2.65s/it] 94%|█████████▍| 641/681 [27:16<01:44, 2.62s/it] {'loss': 1.1809, 'grad_norm': 35.67609405517578, 'learning_rate': 5.516592558795746e-09, 'fcm_dpo/beta': 0.0009527778020128608, 'fcm_dpo/q_t': 0.42465877532958984, 'fcm_dpo/delta': 0.060891155153512955, 'fcm_dpo/margin': 358.13702392578125, 'margin_dpo/margin_mean': 358.1370544433594, 'margin_dpo/margin_std': 718.3710327148438, 'logps/chosen': -892.5226440429688, 'logps/rejected': -1272.6551513671875, 'logps/ref_chosen': -64.99689483642578, 'logps/ref_rejected': -86.99232482910156, 'KL/chosen_KL_mean': -827.5257568359375, 'KL/rejected_KL_mean': -1185.662841796875, 'KL/mean': -1006.5942993164062, 'KL/std': 546.6756591796875, 'logits/chosen': -1.007719874382019, 'logits/rejected': -1.0168031454086304, 'epoch': 0.94} + 94%|█████████▍| 641/681 [27:16<01:44, 2.62s/it] 94%|█████████▍| 642/681 [27:18<01:40, 2.58s/it] {'loss': 1.1499, 'grad_norm': 38.891204833984375, 'learning_rate': 5.251706922648868e-09, 'fcm_dpo/beta': 0.0009587721433490515, 'fcm_dpo/q_t': 0.41409242153167725, 'fcm_dpo/delta': -0.022414878010749817, 'fcm_dpo/margin': 439.2843017578125, 'margin_dpo/margin_mean': 439.2843017578125, 'margin_dpo/margin_std': 870.07275390625, 'logps/chosen': -857.9935302734375, 'logps/rejected': -1341.830810546875, 'logps/ref_chosen': -65.68924713134766, 'logps/ref_rejected': -110.24205017089844, 'KL/chosen_KL_mean': -792.3043212890625, 'KL/rejected_KL_mean': -1231.588623046875, 'KL/mean': -1011.9464721679688, 'KL/std': 722.4783935546875, 'logits/chosen': -0.9521088600158691, 'logits/rejected': -0.9873976707458496, 'epoch': 0.94} + 94%|█████████▍| 642/681 [27:18<01:40, 2.58s/it] 94%|█████████▍| 643/681 [27:21<01:39, 2.61s/it] {'loss': 1.1498, 'grad_norm': 37.40614700317383, 'learning_rate': 4.993270631642038e-09, 'fcm_dpo/beta': 0.0009463735623285174, 'fcm_dpo/q_t': 0.42428651452064514, 'fcm_dpo/delta': -0.036860737949609756, 'fcm_dpo/margin': 344.0841979980469, 'margin_dpo/margin_mean': 344.0841979980469, 'margin_dpo/margin_std': 523.859130859375, 'logps/chosen': -758.7708740234375, 'logps/rejected': -1138.3734130859375, 'logps/ref_chosen': -51.94999694824219, 'logps/ref_rejected': -87.46833801269531, 'KL/chosen_KL_mean': -706.8209228515625, 'KL/rejected_KL_mean': -1050.905029296875, 'KL/mean': -878.863037109375, 'KL/std': 530.0516357421875, 'logits/chosen': -1.0797677040100098, 'logits/rejected': -1.0802643299102783, 'epoch': 0.94} + 94%|█████████▍| 643/681 [27:21<01:39, 2.61s/it] 95%|█████████▍| 644/681 [27:23<01:36, 2.62s/it] {'loss': 1.1823, 'grad_norm': 45.14137649536133, 'learning_rate': 4.741290495811873e-09, 'fcm_dpo/beta': 0.000954576360527426, 'fcm_dpo/q_t': 0.4248698949813843, 'fcm_dpo/delta': 0.06034265458583832, 'fcm_dpo/margin': 358.000244140625, 'margin_dpo/margin_mean': 358.000244140625, 'margin_dpo/margin_std': 717.85693359375, 'logps/chosen': -753.589599609375, 'logps/rejected': -1139.708740234375, 'logps/ref_chosen': -59.017662048339844, 'logps/ref_rejected': -87.13668823242188, 'KL/chosen_KL_mean': -694.5718994140625, 'KL/rejected_KL_mean': -1052.5721435546875, 'KL/mean': -873.572021484375, 'KL/std': 631.7380981445312, 'logits/chosen': -0.9851275682449341, 'logits/rejected': -0.9928478002548218, 'epoch': 0.95} + 95%|█████████▍| 644/681 [27:23<01:36, 2.62s/it] 95%|█████████▍| 645/681 [27:26<01:34, 2.61s/it] {'loss': 1.3219, 'grad_norm': 88.68705749511719, 'learning_rate': 4.495773155069299e-09, 'fcm_dpo/beta': 0.0009710404556244612, 'fcm_dpo/q_t': 0.4625673294067383, 'fcm_dpo/delta': 0.06831113994121552, 'fcm_dpo/margin': 177.82762145996094, 'margin_dpo/margin_mean': 177.8275909423828, 'margin_dpo/margin_std': 645.446533203125, 'logps/chosen': -780.501708984375, 'logps/rejected': -1000.2340698242188, 'logps/ref_chosen': -55.87602233886719, 'logps/ref_rejected': -97.78080749511719, 'KL/chosen_KL_mean': -724.6256103515625, 'KL/rejected_KL_mean': -902.4532470703125, 'KL/mean': -813.5394287109375, 'KL/std': 482.83013916015625, 'logits/chosen': -0.9806017875671387, 'logits/rejected': -0.9682430028915405, 'epoch': 0.95} + 95%|█████████▍| 645/681 [27:26<01:34, 2.61s/it] 95%|█████████▍| 646/681 [27:28<01:29, 2.56s/it] {'loss': 1.1842, 'grad_norm': 52.068904876708984, 'learning_rate': 4.256725079024553e-09, 'fcm_dpo/beta': 0.0009829029440879822, 'fcm_dpo/q_t': 0.432457834482193, 'fcm_dpo/delta': 0.1032671183347702, 'fcm_dpo/margin': 305.16351318359375, 'margin_dpo/margin_mean': 305.1635437011719, 'margin_dpo/margin_std': 565.9290771484375, 'logps/chosen': -759.093994140625, 'logps/rejected': -1080.487548828125, 'logps/ref_chosen': -61.275787353515625, 'logps/ref_rejected': -77.50580596923828, 'KL/chosen_KL_mean': -697.8182373046875, 'KL/rejected_KL_mean': -1002.981689453125, 'KL/mean': -850.4000244140625, 'KL/std': 472.5347900390625, 'logits/chosen': -1.0154389142990112, 'logits/rejected': -0.9988906383514404, 'epoch': 0.95} + 95%|█████████▍| 646/681 [27:29<01:29, 2.56s/it] 95%|█████████▌| 647/681 [27:31<01:28, 2.60s/it] {'loss': 1.1078, 'grad_norm': 27.42867088317871, 'learning_rate': 4.024152566816791e-09, 'fcm_dpo/beta': 0.0009943554177880287, 'fcm_dpo/q_t': 0.41246411204338074, 'fcm_dpo/delta': 0.02068711817264557, 'fcm_dpo/margin': 382.257080078125, 'margin_dpo/margin_mean': 382.257080078125, 'margin_dpo/margin_std': 527.3770751953125, 'logps/chosen': -663.1343383789062, 'logps/rejected': -1084.0584716796875, 'logps/ref_chosen': -54.8524169921875, 'logps/ref_rejected': -93.5194091796875, 'KL/chosen_KL_mean': -608.2819213867188, 'KL/rejected_KL_mean': -990.5390625, 'KL/mean': -799.4105224609375, 'KL/std': 541.8460693359375, 'logits/chosen': -0.8789236545562744, 'logits/rejected': -0.9076966047286987, 'epoch': 0.95} + 95%|█████████▌| 647/681 [27:31<01:28, 2.60s/it] 95%|█████████▌| 648/681 [27:34<01:23, 2.53s/it] {'loss': 1.0232, 'grad_norm': 28.24399757385254, 'learning_rate': 3.798061746947995e-09, 'fcm_dpo/beta': 0.0009720301604829729, 'fcm_dpo/q_t': 0.38105693459510803, 'fcm_dpo/delta': -0.16539156436920166, 'fcm_dpo/margin': 572.3054809570312, 'margin_dpo/margin_mean': 572.3054809570312, 'margin_dpo/margin_std': 778.92529296875, 'logps/chosen': -711.3363037109375, 'logps/rejected': -1328.18310546875, 'logps/ref_chosen': -54.17146682739258, 'logps/ref_rejected': -98.7127914428711, 'KL/chosen_KL_mean': -657.1648559570312, 'KL/rejected_KL_mean': -1229.47021484375, 'KL/mean': -943.3175659179688, 'KL/std': 657.721435546875, 'logits/chosen': -1.045109510421753, 'logits/rejected': -1.1041361093521118, 'epoch': 0.95} + 95%|█████████▌| 648/681 [27:34<01:23, 2.53s/it] 95%|█████████▌| 649/681 [27:36<01:22, 2.57s/it] {'loss': 1.2366, 'grad_norm': 29.93989372253418, 'learning_rate': 3.5784585771215235e-09, 'fcm_dpo/beta': 0.000985685153864324, 'fcm_dpo/q_t': 0.4443369209766388, 'fcm_dpo/delta': 0.1423780918121338, 'fcm_dpo/margin': 264.7760009765625, 'margin_dpo/margin_mean': 264.7760009765625, 'margin_dpo/margin_std': 629.158935546875, 'logps/chosen': -744.0819091796875, 'logps/rejected': -1026.454833984375, 'logps/ref_chosen': -62.480350494384766, 'logps/ref_rejected': -80.07717895507812, 'KL/chosen_KL_mean': -681.6015625, 'KL/rejected_KL_mean': -946.3775634765625, 'KL/mean': -813.9896240234375, 'KL/std': 507.69940185546875, 'logits/chosen': -1.052908182144165, 'logits/rejected': -1.044716238975525, 'epoch': 0.95} + 95%|█████████▌| 649/681 [27:36<01:22, 2.57s/it] 95%|█████████▌| 650/681 [27:39<01:20, 2.60s/it] {'loss': 1.129, 'grad_norm': 31.88619041442871, 'learning_rate': 3.3653488440851253e-09, 'fcm_dpo/beta': 0.0009861743310466409, 'fcm_dpo/q_t': 0.40908634662628174, 'fcm_dpo/delta': -0.016445815563201904, 'fcm_dpo/margin': 421.45196533203125, 'margin_dpo/margin_mean': 421.45196533203125, 'margin_dpo/margin_std': 725.9293212890625, 'logps/chosen': -797.4398193359375, 'logps/rejected': -1261.063720703125, 'logps/ref_chosen': -56.09281921386719, 'logps/ref_rejected': -98.26483917236328, 'KL/chosen_KL_mean': -741.3469848632812, 'KL/rejected_KL_mean': -1162.798828125, 'KL/mean': -952.0729370117188, 'KL/std': 635.6630859375, 'logits/chosen': -1.0458638668060303, 'logits/rejected': -1.0673601627349854, 'epoch': 0.95} + 95%|█████████▌| 650/681 [27:39<01:20, 2.60s/it] 96%|█████████▌| 651/681 [27:41<01:17, 2.59s/it] {'loss': 1.0016, 'grad_norm': 43.920989990234375, 'learning_rate': 3.158738163478475e-09, 'fcm_dpo/beta': 0.000967178144492209, 'fcm_dpo/q_t': 0.3810211718082428, 'fcm_dpo/delta': -0.13220591843128204, 'fcm_dpo/margin': 542.8966064453125, 'margin_dpo/margin_mean': 542.8966064453125, 'margin_dpo/margin_std': 594.7532958984375, 'logps/chosen': -541.2603759765625, 'logps/rejected': -1140.689453125, 'logps/ref_chosen': -43.42544937133789, 'logps/ref_rejected': -99.95791625976562, 'KL/chosen_KL_mean': -497.8349304199219, 'KL/rejected_KL_mean': -1040.7314453125, 'KL/mean': -769.283203125, 'KL/std': 584.7640380859375, 'logits/chosen': -1.0429898500442505, 'logits/rejected': -1.0994410514831543, 'epoch': 0.96} + 96%|█████████▌| 651/681 [27:41<01:17, 2.59s/it] 96%|█████████▌| 652/681 [27:44<01:15, 2.59s/it] {'loss': 1.1247, 'grad_norm': 32.01892852783203, 'learning_rate': 2.9586319796851555e-09, 'fcm_dpo/beta': 0.0009628928382880986, 'fcm_dpo/q_t': 0.41178420186042786, 'fcm_dpo/delta': 0.0014214273542165756, 'fcm_dpo/margin': 413.9795837402344, 'margin_dpo/margin_mean': 413.9796142578125, 'margin_dpo/margin_std': 687.262939453125, 'logps/chosen': -695.14990234375, 'logps/rejected': -1158.320556640625, 'logps/ref_chosen': -62.57680892944336, 'logps/ref_rejected': -111.76779174804688, 'KL/chosen_KL_mean': -632.5731201171875, 'KL/rejected_KL_mean': -1046.552734375, 'KL/mean': -839.5629272460938, 'KL/std': 598.0744018554688, 'logits/chosen': -1.0343468189239502, 'logits/rejected': -1.0592379570007324, 'epoch': 0.96} + 96%|█████████▌| 652/681 [27:44<01:15, 2.59s/it] 96%|█████████▌| 653/681 [27:47<01:11, 2.57s/it] {'loss': 1.1492, 'grad_norm': 33.566246032714844, 'learning_rate': 2.7650355656892166e-09, 'fcm_dpo/beta': 0.0009692892199382186, 'fcm_dpo/q_t': 0.4203724265098572, 'fcm_dpo/delta': 0.04052945226430893, 'fcm_dpo/margin': 372.2716064453125, 'margin_dpo/margin_mean': 372.2716369628906, 'margin_dpo/margin_std': 651.8836669921875, 'logps/chosen': -830.693115234375, 'logps/rejected': -1245.101318359375, 'logps/ref_chosen': -61.11295700073242, 'logps/ref_rejected': -103.24960327148438, 'KL/chosen_KL_mean': -769.5801391601562, 'KL/rejected_KL_mean': -1141.851806640625, 'KL/mean': -955.7160034179688, 'KL/std': 637.4415283203125, 'logits/chosen': -1.0515234470367432, 'logits/rejected': -1.0738110542297363, 'epoch': 0.96} + 96%|█████████▌| 653/681 [27:47<01:11, 2.57s/it] 96%|█████████▌| 654/681 [27:49<01:09, 2.57s/it] {'loss': 1.1428, 'grad_norm': 31.80510711669922, 'learning_rate': 2.577954022936174e-09, 'fcm_dpo/beta': 0.0009763325797393918, 'fcm_dpo/q_t': 0.4218224287033081, 'fcm_dpo/delta': 0.050495948642492294, 'fcm_dpo/margin': 359.82257080078125, 'margin_dpo/margin_mean': 359.8226013183594, 'margin_dpo/margin_std': 591.5650634765625, 'logps/chosen': -749.8182373046875, 'logps/rejected': -1146.6865234375, 'logps/ref_chosen': -61.7281379699707, 'logps/ref_rejected': -98.7738037109375, 'KL/chosen_KL_mean': -688.090087890625, 'KL/rejected_KL_mean': -1047.9127197265625, 'KL/mean': -868.0014038085938, 'KL/std': 508.9238586425781, 'logits/chosen': -1.045449137687683, 'logits/rejected': -1.0607552528381348, 'epoch': 0.96} + 96%|█████████▌| 654/681 [27:49<01:09, 2.57s/it] 96%|█████████▌| 655/681 [27:52<01:06, 2.55s/it] {'loss': 1.1326, 'grad_norm': 26.179494857788086, 'learning_rate': 2.397392281198729e-09, 'fcm_dpo/beta': 0.0009844074957072735, 'fcm_dpo/q_t': 0.41697460412979126, 'fcm_dpo/delta': 0.029366791248321533, 'fcm_dpo/margin': 377.60040283203125, 'margin_dpo/margin_mean': 377.6003723144531, 'margin_dpo/margin_std': 612.6904296875, 'logps/chosen': -704.3048095703125, 'logps/rejected': -1130.6201171875, 'logps/ref_chosen': -49.576812744140625, 'logps/ref_rejected': -98.29183197021484, 'KL/chosen_KL_mean': -654.7279663085938, 'KL/rejected_KL_mean': -1032.328369140625, 'KL/mean': -843.5281982421875, 'KL/std': 518.3046264648438, 'logits/chosen': -1.0613682270050049, 'logits/rejected': -1.1010310649871826, 'epoch': 0.96} + 96%|█████████▌| 655/681 [27:52<01:06, 2.55s/it] 96%|█████████▋| 656/681 [27:54<01:05, 2.60s/it] {'loss': 0.957, 'grad_norm': 79.35772705078125, 'learning_rate': 2.223355098446622e-09, 'fcm_dpo/beta': 0.0009554917924106121, 'fcm_dpo/q_t': 0.364484578371048, 'fcm_dpo/delta': -0.23265045881271362, 'fcm_dpo/margin': 647.600830078125, 'margin_dpo/margin_mean': 647.600830078125, 'margin_dpo/margin_std': 704.768310546875, 'logps/chosen': -759.1444091796875, 'logps/rejected': -1467.870361328125, 'logps/ref_chosen': -52.54943084716797, 'logps/ref_rejected': -113.67464447021484, 'KL/chosen_KL_mean': -706.594970703125, 'KL/rejected_KL_mean': -1354.19580078125, 'KL/mean': -1030.3953857421875, 'KL/std': 699.4261474609375, 'logits/chosen': -0.9079943299293518, 'logits/rejected': -0.978103518486023, 'epoch': 0.96} + 96%|█████████▋| 656/681 [27:54<01:05, 2.60s/it] 96%|█████████▋| 657/681 [27:57<00:59, 2.49s/it] {'loss': 1.051, 'grad_norm': 33.481868743896484, 'learning_rate': 2.055847060721566e-09, 'fcm_dpo/beta': 0.0009273520554415882, 'fcm_dpo/q_t': 0.3917388916015625, 'fcm_dpo/delta': -0.08619820326566696, 'fcm_dpo/margin': 519.6265258789062, 'margin_dpo/margin_mean': 519.6265258789062, 'margin_dpo/margin_std': 686.9745483398438, 'logps/chosen': -706.7352294921875, 'logps/rejected': -1277.5760498046875, 'logps/ref_chosen': -46.700538635253906, 'logps/ref_rejected': -97.91487121582031, 'KL/chosen_KL_mean': -660.0347290039062, 'KL/rejected_KL_mean': -1179.6611328125, 'KL/mean': -919.8479614257812, 'KL/std': 662.580810546875, 'logits/chosen': -1.0952857732772827, 'logits/rejected': -1.1410545110702515, 'epoch': 0.96} + 96%|█████████▋| 657/681 [27:57<00:59, 2.49s/it] 97%|█████████▋| 658/681 [27:59<00:56, 2.45s/it] {'loss': 1.1103, 'grad_norm': 32.001991271972656, 'learning_rate': 1.8948725820160662e-09, 'fcm_dpo/beta': 0.0009218085906468332, 'fcm_dpo/q_t': 0.4112616181373596, 'fcm_dpo/delta': 0.014132943004369736, 'fcm_dpo/margin': 418.6898193359375, 'margin_dpo/margin_mean': 418.6898193359375, 'margin_dpo/margin_std': 594.6806640625, 'logps/chosen': -777.8120727539062, 'logps/rejected': -1231.483154296875, 'logps/ref_chosen': -60.95820999145508, 'logps/ref_rejected': -95.93949127197266, 'KL/chosen_KL_mean': -716.8538818359375, 'KL/rejected_KL_mean': -1135.543701171875, 'KL/mean': -926.1987915039062, 'KL/std': 523.1241455078125, 'logits/chosen': -1.0506086349487305, 'logits/rejected': -1.0856657028198242, 'epoch': 0.97} + 97%|█████████▋| 658/681 [27:59<00:56, 2.45s/it] 97%|█████████▋| 659/681 [28:02<00:55, 2.53s/it] {'loss': 1.1164, 'grad_norm': 38.490875244140625, 'learning_rate': 1.7404359041573723e-09, 'fcm_dpo/beta': 0.0009279932710342109, 'fcm_dpo/q_t': 0.4154208302497864, 'fcm_dpo/delta': 0.02739275060594082, 'fcm_dpo/margin': 402.483642578125, 'margin_dpo/margin_mean': 402.483642578125, 'margin_dpo/margin_std': 586.9424438476562, 'logps/chosen': -691.6051635742188, 'logps/rejected': -1104.8167724609375, 'logps/ref_chosen': -76.74298095703125, 'logps/ref_rejected': -87.4709701538086, 'KL/chosen_KL_mean': -614.8621826171875, 'KL/rejected_KL_mean': -1017.3458251953125, 'KL/mean': -816.10400390625, 'KL/std': 519.0294189453125, 'logits/chosen': -0.9568224549293518, 'logits/rejected': -0.9347273111343384, 'epoch': 0.97} + 97%|█████████▋| 659/681 [28:02<00:55, 2.53s/it] 97%|█████████▋| 660/681 [28:04<00:52, 2.51s/it] {'loss': 1.0413, 'grad_norm': 45.50758743286133, 'learning_rate': 1.592541096695571e-09, 'fcm_dpo/beta': 0.0009214339079335332, 'fcm_dpo/q_t': 0.3903365135192871, 'fcm_dpo/delta': -0.08826512098312378, 'fcm_dpo/margin': 525.3435668945312, 'margin_dpo/margin_mean': 525.3435668945312, 'margin_dpo/margin_std': 652.117431640625, 'logps/chosen': -729.3735961914062, 'logps/rejected': -1271.62939453125, 'logps/ref_chosen': -59.04788589477539, 'logps/ref_rejected': -75.96005249023438, 'KL/chosen_KL_mean': -670.32568359375, 'KL/rejected_KL_mean': -1195.6693115234375, 'KL/mean': -932.9974365234375, 'KL/std': 613.5252685546875, 'logits/chosen': -1.0583207607269287, 'logits/rejected': -1.0816309452056885, 'epoch': 0.97} + 97%|█████████▋| 660/681 [28:04<00:52, 2.51s/it] 97%|█████████▋| 661/681 [28:06<00:48, 2.44s/it] {'loss': 1.0806, 'grad_norm': 44.195091247558594, 'learning_rate': 1.4511920567963908e-09, 'fcm_dpo/beta': 0.0009160168119706213, 'fcm_dpo/q_t': 0.40546107292175293, 'fcm_dpo/delta': -0.028154436498880386, 'fcm_dpo/margin': 465.85284423828125, 'margin_dpo/margin_mean': 465.85284423828125, 'margin_dpo/margin_std': 654.1995849609375, 'logps/chosen': -643.99853515625, 'logps/rejected': -1145.18310546875, 'logps/ref_chosen': -50.673973083496094, 'logps/ref_rejected': -86.00569152832031, 'KL/chosen_KL_mean': -593.3245849609375, 'KL/rejected_KL_mean': -1059.177490234375, 'KL/mean': -826.2509765625, 'KL/std': 661.68408203125, 'logits/chosen': -1.0162544250488281, 'logits/rejected': -1.0294699668884277, 'epoch': 0.97} + 97%|█████████▋| 661/681 [28:06<00:48, 2.44s/it] 97%|█████████▋| 662/681 [28:09<00:49, 2.58s/it] {'loss': 1.1657, 'grad_norm': 27.159738540649414, 'learning_rate': 1.3163925091384532e-09, 'fcm_dpo/beta': 0.0009198813932016492, 'fcm_dpo/q_t': 0.42443883419036865, 'fcm_dpo/delta': 0.06546258926391602, 'fcm_dpo/margin': 366.068359375, 'margin_dpo/margin_mean': 366.068359375, 'margin_dpo/margin_std': 672.253662109375, 'logps/chosen': -756.2188720703125, 'logps/rejected': -1142.08203125, 'logps/ref_chosen': -69.26106262207031, 'logps/ref_rejected': -89.05593872070312, 'KL/chosen_KL_mean': -686.9578857421875, 'KL/rejected_KL_mean': -1053.026123046875, 'KL/mean': -869.9920043945312, 'KL/std': 554.1969604492188, 'logits/chosen': -0.9714980125427246, 'logits/rejected': -0.9738001823425293, 'epoch': 0.97} + 97%|█████████▋| 662/681 [28:09<00:49, 2.58s/it] 97%|█████████▋| 663/681 [28:12<00:47, 2.67s/it] {'loss': 1.1271, 'grad_norm': 26.556825637817383, 'learning_rate': 1.1881460058152382e-09, 'fcm_dpo/beta': 0.0009213130106218159, 'fcm_dpo/q_t': 0.4122500717639923, 'fcm_dpo/delta': 0.0008019153028726578, 'fcm_dpo/margin': 433.2557373046875, 'margin_dpo/margin_mean': 433.25579833984375, 'margin_dpo/margin_std': 735.6624145507812, 'logps/chosen': -737.3433837890625, 'logps/rejected': -1219.6456298828125, 'logps/ref_chosen': -64.87890625, 'logps/ref_rejected': -113.92536926269531, 'KL/chosen_KL_mean': -672.4644775390625, 'KL/rejected_KL_mean': -1105.72021484375, 'KL/mean': -889.0924072265625, 'KL/std': 637.3165283203125, 'logits/chosen': -1.0173933506011963, 'logits/rejected': -1.0415921211242676, 'epoch': 0.97} + 97%|█████████▋| 663/681 [28:12<00:47, 2.67s/it] 98%|█████████▊| 664/681 [28:15<00:45, 2.67s/it] {'loss': 1.0681, 'grad_norm': 29.249494552612305, 'learning_rate': 1.066455926241383e-09, 'fcm_dpo/beta': 0.0009112852858379483, 'fcm_dpo/q_t': 0.39850109815597534, 'fcm_dpo/delta': -0.057732854038476944, 'fcm_dpo/margin': 498.7710266113281, 'margin_dpo/margin_mean': 498.77105712890625, 'margin_dpo/margin_std': 673.9359130859375, 'logps/chosen': -757.817138671875, 'logps/rejected': -1301.221435546875, 'logps/ref_chosen': -60.88847351074219, 'logps/ref_rejected': -105.521728515625, 'KL/chosen_KL_mean': -696.9286499023438, 'KL/rejected_KL_mean': -1195.69970703125, 'KL/mean': -946.314208984375, 'KL/std': 625.337158203125, 'logits/chosen': -1.0290577411651611, 'logits/rejected': -1.0633100271224976, 'epoch': 0.98} + 98%|█████████▊| 664/681 [28:15<00:45, 2.67s/it] 98%|█████████▊| 665/681 [28:17<00:41, 2.57s/it] {'loss': 1.0981, 'grad_norm': 39.16621780395508, 'learning_rate': 9.513254770636137e-10, 'fcm_dpo/beta': 0.0009156306041404605, 'fcm_dpo/q_t': 0.4127081632614136, 'fcm_dpo/delta': 0.02248411625623703, 'fcm_dpo/margin': 413.22760009765625, 'margin_dpo/margin_mean': 413.22760009765625, 'margin_dpo/margin_std': 524.672607421875, 'logps/chosen': -685.4776611328125, 'logps/rejected': -1122.949951171875, 'logps/ref_chosen': -60.56413269042969, 'logps/ref_rejected': -84.80882263183594, 'KL/chosen_KL_mean': -624.91357421875, 'KL/rejected_KL_mean': -1038.14111328125, 'KL/mean': -831.52734375, 'KL/std': 521.3961791992188, 'logits/chosen': -1.1201207637786865, 'logits/rejected': -1.1449217796325684, 'epoch': 0.98} + 98%|█████████▊| 665/681 [28:17<00:41, 2.57s/it] 98%|█████████▊| 666/681 [28:20<00:39, 2.61s/it] {'loss': 1.106, 'grad_norm': 25.72242546081543, 'learning_rate': 8.427576920763956e-10, 'fcm_dpo/beta': 0.0009172533173114061, 'fcm_dpo/q_t': 0.41190439462661743, 'fcm_dpo/delta': 0.012579199858009815, 'fcm_dpo/margin': 422.88995361328125, 'margin_dpo/margin_mean': 422.88995361328125, 'margin_dpo/margin_std': 591.49951171875, 'logps/chosen': -729.2271118164062, 'logps/rejected': -1183.5887451171875, 'logps/ref_chosen': -64.41996002197266, 'logps/ref_rejected': -95.8916244506836, 'KL/chosen_KL_mean': -664.80712890625, 'KL/rejected_KL_mean': -1087.697021484375, 'KL/mean': -876.2521362304688, 'KL/std': 528.6064453125, 'logits/chosen': -0.9221373200416565, 'logits/rejected': -0.9332491755485535, 'epoch': 0.98} + 98%|█████████▊| 666/681 [28:20<00:39, 2.61s/it] 98%|█████████▊| 667/681 [28:23<00:36, 2.62s/it] {'loss': 1.0614, 'grad_norm': 35.70097732543945, 'learning_rate': 7.407554321417764e-10, 'fcm_dpo/beta': 0.0009089080849662423, 'fcm_dpo/q_t': 0.3967137038707733, 'fcm_dpo/delta': -0.058049269020557404, 'fcm_dpo/margin': 500.79840087890625, 'margin_dpo/margin_mean': 500.79840087890625, 'margin_dpo/margin_std': 649.6275634765625, 'logps/chosen': -816.3968505859375, 'logps/rejected': -1335.753662109375, 'logps/ref_chosen': -69.27702331542969, 'logps/ref_rejected': -87.83549499511719, 'KL/chosen_KL_mean': -747.1198120117188, 'KL/rejected_KL_mean': -1247.918212890625, 'KL/mean': -997.51904296875, 'KL/std': 593.6838989257812, 'logits/chosen': -0.9862961769104004, 'logits/rejected': -0.9898433089256287, 'epoch': 0.98} + 98%|█████████▊| 667/681 [28:23<00:36, 2.62s/it] 98%|█████████▊| 668/681 [28:25<00:34, 2.63s/it] {'loss': 1.2061, 'grad_norm': 46.50619888305664, 'learning_rate': 6.453213851142225e-10, 'fcm_dpo/beta': 0.0009242600062862039, 'fcm_dpo/q_t': 0.43164360523223877, 'fcm_dpo/delta': 0.08742087334394455, 'fcm_dpo/margin': 340.3535461425781, 'margin_dpo/margin_mean': 340.35357666015625, 'margin_dpo/margin_std': 745.7200927734375, 'logps/chosen': -871.8690795898438, 'logps/rejected': -1243.357666015625, 'logps/ref_chosen': -72.60400390625, 'logps/ref_rejected': -103.73905944824219, 'KL/chosen_KL_mean': -799.2650756835938, 'KL/rejected_KL_mean': -1139.61865234375, 'KL/mean': -969.44189453125, 'KL/std': 626.5878295898438, 'logits/chosen': -1.027015209197998, 'logits/rejected': -1.0318031311035156, 'epoch': 0.98} + 98%|█████████▊| 668/681 [28:25<00:34, 2.63s/it] 98%|█████████▊| 669/681 [28:28<00:31, 2.65s/it] {'loss': 1.0728, 'grad_norm': 24.051504135131836, 'learning_rate': 5.564580657695939e-10, 'fcm_dpo/beta': 0.0009196557221002877, 'fcm_dpo/q_t': 0.39873257279396057, 'fcm_dpo/delta': -0.04609519988298416, 'fcm_dpo/margin': 482.8586120605469, 'margin_dpo/margin_mean': 482.8586120605469, 'margin_dpo/margin_std': 649.423583984375, 'logps/chosen': -653.135009765625, 'logps/rejected': -1167.801513671875, 'logps/ref_chosen': -46.116416931152344, 'logps/ref_rejected': -77.92434692382812, 'KL/chosen_KL_mean': -607.0186157226562, 'KL/rejected_KL_mean': -1089.877197265625, 'KL/mean': -848.4479370117188, 'KL/std': 578.434326171875, 'logits/chosen': -1.0002648830413818, 'logits/rejected': -1.0088210105895996, 'epoch': 0.98} + 98%|█████████▊| 669/681 [28:28<00:31, 2.65s/it] 98%|█████████▊| 670/681 [28:30<00:29, 2.64s/it] {'loss': 1.0764, 'grad_norm': 27.57679557800293, 'learning_rate': 4.741678157389739e-10, 'fcm_dpo/beta': 0.0009132723789662123, 'fcm_dpo/q_t': 0.3985205292701721, 'fcm_dpo/delta': -0.04978980869054794, 'fcm_dpo/margin': 489.85028076171875, 'margin_dpo/margin_mean': 489.85028076171875, 'margin_dpo/margin_std': 666.9021606445312, 'logps/chosen': -652.3175048828125, 'logps/rejected': -1176.7625732421875, 'logps/ref_chosen': -62.34575271606445, 'logps/ref_rejected': -96.9405517578125, 'KL/chosen_KL_mean': -589.9717407226562, 'KL/rejected_KL_mean': -1079.822021484375, 'KL/mean': -834.8968505859375, 'KL/std': 548.0260009765625, 'logits/chosen': -0.9369679689407349, 'logits/rejected': -0.956099271774292, 'epoch': 0.98} + 98%|█████████▊| 670/681 [28:31<00:29, 2.64s/it] 99%|█████████▊| 671/681 [28:33<00:25, 2.56s/it] {'loss': 1.1359, 'grad_norm': 31.614513397216797, 'learning_rate': 3.9845280344705245e-10, 'fcm_dpo/beta': 0.0009144209325313568, 'fcm_dpo/q_t': 0.4150552749633789, 'fcm_dpo/delta': 0.03224237263202667, 'fcm_dpo/margin': 403.0694580078125, 'margin_dpo/margin_mean': 403.0694885253906, 'margin_dpo/margin_std': 654.9969482421875, 'logps/chosen': -787.0851440429688, 'logps/rejected': -1225.973876953125, 'logps/ref_chosen': -48.00010681152344, 'logps/ref_rejected': -83.81932067871094, 'KL/chosen_KL_mean': -739.0850219726562, 'KL/rejected_KL_mean': -1142.154541015625, 'KL/mean': -940.6197509765625, 'KL/std': 546.3319091796875, 'logits/chosen': -1.047501564025879, 'logits/rejected': -1.0776722431182861, 'epoch': 0.99} + 99%|█████████▊| 671/681 [28:33<00:25, 2.56s/it] 99%|█████████▊| 672/681 [28:35<00:23, 2.58s/it] {'loss': 1.1531, 'grad_norm': 50.31674575805664, 'learning_rate': 3.293150240547549e-10, 'fcm_dpo/beta': 0.0009142364142462611, 'fcm_dpo/q_t': 0.4172729551792145, 'fcm_dpo/delta': 0.02527567557990551, 'fcm_dpo/margin': 410.8834228515625, 'margin_dpo/margin_mean': 410.8834228515625, 'margin_dpo/margin_std': 742.9523315429688, 'logps/chosen': -876.0934448242188, 'logps/rejected': -1321.53369140625, 'logps/ref_chosen': -58.58328628540039, 'logps/ref_rejected': -93.14015197753906, 'KL/chosen_KL_mean': -817.5101318359375, 'KL/rejected_KL_mean': -1228.3935546875, 'KL/mean': -1022.951904296875, 'KL/std': 672.7301025390625, 'logits/chosen': -1.097043514251709, 'logits/rejected': -1.1035444736480713, 'epoch': 0.99} + 99%|█████████▊| 672/681 [28:36<00:23, 2.58s/it] 99%|█████████▉| 673/681 [28:38<00:19, 2.49s/it] {'loss': 1.1356, 'grad_norm': 33.52194595336914, 'learning_rate': 2.6675629940689504e-10, 'fcm_dpo/beta': 0.0009239012142643332, 'fcm_dpo/q_t': 0.41999146342277527, 'fcm_dpo/delta': 0.048241592943668365, 'fcm_dpo/margin': 382.5746765136719, 'margin_dpo/margin_mean': 382.5746765136719, 'margin_dpo/margin_std': 613.573974609375, 'logps/chosen': -772.8348388671875, 'logps/rejected': -1193.982421875, 'logps/ref_chosen': -46.72320556640625, 'logps/ref_rejected': -85.29623413085938, 'KL/chosen_KL_mean': -726.111572265625, 'KL/rejected_KL_mean': -1108.686279296875, 'KL/mean': -917.39892578125, 'KL/std': 555.8173828125, 'logits/chosen': -1.051267147064209, 'logits/rejected': -1.0552277565002441, 'epoch': 0.99} + 99%|█████████▉| 673/681 [28:38<00:19, 2.49s/it] 99%|█████████▉| 674/681 [28:40<00:17, 2.56s/it] {'loss': 1.0662, 'grad_norm': 42.7830810546875, 'learning_rate': 2.1077827798404725e-10, 'fcm_dpo/beta': 0.0009207893162965775, 'fcm_dpo/q_t': 0.39986640214920044, 'fcm_dpo/delta': -0.05484557896852493, 'fcm_dpo/margin': 491.2944641113281, 'margin_dpo/margin_mean': 491.29443359375, 'margin_dpo/margin_std': 659.54931640625, 'logps/chosen': -641.7603759765625, 'logps/rejected': -1157.6552734375, 'logps/ref_chosen': -45.445526123046875, 'logps/ref_rejected': -70.04593658447266, 'KL/chosen_KL_mean': -596.3148803710938, 'KL/rejected_KL_mean': -1087.6092529296875, 'KL/mean': -841.9620971679688, 'KL/std': 549.2061157226562, 'logits/chosen': -0.9467175602912903, 'logits/rejected': -0.96770179271698, 'epoch': 0.99} + 99%|█████████▉| 674/681 [28:41<00:17, 2.56s/it] 99%|█████████▉| 675/681 [28:43<00:15, 2.59s/it] {'loss': 1.0636, 'grad_norm': 28.38162612915039, 'learning_rate': 1.6138243485910863e-10, 'fcm_dpo/beta': 0.0009013206581585109, 'fcm_dpo/q_t': 0.3982793688774109, 'fcm_dpo/delta': -0.06186992675065994, 'fcm_dpo/margin': 507.7829284667969, 'margin_dpo/margin_mean': 507.782958984375, 'margin_dpo/margin_std': 652.8712158203125, 'logps/chosen': -725.8426513671875, 'logps/rejected': -1263.541259765625, 'logps/ref_chosen': -44.17628479003906, 'logps/ref_rejected': -74.09197998046875, 'KL/chosen_KL_mean': -681.6663818359375, 'KL/rejected_KL_mean': -1189.44921875, 'KL/mean': -935.557861328125, 'KL/std': 612.2513427734375, 'logits/chosen': -1.023393154144287, 'logits/rejected': -1.0347087383270264, 'epoch': 0.99} + 99%|█████████▉| 675/681 [28:43<00:15, 2.59s/it] 99%|█████████▉| 676/681 [28:46<00:13, 2.61s/it] {'loss': 1.0689, 'grad_norm': 25.832805633544922, 'learning_rate': 1.1857007165852472e-10, 'fcm_dpo/beta': 0.0009026783518493176, 'fcm_dpo/q_t': 0.4024215042591095, 'fcm_dpo/delta': -0.028323372825980186, 'fcm_dpo/margin': 473.1407775878906, 'margin_dpo/margin_mean': 473.14080810546875, 'margin_dpo/margin_std': 579.1337280273438, 'logps/chosen': -800.4295043945312, 'logps/rejected': -1290.530517578125, 'logps/ref_chosen': -71.39852905273438, 'logps/ref_rejected': -88.3587646484375, 'KL/chosen_KL_mean': -729.031005859375, 'KL/rejected_KL_mean': -1202.1717529296875, 'KL/mean': -965.601318359375, 'KL/std': 577.6213989257812, 'logits/chosen': -0.9483187198638916, 'logits/rejected': -0.9624805450439453, 'epoch': 0.99} + 99%|█████████▉| 676/681 [28:46<00:13, 2.61s/it] 99%|█████████▉| 677/681 [28:48<00:10, 2.53s/it] {'loss': 1.1093, 'grad_norm': 29.9565372467041, 'learning_rate': 8.23423165278725e-11, 'fcm_dpo/beta': 0.0008973192889243364, 'fcm_dpo/q_t': 0.41222789883613586, 'fcm_dpo/delta': -0.008067594841122627, 'fcm_dpo/margin': 454.34246826171875, 'margin_dpo/margin_mean': 454.34246826171875, 'margin_dpo/margin_std': 722.945068359375, 'logps/chosen': -790.8522338867188, 'logps/rejected': -1266.893798828125, 'logps/ref_chosen': -56.527435302734375, 'logps/ref_rejected': -78.22654724121094, 'KL/chosen_KL_mean': -734.3248291015625, 'KL/rejected_KL_mean': -1188.667236328125, 'KL/mean': -961.4959716796875, 'KL/std': 600.77294921875, 'logits/chosen': -1.0780959129333496, 'logits/rejected': -1.0737848281860352, 'epoch': 0.99} + 99%|█████████▉| 677/681 [28:48<00:10, 2.53s/it] 100%|█████████▉| 678/681 [28:51<00:07, 2.50s/it] {'loss': 1.058, 'grad_norm': 33.316654205322266, 'learning_rate': 5.270012410216185e-11, 'fcm_dpo/beta': 0.0008914459031075239, 'fcm_dpo/q_t': 0.39344462752342224, 'fcm_dpo/delta': -0.07900315523147583, 'fcm_dpo/margin': 533.0916137695312, 'margin_dpo/margin_mean': 533.0916137695312, 'margin_dpo/margin_std': 715.0037841796875, 'logps/chosen': -666.4847412109375, 'logps/rejected': -1234.0465087890625, 'logps/ref_chosen': -46.13447570800781, 'logps/ref_rejected': -80.60462951660156, 'KL/chosen_KL_mean': -620.3502197265625, 'KL/rejected_KL_mean': -1153.44189453125, 'KL/mean': -886.8961181640625, 'KL/std': 635.0799560546875, 'logits/chosen': -0.9998750686645508, 'logits/rejected': -1.036036491394043, 'epoch': 1.0} + 100%|█████████▉| 678/681 [28:51<00:07, 2.50s/it] 100%|█████████▉| 679/681 [28:53<00:05, 2.58s/it] {'loss': 1.1415, 'grad_norm': 31.741161346435547, 'learning_rate': 2.9644275480772416e-11, 'fcm_dpo/beta': 0.0008914553327485919, 'fcm_dpo/q_t': 0.42303356528282166, 'fcm_dpo/delta': 0.06134221330285072, 'fcm_dpo/margin': 382.27972412109375, 'margin_dpo/margin_mean': 382.2797546386719, 'margin_dpo/margin_std': 604.84912109375, 'logps/chosen': -751.6414184570312, 'logps/rejected': -1160.224365234375, 'logps/ref_chosen': -50.294921875, 'logps/ref_rejected': -76.59813690185547, 'KL/chosen_KL_mean': -701.3464965820312, 'KL/rejected_KL_mean': -1083.626220703125, 'KL/mean': -892.486328125, 'KL/std': 525.0972900390625, 'logits/chosen': -1.020527720451355, 'logits/rejected': -1.010463833808899, 'epoch': 1.0} + 100%|█████████▉| 679/681 [28:53<00:05, 2.58s/it] 100%|█████████▉| 680/681 [28:56<00:02, 2.59s/it] {'loss': 1.0861, 'grad_norm': 40.12180709838867, 'learning_rate': 1.31753782067201e-11, 'fcm_dpo/beta': 0.0008838686626404524, 'fcm_dpo/q_t': 0.3975260853767395, 'fcm_dpo/delta': -0.05259976163506508, 'fcm_dpo/margin': 508.6307678222656, 'margin_dpo/margin_mean': 508.6307678222656, 'margin_dpo/margin_std': 746.896484375, 'logps/chosen': -789.1945190429688, 'logps/rejected': -1333.29443359375, 'logps/ref_chosen': -76.91569519042969, 'logps/ref_rejected': -112.384765625, 'KL/chosen_KL_mean': -712.27880859375, 'KL/rejected_KL_mean': -1220.90966796875, 'KL/mean': -966.59423828125, 'KL/std': 689.5088500976562, 'logits/chosen': -1.00642728805542, 'logits/rejected': -1.0368506908416748, 'epoch': 1.0} + 100%|█████████▉| 680/681 [28:56<00:02, 2.59s/it] 100%|██████████| 681/681 [28:58<00:00, 2.57s/it] {'loss': 1.1392, 'grad_norm': 33.870479583740234, 'learning_rate': 3.2938662507808745e-12, 'fcm_dpo/beta': 0.0008977074176073074, 'fcm_dpo/q_t': 0.4188354015350342, 'fcm_dpo/delta': 0.04187752678990364, 'fcm_dpo/margin': 399.12420654296875, 'margin_dpo/margin_mean': 399.1241760253906, 'margin_dpo/margin_std': 622.114501953125, 'logps/chosen': -771.1492309570312, 'logps/rejected': -1197.8740234375, 'logps/ref_chosen': -60.957279205322266, 'logps/ref_rejected': -88.55797576904297, 'KL/chosen_KL_mean': -710.1919555664062, 'KL/rejected_KL_mean': -1109.316162109375, 'KL/mean': -909.7540283203125, 'KL/std': 567.23486328125, 'logits/chosen': -1.0663830041885376, 'logits/rejected': -1.0852856636047363, 'epoch': 1.0} + 100%|██████████| 681/681 [28:59<00:00, 2.57s/it][INFO|trainer.py:2681] 2026-04-29 17:46:31,383 >> + +Training completed. Do not forget to share your model on huggingface.co/models =) + + + {'train_runtime': 1739.0324, 'train_samples_per_second': 25.07, 'train_steps_per_second': 0.392, 'train_loss': 1.0911195537242244, 'epoch': 1.0} + 100%|██████████| 681/681 [28:59<00:00, 2.57s/it] 100%|██████████| 681/681 [28:59<00:00, 2.55s/it] +***** train metrics ***** + epoch = 1.0 + total_flos = 0GF + train_loss = 1.0911 + train_runtime = 0:28:59.03 + train_samples = 43598 + train_samples_per_second = 25.07 + train_steps_per_second = 0.392 +2026-04-29 17:46:31 - INFO - __main__ - *** Training complete *** +2026-04-29 17:46:31 - INFO - __main__ - *** Save model *** +[INFO|configuration_utils.py:419] 2026-04-29 17:47:04,127 >> Configuration saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/config.json +[INFO|configuration_utils.py:911] 2026-04-29 17:47:04,130 >> Configuration saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/generation_config.json +[INFO|modeling_utils.py:3580] 2026-04-29 17:48:19,190 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 7 checkpoint shards. You can find where each parameters has been saved in the index located at /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/model.safetensors.index.json. +[INFO|tokenization_utils_base.py:2510] 2026-04-29 17:48:19,195 >> tokenizer config file saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/tokenizer_config.json +[INFO|tokenization_utils_base.py:2519] 2026-04-29 17:48:19,197 >> Special tokens file saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/special_tokens_map.json +2026-04-29 17:48:19 - INFO - __main__ - Saved HF-compatible model artifacts to /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449 +[INFO|modelcard.py:450] 2026-04-29 17:48:20,708 >> Dropping the following result as it does not have all the necessary fields: +{'dataset': {'name': 'Anthropic/hh-rlhf', 'type': 'Anthropic/hh-rlhf'}} +[INFO|configuration_utils.py:419] 2026-04-29 17:48:20,715 >> Configuration saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/config.json +2026-04-29 17:48:20 - INFO - __main__ - Skipping margin dataset upload because push_margin_dataset is false. +2026-04-29 17:48:20 - INFO - __main__ - *** Training complete! *** +wandb: - 0.011 MB of 0.011 MB uploaded wandb: \ 0.011 MB of 0.011 MB uploaded wandb: | 0.011 MB of 0.011 MB uploaded wandb: / 0.011 MB of 0.640 MB uploaded wandb: - 0.640 MB of 0.640 MB uploaded wandb: \ 0.640 MB of 0.640 MB uploaded wandb: | 0.640 MB of 0.640 MB uploaded wandb: +wandb: Run history: +wandb: train/KL/chosen_KL_mean ████████▇▇▇▇▇▆▆▆▆▅▅▅▅▆▄▅▄▅▄▁▃▃▃▃▃▁▁▂▁▁▁▁ +wandb: train/KL/mean ████████▇▇▇▇▇▆▆▆▆▅▅▅▅▅▅▅▄▅▄▂▃▃▃▃▂▂▂▂▂▁▁▁ +wandb: train/KL/rejected_KL_mean ████████▇▇▇▇▇▇▆▆▆▅▆▅▅▅▅▅▄▅▄▂▃▃▄▃▂▃▂▃▂▁▁▂ +wandb: train/KL/std ▁▁▁▁▁▁▁▂▂▂▂▂▂▃▂▃▃▃▃▃▃▃▄▄▄▄▄▇▆▆▅▇▇▆▆▆▆▇██ +wandb: train/epoch ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███ +wandb: train/fcm_dpo/beta ██▅▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +wandb: train/fcm_dpo/delta ▆█▁▁▁▅▄▆▆▅▅▆▆█▅▆▅▆▆▆▇▄▇▄▄▅▅▅▇▅▇▄▃█▇▆▅▄▃▅ +wandb: train/fcm_dpo/margin ▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▃▅▄▄▅▅▅▅▄▆█▄▅▅▆▇█▆ +wandb: train/fcm_dpo/q_t █▆▁▁▁▄▃▄▄▃▃▄▄▅▃▄▃▅▄▄▅▂▄▂▃▃▃▃▄▃▄▃▂▅▄▄▃▃▂▃ +wandb: train/global_step ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███ +wandb: train/grad_norm █▇▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁ +wandb: train/learning_rate ▂▃▅▇██████▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁ +wandb: train/logits/chosen ▆▆▆▆▅▅▅▆▇▇██▇▇█▇█▇█▇▇▇▆▇▅▆▅▃▄▃▄▃▃▁▂▂▂▃▃▂ +wandb: train/logits/rejected ▇▇▇▆▅▅▅▆▇▇██▇█▇▇█▇█▇▇▇▆▇▅▆▅▃▄▃▄▃▂▁▂▂▂▂▂▂ +wandb: train/logps/chosen ███████▇▇▇▇▇▆▆▆▆▆▄▅▅▄▆▄▅▄▅▄▁▃▃▃▃▃▁▂▂▁▁▁▁ +wandb: train/logps/ref_chosen ▅█▆▆▄▃▆▆▅▄▇▃▄▃█▅▅▄▇▅▃█▇▅▅▃▆▁▇▅▃▇▅▆▇▆▃▆▆▂ +wandb: train/logps/ref_rejected ▇▄█▅▅▃▆█▇▄▅▃▅▆▅▅▄▅█▅█▄▆▃▅▄▃▁▆▃▄▆▃▄█▅▄▂▂▂ +wandb: train/logps/rejected ████████▇▇▇▇▇▇▆▆▆▅▆▅▅▅▅▅▄▅▄▂▃▃▄▃▂▃▃▃▂▁▁▂ +wandb: train/loss █▅▁▁▁▄▃▄▄▄▃▄▄▄▃▄▃▅▄▃▅▂▄▂▂▃▂▄▄▃▄▃▂▅▅▃▃▃▂▃ +wandb: train/margin_dpo/margin_mean ▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▃▅▄▄▅▅▅▅▄▆█▄▅▅▆▇█▆ +wandb: train/margin_dpo/margin_std ▁▁▁▁▁▁▁▂▂▂▂▂▂▃▂▃▃▃▃▃▄▃▃▄▄▄▄▇▅▆▅▇▇▆█▆▆███ +wandb: +wandb: Run summary: +wandb: total_flos 0.0 +wandb: train/KL/chosen_KL_mean -710.19196 +wandb: train/KL/mean -909.75403 +wandb: train/KL/rejected_KL_mean -1109.31616 +wandb: train/KL/std 567.23486 +wandb: train/epoch 1.0 +wandb: train/fcm_dpo/beta 0.0009 +wandb: train/fcm_dpo/delta 0.04188 +wandb: train/fcm_dpo/margin 399.12421 +wandb: train/fcm_dpo/q_t 0.41884 +wandb: train/global_step 681 +wandb: train/grad_norm 33.87048 +wandb: train/learning_rate 0.0 +wandb: train/logits/chosen -1.06638 +wandb: train/logits/rejected -1.08529 +wandb: train/logps/chosen -771.14923 +wandb: train/logps/ref_chosen -60.95728 +wandb: train/logps/ref_rejected -88.55798 +wandb: train/logps/rejected -1197.87402 +wandb: train/loss 1.1392 +wandb: train/margin_dpo/margin_mean 399.12418 +wandb: train/margin_dpo/margin_std 622.1145 +wandb: train_loss 1.09112 +wandb: train_runtime 1739.0324 +wandb: train_samples_per_second 25.07 +wandb: train_steps_per_second 0.392 +wandb: +wandb: 🚀 View run llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449 at: https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep/runs/sobwh2jg +wandb: ⭐️ View project at: https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep +wandb: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s) +wandb: Find logs at: ./wandb/wandb/run-20260429_171649-sobwh2jg/logs +wandb: WARNING The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require("core")`! See https://wandb.me/wandb-core for more information. diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..b5ae8dd --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 0.0, + "train_loss": 1.0911195537242244, + "train_runtime": 1739.0324, + "train_samples": 43598, + "train_samples_per_second": 25.07, + "train_steps_per_second": 0.392 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..63cd8fd --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,15706 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 200, + "global_step": 681, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "KL/chosen_KL_mean": 0.00527191162109375, + "KL/mean": 0.016706019639968872, + "KL/rejected_KL_mean": 0.028141021728515625, + "KL/std": 0.272699236869812, + "epoch": 0.0014684287812041115, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.02287006378173828, + "fcm_dpo/q_t": 0.5040594935417175, + "grad_norm": 676.3800659179688, + "learning_rate": 0.0, + "logits/chosen": -0.4974287748336792, + "logits/rejected": -0.43299180269241333, + "logps/chosen": -50.1435661315918, + "logps/ref_chosen": -50.14883804321289, + "logps/ref_rejected": -74.1280517578125, + "logps/rejected": -74.09991455078125, + "loss": 1.4324, + "margin_dpo/margin_mean": -0.02287048101425171, + "margin_dpo/margin_std": 0.41920793056488037, + "step": 1 + }, + { + "KL/chosen_KL_mean": -0.03498649597167969, + "KL/mean": -0.00212840735912323, + "KL/rejected_KL_mean": 0.030735015869140625, + "KL/std": 0.24797174334526062, + "epoch": 0.002936857562408223, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.06572261452674866, + "fcm_dpo/q_t": 0.5128992795944214, + "grad_norm": 589.6188354492188, + "learning_rate": 7.246376811594203e-09, + "logits/chosen": -0.49536412954330444, + "logits/rejected": -0.4594460427761078, + "logps/chosen": -52.65568923950195, + "logps/ref_chosen": -52.620704650878906, + "logps/ref_rejected": -75.30413818359375, + "logps/rejected": -75.27340698242188, + "loss": 1.4592, + "margin_dpo/margin_mean": -0.06572240591049194, + "margin_dpo/margin_std": 0.35048407316207886, + "step": 2 + }, + { + "KL/chosen_KL_mean": -0.0075588226318359375, + "KL/mean": -0.0043991804122924805, + "KL/rejected_KL_mean": -0.001239776611328125, + "KL/std": 0.22414085268974304, + "epoch": 0.004405286343612335, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.00632166862487793, + "fcm_dpo/q_t": 0.5012327432632446, + "grad_norm": 575.7740478515625, + "learning_rate": 1.4492753623188406e-08, + "logits/chosen": -0.4817797839641571, + "logits/rejected": -0.44226667284965515, + "logps/chosen": -60.98915481567383, + "logps/ref_chosen": -60.981597900390625, + "logps/ref_rejected": -68.67259216308594, + "logps/rejected": -68.67383575439453, + "loss": 1.405, + "margin_dpo/margin_mean": -0.0063214898109436035, + "margin_dpo/margin_std": 0.2866283059120178, + "step": 3 + }, + { + "KL/chosen_KL_mean": -0.04131507873535156, + "KL/mean": -0.0033356696367263794, + "KL/rejected_KL_mean": 0.034641265869140625, + "KL/std": 0.25460168719291687, + "epoch": 0.005873715124816446, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.07595756649971008, + "fcm_dpo/q_t": 0.5149009227752686, + "grad_norm": 598.5643920898438, + "learning_rate": 2.1739130434782606e-08, + "logits/chosen": -0.4682745039463043, + "logits/rejected": -0.44059938192367554, + "logps/chosen": -56.80902862548828, + "logps/ref_chosen": -56.7677116394043, + "logps/ref_rejected": -86.64710998535156, + "logps/rejected": -86.61247253417969, + "loss": 1.469, + "margin_dpo/margin_mean": -0.0759580135345459, + "margin_dpo/margin_std": 0.36108309030532837, + "step": 4 + }, + { + "KL/chosen_KL_mean": 0.0052433013916015625, + "KL/mean": 0.018906593322753906, + "KL/rejected_KL_mean": 0.032573699951171875, + "KL/std": 0.2835850417613983, + "epoch": 0.007342143906020558, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.027328133583068848, + "fcm_dpo/q_t": 0.5043825507164001, + "grad_norm": 748.8038940429688, + "learning_rate": 2.898550724637681e-08, + "logits/chosen": -0.5214688777923584, + "logits/rejected": -0.4782792031764984, + "logps/chosen": -53.85413360595703, + "logps/ref_chosen": -53.859375, + "logps/ref_rejected": -84.14918518066406, + "logps/rejected": -84.11660766601562, + "loss": 1.4327, + "margin_dpo/margin_mean": -0.02732786536216736, + "margin_dpo/margin_std": 0.39059120416641235, + "step": 5 + }, + { + "KL/chosen_KL_mean": 0.0131988525390625, + "KL/mean": -0.0006367862224578857, + "KL/rejected_KL_mean": -0.014469146728515625, + "KL/std": 0.2519422471523285, + "epoch": 0.00881057268722467, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.0276680588722229, + "fcm_dpo/q_t": 0.49454063177108765, + "grad_norm": 761.1502685546875, + "learning_rate": 3.6231884057971014e-08, + "logits/chosen": -0.4976610243320465, + "logits/rejected": -0.4546470046043396, + "logps/chosen": -62.994285583496094, + "logps/ref_chosen": -63.007484436035156, + "logps/ref_rejected": -92.64534759521484, + "logps/rejected": -92.65982055664062, + "loss": 1.3859, + "margin_dpo/margin_mean": 0.027667373418807983, + "margin_dpo/margin_std": 0.35976481437683105, + "step": 6 + }, + { + "KL/chosen_KL_mean": 0.035762786865234375, + "KL/mean": 0.020337015390396118, + "KL/rejected_KL_mean": 0.004913330078125, + "KL/std": 0.2789710462093353, + "epoch": 0.010279001468428781, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.030850648880004883, + "fcm_dpo/q_t": 0.49415522813796997, + "grad_norm": 648.91259765625, + "learning_rate": 4.347826086956521e-08, + "logits/chosen": -0.5009369254112244, + "logits/rejected": -0.4670419692993164, + "logps/chosen": -57.73905944824219, + "logps/ref_chosen": -57.774818420410156, + "logps/ref_rejected": -103.92059326171875, + "logps/rejected": -103.91567993164062, + "loss": 1.3851, + "margin_dpo/margin_mean": 0.030851304531097412, + "margin_dpo/margin_std": 0.3817327618598938, + "step": 7 + }, + { + "KL/chosen_KL_mean": 0.006595611572265625, + "KL/mean": 0.011890605092048645, + "KL/rejected_KL_mean": 0.01718902587890625, + "KL/std": 0.2876508831977844, + "epoch": 0.011747430249632892, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.01059296727180481, + "fcm_dpo/q_t": 0.5014467239379883, + "grad_norm": 638.4391479492188, + "learning_rate": 5.0724637681159424e-08, + "logits/chosen": -0.5008213520050049, + "logits/rejected": -0.47419145703315735, + "logps/chosen": -58.709442138671875, + "logps/ref_chosen": -58.716033935546875, + "logps/ref_rejected": -79.3114242553711, + "logps/rejected": -79.29423522949219, + "loss": 1.4194, + "margin_dpo/margin_mean": -0.010592788457870483, + "margin_dpo/margin_std": 0.3931761384010315, + "step": 8 + }, + { + "KL/chosen_KL_mean": 0.03925895690917969, + "KL/mean": 0.02510516345500946, + "KL/rejected_KL_mean": 0.010951995849609375, + "KL/std": 0.30363646149635315, + "epoch": 0.013215859030837005, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.028308242559432983, + "fcm_dpo/q_t": 0.49435490369796753, + "grad_norm": 672.1676025390625, + "learning_rate": 5.797101449275362e-08, + "logits/chosen": -0.5118868350982666, + "logits/rejected": -0.4663264751434326, + "logps/chosen": -69.82758331298828, + "logps/ref_chosen": -69.8668441772461, + "logps/ref_rejected": -99.6026611328125, + "logps/rejected": -99.59171295166016, + "loss": 1.394, + "margin_dpo/margin_mean": 0.028307169675827026, + "margin_dpo/margin_std": 0.4165334105491638, + "step": 9 + }, + { + "KL/chosen_KL_mean": 0.0066814422607421875, + "KL/mean": -0.015493467450141907, + "KL/rejected_KL_mean": -0.037662506103515625, + "KL/std": 0.2837975323200226, + "epoch": 0.014684287812041116, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.04434821009635925, + "fcm_dpo/q_t": 0.4914953112602234, + "grad_norm": 545.5849609375, + "learning_rate": 6.521739130434782e-08, + "logits/chosen": -0.4926380515098572, + "logits/rejected": -0.44934237003326416, + "logps/chosen": -48.35100555419922, + "logps/ref_chosen": -48.35768508911133, + "logps/ref_rejected": -80.37206268310547, + "logps/rejected": -80.40972900390625, + "loss": 1.3752, + "margin_dpo/margin_mean": 0.04434826970100403, + "margin_dpo/margin_std": 0.37585416436195374, + "step": 10 + }, + { + "KL/chosen_KL_mean": -0.000308990478515625, + "KL/mean": -0.00840708613395691, + "KL/rejected_KL_mean": -0.01650238037109375, + "KL/std": 0.28651660680770874, + "epoch": 0.016152716593245228, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.01619333028793335, + "fcm_dpo/q_t": 0.49684467911720276, + "grad_norm": 539.252197265625, + "learning_rate": 7.246376811594203e-08, + "logits/chosen": -0.4729078710079193, + "logits/rejected": -0.44843602180480957, + "logps/chosen": -53.01716613769531, + "logps/ref_chosen": -53.01685333251953, + "logps/ref_rejected": -87.78038024902344, + "logps/rejected": -87.796875, + "loss": 1.3993, + "margin_dpo/margin_mean": 0.016193389892578125, + "margin_dpo/margin_std": 0.378741979598999, + "step": 11 + }, + { + "KL/chosen_KL_mean": -0.0455169677734375, + "KL/mean": -0.0396341010928154, + "KL/rejected_KL_mean": -0.033748626708984375, + "KL/std": 0.27084100246429443, + "epoch": 0.01762114537444934, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.01176394522190094, + "fcm_dpo/q_t": 0.5020244717597961, + "grad_norm": 731.9530639648438, + "learning_rate": 7.971014492753623e-08, + "logits/chosen": -0.5192070007324219, + "logits/rejected": -0.48252177238464355, + "logps/chosen": -61.8509521484375, + "logps/ref_chosen": -61.80543518066406, + "logps/ref_rejected": -104.8582763671875, + "logps/rejected": -104.89202117919922, + "loss": 1.4199, + "margin_dpo/margin_mean": -0.011764273047447205, + "margin_dpo/margin_std": 0.3872652053833008, + "step": 12 + }, + { + "KL/chosen_KL_mean": 0.0012149810791015625, + "KL/mean": 0.024721741676330566, + "KL/rejected_KL_mean": 0.0482330322265625, + "KL/std": 0.2512255609035492, + "epoch": 0.01908957415565345, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.047011733055114746, + "fcm_dpo/q_t": 0.5091712474822998, + "grad_norm": 657.2701416015625, + "learning_rate": 8.695652173913042e-08, + "logits/chosen": -0.4995085597038269, + "logits/rejected": -0.47286656498908997, + "logps/chosen": -64.25914001464844, + "logps/ref_chosen": -64.2603530883789, + "logps/ref_rejected": -87.20307922363281, + "logps/rejected": -87.15484619140625, + "loss": 1.4451, + "margin_dpo/margin_mean": -0.047012150287628174, + "margin_dpo/margin_std": 0.3621995747089386, + "step": 13 + }, + { + "KL/chosen_KL_mean": -0.030294418334960938, + "KL/mean": -0.0255916565656662, + "KL/rejected_KL_mean": -0.020893096923828125, + "KL/std": 0.27099841833114624, + "epoch": 0.020558002936857563, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.009398609399795532, + "fcm_dpo/q_t": 0.5020325183868408, + "grad_norm": 684.386962890625, + "learning_rate": 9.420289855072464e-08, + "logits/chosen": -0.49813684821128845, + "logits/rejected": -0.4595490097999573, + "logps/chosen": -58.1405029296875, + "logps/ref_chosen": -58.11021041870117, + "logps/ref_rejected": -104.04708099365234, + "logps/rejected": -104.06797790527344, + "loss": 1.4211, + "margin_dpo/margin_mean": -0.009398102760314941, + "margin_dpo/margin_std": 0.4144117534160614, + "step": 14 + }, + { + "KL/chosen_KL_mean": -0.031803131103515625, + "KL/mean": -0.05128836631774902, + "KL/rejected_KL_mean": -0.070770263671875, + "KL/std": 0.2677825093269348, + "epoch": 0.022026431718061675, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.038970112800598145, + "fcm_dpo/q_t": 0.49189913272857666, + "grad_norm": 519.0724487304688, + "learning_rate": 1.0144927536231885e-07, + "logits/chosen": -0.4899941384792328, + "logits/rejected": -0.4712330996990204, + "logps/chosen": -56.99871063232422, + "logps/ref_chosen": -56.96691131591797, + "logps/ref_rejected": -80.80863952636719, + "logps/rejected": -80.87940979003906, + "loss": 1.3835, + "margin_dpo/margin_mean": 0.03897008299827576, + "margin_dpo/margin_std": 0.4108760356903076, + "step": 15 + }, + { + "KL/chosen_KL_mean": 0.010923385620117188, + "KL/mean": -0.03770947456359863, + "KL/rejected_KL_mean": -0.08633804321289062, + "KL/std": 0.25064218044281006, + "epoch": 0.023494860499265784, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.09726369380950928, + "fcm_dpo/q_t": 0.4807215929031372, + "grad_norm": 658.9254150390625, + "learning_rate": 1.0869565217391303e-07, + "logits/chosen": -0.5206788182258606, + "logits/rejected": -0.4792103171348572, + "logps/chosen": -61.72896957397461, + "logps/ref_chosen": -61.739891052246094, + "logps/ref_rejected": -84.36947631835938, + "logps/rejected": -84.455810546875, + "loss": 1.3318, + "margin_dpo/margin_mean": 0.09726375341415405, + "margin_dpo/margin_std": 0.3595857620239258, + "step": 16 + }, + { + "KL/chosen_KL_mean": 0.007419586181640625, + "KL/mean": -0.00011467933654785156, + "KL/rejected_KL_mean": -0.007648468017578125, + "KL/std": 0.2673434615135193, + "epoch": 0.024963289280469897, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.015069246292114258, + "fcm_dpo/q_t": 0.49689337611198425, + "grad_norm": 644.1885986328125, + "learning_rate": 1.1594202898550725e-07, + "logits/chosen": -0.4899910092353821, + "logits/rejected": -0.4514046311378479, + "logps/chosen": -67.70291137695312, + "logps/ref_chosen": -67.71033477783203, + "logps/ref_rejected": -85.37865447998047, + "logps/rejected": -85.38630676269531, + "loss": 1.4017, + "margin_dpo/margin_mean": 0.015069544315338135, + "margin_dpo/margin_std": 0.4079738259315491, + "step": 17 + }, + { + "KL/chosen_KL_mean": -0.020544052124023438, + "KL/mean": -0.054789185523986816, + "KL/rejected_KL_mean": -0.0890350341796875, + "KL/std": 0.26502934098243713, + "epoch": 0.02643171806167401, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.0684882402420044, + "fcm_dpo/q_t": 0.4862971305847168, + "grad_norm": 632.3574829101562, + "learning_rate": 1.2318840579710146e-07, + "logits/chosen": -0.48648545145988464, + "logits/rejected": -0.43023061752319336, + "logps/chosen": -47.760032653808594, + "logps/ref_chosen": -47.7394905090332, + "logps/ref_rejected": -75.4722900390625, + "logps/rejected": -75.56132507324219, + "loss": 1.3528, + "margin_dpo/margin_mean": 0.06848806142807007, + "margin_dpo/margin_std": 0.3598102629184723, + "step": 18 + }, + { + "KL/chosen_KL_mean": -0.00384521484375, + "KL/mean": -0.04403865337371826, + "KL/rejected_KL_mean": -0.08422470092773438, + "KL/std": 0.24243327975273132, + "epoch": 0.027900146842878122, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.08037757873535156, + "fcm_dpo/q_t": 0.4844985008239746, + "grad_norm": 576.9486083984375, + "learning_rate": 1.3043478260869563e-07, + "logits/chosen": -0.528351902961731, + "logits/rejected": -0.4815219044685364, + "logps/chosen": -70.2092056274414, + "logps/ref_chosen": -70.20536041259766, + "logps/ref_rejected": -89.7575912475586, + "logps/rejected": -89.84181213378906, + "loss": 1.3433, + "margin_dpo/margin_mean": 0.08037763833999634, + "margin_dpo/margin_std": 0.3444629907608032, + "step": 19 + }, + { + "KL/chosen_KL_mean": -0.043910980224609375, + "KL/mean": -0.06724703311920166, + "KL/rejected_KL_mean": -0.09057998657226562, + "KL/std": 0.2558104395866394, + "epoch": 0.02936857562408223, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.04667180776596069, + "fcm_dpo/q_t": 0.49041998386383057, + "grad_norm": 575.07275390625, + "learning_rate": 1.3768115942028986e-07, + "logits/chosen": -0.5057722330093384, + "logits/rejected": -0.44449201226234436, + "logps/chosen": -50.84715270996094, + "logps/ref_chosen": -50.80324172973633, + "logps/ref_rejected": -78.82334899902344, + "logps/rejected": -78.91392517089844, + "loss": 1.3713, + "margin_dpo/margin_mean": 0.046672046184539795, + "margin_dpo/margin_std": 0.37012988328933716, + "step": 20 + }, + { + "KL/chosen_KL_mean": -0.007419586181640625, + "KL/mean": -0.08116798102855682, + "KL/rejected_KL_mean": -0.15491485595703125, + "KL/std": 0.27864497900009155, + "epoch": 0.030837004405286344, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.14749857783317566, + "fcm_dpo/q_t": 0.47142279148101807, + "grad_norm": 567.56494140625, + "learning_rate": 1.4492753623188405e-07, + "logits/chosen": -0.471624493598938, + "logits/rejected": -0.4458872079849243, + "logps/chosen": -50.0704345703125, + "logps/ref_chosen": -50.063018798828125, + "logps/ref_rejected": -77.86878967285156, + "logps/rejected": -78.02371215820312, + "loss": 1.2925, + "margin_dpo/margin_mean": 0.1474984586238861, + "margin_dpo/margin_std": 0.3555169403553009, + "step": 21 + }, + { + "KL/chosen_KL_mean": 0.029613494873046875, + "KL/mean": -0.03875645995140076, + "KL/rejected_KL_mean": -0.10712814331054688, + "KL/std": 0.2763916254043579, + "epoch": 0.032305433186490456, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.13674354553222656, + "fcm_dpo/q_t": 0.47342291474342346, + "grad_norm": 609.1174926757812, + "learning_rate": 1.5217391304347825e-07, + "logits/chosen": -0.4810870885848999, + "logits/rejected": -0.43719351291656494, + "logps/chosen": -59.02802276611328, + "logps/ref_chosen": -59.05763626098633, + "logps/ref_rejected": -97.50466918945312, + "logps/rejected": -97.61180114746094, + "loss": 1.3046, + "margin_dpo/margin_mean": 0.1367432177066803, + "margin_dpo/margin_std": 0.38546815514564514, + "step": 22 + }, + { + "KL/chosen_KL_mean": 0.06751251220703125, + "KL/mean": -0.029902145266532898, + "KL/rejected_KL_mean": -0.1273174285888672, + "KL/std": 0.32757315039634705, + "epoch": 0.033773861967694566, + "fcm_dpo/beta": 0.800000011920929, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.19482764601707458, + "fcm_dpo/q_t": 0.46310946345329285, + "grad_norm": 545.2821044921875, + "learning_rate": 1.5942028985507245e-07, + "logits/chosen": -0.4882713258266449, + "logits/rejected": -0.4656675159931183, + "logps/chosen": -60.01018524169922, + "logps/ref_chosen": -60.07769775390625, + "logps/ref_rejected": -81.13955688476562, + "logps/rejected": -81.2668685913086, + "loss": 1.2669, + "margin_dpo/margin_mean": 0.19482776522636414, + "margin_dpo/margin_std": 0.43239736557006836, + "step": 23 + }, + { + "KL/chosen_KL_mean": 0.057018280029296875, + "KL/mean": -0.06638666987419128, + "KL/rejected_KL_mean": -0.18979644775390625, + "KL/std": 0.3026999235153198, + "epoch": 0.03524229074889868, + "fcm_dpo/beta": 0.8059060573577881, + "fcm_dpo/delta": 0.0732855275273323, + "fcm_dpo/margin": 0.2468125820159912, + "fcm_dpo/q_t": 0.45153895020484924, + "grad_norm": 589.2479248046875, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.4822083115577698, + "logits/rejected": -0.46618789434432983, + "logps/chosen": -44.23401641845703, + "logps/ref_chosen": -44.29103469848633, + "logps/ref_rejected": -99.12521362304688, + "logps/rejected": -99.31501007080078, + "loss": 1.2205, + "margin_dpo/margin_mean": 0.24681302905082703, + "margin_dpo/margin_std": 0.3777139186859131, + "step": 24 + }, + { + "KL/chosen_KL_mean": 0.016529083251953125, + "KL/mean": -0.07727153599262238, + "KL/rejected_KL_mean": -0.17107009887695312, + "KL/std": 0.34193894267082214, + "epoch": 0.03671071953010279, + "fcm_dpo/beta": 0.8118120431900024, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.18760046362876892, + "fcm_dpo/q_t": 0.4633547067642212, + "grad_norm": 538.94677734375, + "learning_rate": 1.7391304347826085e-07, + "logits/chosen": -0.5210130214691162, + "logits/rejected": -0.4919084310531616, + "logps/chosen": -52.52052307128906, + "logps/ref_chosen": -52.537052154541016, + "logps/ref_rejected": -89.34219360351562, + "logps/rejected": -89.51325988769531, + "loss": 1.2696, + "margin_dpo/margin_mean": 0.18760085105895996, + "margin_dpo/margin_std": 0.4301965832710266, + "step": 25 + }, + { + "KL/chosen_KL_mean": 0.06367874145507812, + "KL/mean": -0.09489929676055908, + "KL/rejected_KL_mean": -0.25347900390625, + "KL/std": 0.40129321813583374, + "epoch": 0.0381791483113069, + "fcm_dpo/beta": 0.8153971433639526, + "fcm_dpo/delta": 0.04396749660372734, + "fcm_dpo/margin": 0.3171558976173401, + "fcm_dpo/q_t": 0.43973731994628906, + "grad_norm": 565.0730590820312, + "learning_rate": 1.8115942028985507e-07, + "logits/chosen": -0.5216317176818848, + "logits/rejected": -0.4897175133228302, + "logps/chosen": -53.859130859375, + "logps/ref_chosen": -53.92280578613281, + "logps/ref_rejected": -103.35971069335938, + "logps/rejected": -103.61318969726562, + "loss": 1.1923, + "margin_dpo/margin_mean": 0.31715625524520874, + "margin_dpo/margin_std": 0.5414035320281982, + "step": 26 + }, + { + "KL/chosen_KL_mean": 0.13181304931640625, + "KL/mean": -0.10721321403980255, + "KL/rejected_KL_mean": -0.3462409973144531, + "KL/std": 0.445562481880188, + "epoch": 0.039647577092511016, + "fcm_dpo/beta": 0.8174295425415039, + "fcm_dpo/delta": 0.00940924696624279, + "fcm_dpo/margin": 0.4780521094799042, + "fcm_dpo/q_t": 0.4092448949813843, + "grad_norm": 575.7881469726562, + "learning_rate": 1.8840579710144927e-07, + "logits/chosen": -0.5156636238098145, + "logits/rejected": -0.47926321625709534, + "logps/chosen": -42.76671600341797, + "logps/ref_chosen": -42.898529052734375, + "logps/ref_rejected": -98.72419738769531, + "logps/rejected": -99.07044219970703, + "loss": 1.0763, + "margin_dpo/margin_mean": 0.47805216908454895, + "margin_dpo/margin_std": 0.5317339897155762, + "step": 27 + }, + { + "KL/chosen_KL_mean": 0.029035568237304688, + "KL/mean": -0.13445699214935303, + "KL/rejected_KL_mean": -0.29795074462890625, + "KL/std": 0.4010791778564453, + "epoch": 0.041116005873715125, + "fcm_dpo/beta": 0.8387187123298645, + "fcm_dpo/delta": 0.12880420684814453, + "fcm_dpo/margin": 0.3269842267036438, + "fcm_dpo/q_t": 0.4359322488307953, + "grad_norm": 499.330810546875, + "learning_rate": 1.9565217391304347e-07, + "logits/chosen": -0.5236212015151978, + "logits/rejected": -0.4699610471725464, + "logps/chosen": -60.5274658203125, + "logps/ref_chosen": -60.55650329589844, + "logps/ref_rejected": -91.40111541748047, + "logps/rejected": -91.69906616210938, + "loss": 1.1808, + "margin_dpo/margin_mean": 0.3269844055175781, + "margin_dpo/margin_std": 0.5416440367698669, + "step": 28 + }, + { + "KL/chosen_KL_mean": 0.13134193420410156, + "KL/mean": -0.14041244983673096, + "KL/rejected_KL_mean": -0.4121665954589844, + "KL/std": 0.44271203875541687, + "epoch": 0.042584434654919234, + "fcm_dpo/beta": 0.8350539207458496, + "fcm_dpo/delta": -0.056411802768707275, + "fcm_dpo/margin": 0.5435106754302979, + "fcm_dpo/q_t": 0.39213281869888306, + "grad_norm": 554.7158813476562, + "learning_rate": 2.028985507246377e-07, + "logits/chosen": -0.5539100170135498, + "logits/rejected": -0.5077922344207764, + "logps/chosen": -57.67644500732422, + "logps/ref_chosen": -57.80778503417969, + "logps/ref_rejected": -97.39434814453125, + "logps/rejected": -97.8065185546875, + "loss": 1.0209, + "margin_dpo/margin_mean": 0.5435110330581665, + "margin_dpo/margin_std": 0.47872793674468994, + "step": 29 + }, + { + "KL/chosen_KL_mean": 0.13145065307617188, + "KL/mean": -0.23007872700691223, + "KL/rejected_KL_mean": -0.59161376953125, + "KL/std": 0.6256662607192993, + "epoch": 0.04405286343612335, + "fcm_dpo/beta": 0.804972231388092, + "fcm_dpo/delta": -0.19466958940029144, + "fcm_dpo/margin": 0.7230579853057861, + "fcm_dpo/q_t": 0.36796897649765015, + "grad_norm": 459.3413391113281, + "learning_rate": 2.1014492753623187e-07, + "logits/chosen": -0.5020028948783875, + "logits/rejected": -0.47157809138298035, + "logps/chosen": -52.4459228515625, + "logps/ref_chosen": -52.577369689941406, + "logps/ref_rejected": -98.48920440673828, + "logps/rejected": -99.08081817626953, + "loss": 0.9594, + "margin_dpo/margin_mean": 0.7230584621429443, + "margin_dpo/margin_std": 0.712450385093689, + "step": 30 + }, + { + "KL/chosen_KL_mean": 0.1332244873046875, + "KL/mean": -0.13057458400726318, + "KL/rejected_KL_mean": -0.394378662109375, + "KL/std": 0.5608391165733337, + "epoch": 0.04552129221732746, + "fcm_dpo/beta": 0.7972604632377625, + "fcm_dpo/delta": -0.0215899795293808, + "fcm_dpo/margin": 0.5276015996932983, + "fcm_dpo/q_t": 0.4051462411880493, + "grad_norm": 401.8319091796875, + "learning_rate": 2.1739130434782607e-07, + "logits/chosen": -0.509661853313446, + "logits/rejected": -0.46542733907699585, + "logps/chosen": -63.67369842529297, + "logps/ref_chosen": -63.806922912597656, + "logps/ref_rejected": -72.89400482177734, + "logps/rejected": -73.28838348388672, + "loss": 1.0803, + "margin_dpo/margin_mean": 0.5276015996932983, + "margin_dpo/margin_std": 0.7064246535301208, + "step": 31 + }, + { + "KL/chosen_KL_mean": 0.20654296875, + "KL/mean": -0.18202102184295654, + "KL/rejected_KL_mean": -0.570587158203125, + "KL/std": 0.7511119842529297, + "epoch": 0.04698972099853157, + "fcm_dpo/beta": 0.772221565246582, + "fcm_dpo/delta": -0.21296542882919312, + "fcm_dpo/margin": 0.7771282196044922, + "fcm_dpo/q_t": 0.3716619610786438, + "grad_norm": 401.576416015625, + "learning_rate": 2.2463768115942027e-07, + "logits/chosen": -0.5210152864456177, + "logits/rejected": -0.4803611934185028, + "logps/chosen": -62.532981872558594, + "logps/ref_chosen": -62.739524841308594, + "logps/ref_rejected": -89.3175048828125, + "logps/rejected": -89.88809204101562, + "loss": 0.979, + "margin_dpo/margin_mean": 0.7771281003952026, + "margin_dpo/margin_std": 0.9537783861160278, + "step": 32 + }, + { + "KL/chosen_KL_mean": 0.11682891845703125, + "KL/mean": -0.20829498767852783, + "KL/rejected_KL_mean": -0.5334129333496094, + "KL/std": 0.5831528902053833, + "epoch": 0.048458149779735685, + "fcm_dpo/beta": 0.7558040022850037, + "fcm_dpo/delta": -0.09609463810920715, + "fcm_dpo/margin": 0.6502407789230347, + "fcm_dpo/q_t": 0.38533806800842285, + "grad_norm": 389.24462890625, + "learning_rate": 2.318840579710145e-07, + "logits/chosen": -0.48112988471984863, + "logits/rejected": -0.4547329545021057, + "logps/chosen": -53.144142150878906, + "logps/ref_chosen": -53.26097106933594, + "logps/ref_rejected": -87.8851318359375, + "logps/rejected": -88.41854858398438, + "loss": 1.0079, + "margin_dpo/margin_mean": 0.6502406597137451, + "margin_dpo/margin_std": 0.6353799104690552, + "step": 33 + }, + { + "KL/chosen_KL_mean": 0.10170745849609375, + "KL/mean": -0.2716452181339264, + "KL/rejected_KL_mean": -0.6450004577636719, + "KL/std": 0.6980259418487549, + "epoch": 0.049926578560939794, + "fcm_dpo/beta": 0.7317001819610596, + "fcm_dpo/delta": -0.15499642491340637, + "fcm_dpo/margin": 0.7467071413993835, + "fcm_dpo/q_t": 0.3767717480659485, + "grad_norm": 378.3127136230469, + "learning_rate": 2.391304347826087e-07, + "logits/chosen": -0.5065457224845886, + "logits/rejected": -0.48904159665107727, + "logps/chosen": -50.71562194824219, + "logps/ref_chosen": -50.81732940673828, + "logps/ref_rejected": -101.92184448242188, + "logps/rejected": -102.56684875488281, + "loss": 0.9903, + "margin_dpo/margin_mean": 0.746705949306488, + "margin_dpo/margin_std": 0.8056973218917847, + "step": 34 + }, + { + "KL/chosen_KL_mean": 0.18681907653808594, + "KL/mean": -0.41164833307266235, + "KL/rejected_KL_mean": -1.0101165771484375, + "KL/std": 0.9510899782180786, + "epoch": 0.0513950073421439, + "fcm_dpo/beta": 0.6779334545135498, + "fcm_dpo/delta": -0.4493417739868164, + "fcm_dpo/margin": 1.1969325542449951, + "fcm_dpo/q_t": 0.3209930658340454, + "grad_norm": 309.58441162109375, + "learning_rate": 2.463768115942029e-07, + "logits/chosen": -0.502815306186676, + "logits/rejected": -0.46622055768966675, + "logps/chosen": -50.837669372558594, + "logps/ref_chosen": -51.02449035644531, + "logps/ref_rejected": -106.82443237304688, + "logps/rejected": -107.83454895019531, + "loss": 0.8254, + "margin_dpo/margin_mean": 1.1969324350357056, + "margin_dpo/margin_std": 1.0367286205291748, + "step": 35 + }, + { + "KL/chosen_KL_mean": 0.05117225646972656, + "KL/mean": -0.48711907863616943, + "KL/rejected_KL_mean": -1.0254096984863281, + "KL/std": 1.0425536632537842, + "epoch": 0.05286343612334802, + "fcm_dpo/beta": 0.6399196982383728, + "fcm_dpo/delta": -0.3091672658920288, + "fcm_dpo/margin": 1.0765844583511353, + "fcm_dpo/q_t": 0.35396426916122437, + "grad_norm": 261.8072814941406, + "learning_rate": 2.536231884057971e-07, + "logits/chosen": -0.5571258068084717, + "logits/rejected": -0.5207737684249878, + "logps/chosen": -51.9403190612793, + "logps/ref_chosen": -51.991493225097656, + "logps/ref_rejected": -86.0406265258789, + "logps/rejected": -87.0660400390625, + "loss": 0.949, + "margin_dpo/margin_mean": 1.0765833854675293, + "margin_dpo/margin_std": 1.2341694831848145, + "step": 36 + }, + { + "KL/chosen_KL_mean": 0.019609451293945312, + "KL/mean": -0.4790758192539215, + "KL/rejected_KL_mean": -0.9777679443359375, + "KL/std": 1.0161794424057007, + "epoch": 0.05433186490455213, + "fcm_dpo/beta": 0.5977625846862793, + "fcm_dpo/delta": -0.2139551043510437, + "fcm_dpo/margin": 0.9973729252815247, + "fcm_dpo/q_t": 0.3746863603591919, + "grad_norm": 244.11151123046875, + "learning_rate": 2.6086956521739126e-07, + "logits/chosen": -0.4970467984676361, + "logits/rejected": -0.45224228501319885, + "logps/chosen": -62.787498474121094, + "logps/ref_chosen": -62.807106018066406, + "logps/ref_rejected": -77.89507293701172, + "logps/rejected": -78.87284088134766, + "loss": 1.0035, + "margin_dpo/margin_mean": 0.9973729848861694, + "margin_dpo/margin_std": 1.285217046737671, + "step": 37 + }, + { + "KL/chosen_KL_mean": 0.21410560607910156, + "KL/mean": -0.46886640787124634, + "KL/rejected_KL_mean": -1.1518363952636719, + "KL/std": 1.316064476966858, + "epoch": 0.055800293685756244, + "fcm_dpo/beta": 0.5653368830680847, + "fcm_dpo/delta": -0.4023910164833069, + "fcm_dpo/margin": 1.365942120552063, + "fcm_dpo/q_t": 0.3435903489589691, + "grad_norm": 240.15562438964844, + "learning_rate": 2.681159420289855e-07, + "logits/chosen": -0.5118378400802612, + "logits/rejected": -0.4790714979171753, + "logps/chosen": -48.176414489746094, + "logps/ref_chosen": -48.39051818847656, + "logps/ref_rejected": -97.91244506835938, + "logps/rejected": -99.06427764892578, + "loss": 0.9045, + "margin_dpo/margin_mean": 1.3659417629241943, + "margin_dpo/margin_std": 1.6197600364685059, + "step": 38 + }, + { + "KL/chosen_KL_mean": 0.08580398559570312, + "KL/mean": -0.7106390595436096, + "KL/rejected_KL_mean": -1.5070762634277344, + "KL/std": 1.289241075515747, + "epoch": 0.05726872246696035, + "fcm_dpo/beta": 0.5120701193809509, + "fcm_dpo/delta": -0.4571428894996643, + "fcm_dpo/margin": 1.5928757190704346, + "fcm_dpo/q_t": 0.3189411163330078, + "grad_norm": 256.7136535644531, + "learning_rate": 2.753623188405797e-07, + "logits/chosen": -0.5451552867889404, + "logits/rejected": -0.5046231746673584, + "logps/chosen": -50.664669036865234, + "logps/ref_chosen": -50.75047302246094, + "logps/ref_rejected": -78.56951141357422, + "logps/rejected": -80.07658386230469, + "loss": 0.8415, + "margin_dpo/margin_mean": 1.5928757190704346, + "margin_dpo/margin_std": 1.4120266437530518, + "step": 39 + }, + { + "KL/chosen_KL_mean": 0.18593215942382812, + "KL/mean": -0.5416154861450195, + "KL/rejected_KL_mean": -1.2691650390625, + "KL/std": 1.3004155158996582, + "epoch": 0.05873715124816446, + "fcm_dpo/beta": 0.4821917414665222, + "fcm_dpo/delta": -0.32341742515563965, + "fcm_dpo/margin": 1.4550951719284058, + "fcm_dpo/q_t": 0.35090136528015137, + "grad_norm": 182.71023559570312, + "learning_rate": 2.8260869565217386e-07, + "logits/chosen": -0.5080227255821228, + "logits/rejected": -0.47728431224823, + "logps/chosen": -57.79913330078125, + "logps/ref_chosen": -57.985069274902344, + "logps/ref_rejected": -74.3000717163086, + "logps/rejected": -75.5692367553711, + "loss": 0.922, + "margin_dpo/margin_mean": 1.455095887184143, + "margin_dpo/margin_std": 1.5767593383789062, + "step": 40 + }, + { + "KL/chosen_KL_mean": 0.07164192199707031, + "KL/mean": -0.858450174331665, + "KL/rejected_KL_mean": -1.7885398864746094, + "KL/std": 1.770848274230957, + "epoch": 0.06020558002936858, + "fcm_dpo/beta": 0.44186830520629883, + "fcm_dpo/delta": -0.45917147397994995, + "fcm_dpo/margin": 1.8601810932159424, + "fcm_dpo/q_t": 0.3276433050632477, + "grad_norm": 186.04129028320312, + "learning_rate": 2.898550724637681e-07, + "logits/chosen": -0.5379878282546997, + "logits/rejected": -0.5013633370399475, + "logps/chosen": -62.624176025390625, + "logps/ref_chosen": -62.69581604003906, + "logps/ref_rejected": -97.02352905273438, + "logps/rejected": -98.81207275390625, + "loss": 0.867, + "margin_dpo/margin_mean": 1.8601820468902588, + "margin_dpo/margin_std": 1.9144206047058105, + "step": 41 + }, + { + "KL/chosen_KL_mean": 0.21912765502929688, + "KL/mean": -0.9815043210983276, + "KL/rejected_KL_mean": -2.1821327209472656, + "KL/std": 1.9854331016540527, + "epoch": 0.06167400881057269, + "fcm_dpo/beta": 0.39385828375816345, + "fcm_dpo/delta": -0.6033186912536621, + "fcm_dpo/margin": 2.401261806488037, + "fcm_dpo/q_t": 0.3034874200820923, + "grad_norm": 167.1330108642578, + "learning_rate": 2.971014492753623e-07, + "logits/chosen": -0.5730389356613159, + "logits/rejected": -0.5269917249679565, + "logps/chosen": -58.74729919433594, + "logps/ref_chosen": -58.966426849365234, + "logps/ref_rejected": -109.90837097167969, + "logps/rejected": -112.09050750732422, + "loss": 0.792, + "margin_dpo/margin_mean": 2.4012622833251953, + "margin_dpo/margin_std": 2.241847276687622, + "step": 42 + }, + { + "KL/chosen_KL_mean": 0.5337352752685547, + "KL/mean": -0.6120513677597046, + "KL/rejected_KL_mean": -1.757843017578125, + "KL/std": 1.6964552402496338, + "epoch": 0.0631424375917768, + "fcm_dpo/beta": 0.3543139696121216, + "fcm_dpo/delta": -0.4530714154243469, + "fcm_dpo/margin": 2.291576385498047, + "fcm_dpo/q_t": 0.31728753447532654, + "grad_norm": 157.29473876953125, + "learning_rate": 3.043478260869565e-07, + "logits/chosen": -0.5307985544204712, + "logits/rejected": -0.505626916885376, + "logps/chosen": -53.62226104736328, + "logps/ref_chosen": -54.15599822998047, + "logps/ref_rejected": -96.48019409179688, + "logps/rejected": -98.238037109375, + "loss": 0.8149, + "margin_dpo/margin_mean": 2.2915759086608887, + "margin_dpo/margin_std": 1.8112150430679321, + "step": 43 + }, + { + "KL/chosen_KL_mean": 0.21957778930664062, + "KL/mean": -1.076310157775879, + "KL/rejected_KL_mean": -2.3721961975097656, + "KL/std": 2.057605266571045, + "epoch": 0.06461086637298091, + "fcm_dpo/beta": 0.3235365152359009, + "fcm_dpo/delta": -0.48091480135917664, + "fcm_dpo/margin": 2.591776132583618, + "fcm_dpo/q_t": 0.31211215257644653, + "grad_norm": 155.63697814941406, + "learning_rate": 3.115942028985507e-07, + "logits/chosen": -0.4682161509990692, + "logits/rejected": -0.4482540488243103, + "logps/chosen": -49.85892105102539, + "logps/ref_chosen": -50.07849884033203, + "logps/ref_rejected": -108.78376007080078, + "logps/rejected": -111.15596008300781, + "loss": 0.7988, + "margin_dpo/margin_mean": 2.59177565574646, + "margin_dpo/margin_std": 2.0130257606506348, + "step": 44 + }, + { + "KL/chosen_KL_mean": 0.1359119415283203, + "KL/mean": -0.8931126594543457, + "KL/rejected_KL_mean": -1.9221420288085938, + "KL/std": 1.8634648323059082, + "epoch": 0.06607929515418502, + "fcm_dpo/beta": 0.3062588572502136, + "fcm_dpo/delta": -0.24527329206466675, + "fcm_dpo/margin": 2.0580525398254395, + "fcm_dpo/q_t": 0.36411553621292114, + "grad_norm": 127.76445007324219, + "learning_rate": 3.188405797101449e-07, + "logits/chosen": -0.4810647964477539, + "logits/rejected": -0.46846526861190796, + "logps/chosen": -48.279014587402344, + "logps/ref_chosen": -48.4149284362793, + "logps/ref_rejected": -77.93643188476562, + "logps/rejected": -79.85856628417969, + "loss": 0.9597, + "margin_dpo/margin_mean": 2.0580527782440186, + "margin_dpo/margin_std": 2.3514111042022705, + "step": 45 + }, + { + "KL/chosen_KL_mean": 0.20447921752929688, + "KL/mean": -1.1372920274734497, + "KL/rejected_KL_mean": -2.4790611267089844, + "KL/std": 2.375460624694824, + "epoch": 0.06754772393538913, + "fcm_dpo/beta": 0.2851349711418152, + "fcm_dpo/delta": -0.3946601152420044, + "fcm_dpo/margin": 2.6835451126098633, + "fcm_dpo/q_t": 0.33826661109924316, + "grad_norm": 132.63743591308594, + "learning_rate": 3.260869565217391e-07, + "logits/chosen": -0.5322977900505066, + "logits/rejected": -0.4815298914909363, + "logps/chosen": -55.794944763183594, + "logps/ref_chosen": -55.999427795410156, + "logps/ref_rejected": -95.652587890625, + "logps/rejected": -98.13165283203125, + "loss": 0.8899, + "margin_dpo/margin_mean": 2.683544158935547, + "margin_dpo/margin_std": 2.860718250274658, + "step": 46 + }, + { + "KL/chosen_KL_mean": 0.3806018829345703, + "KL/mean": -0.9414160251617432, + "KL/rejected_KL_mean": -2.2634353637695312, + "KL/std": 2.2800047397613525, + "epoch": 0.06901615271659324, + "fcm_dpo/beta": 0.2670041620731354, + "fcm_dpo/delta": -0.328019380569458, + "fcm_dpo/margin": 2.6440370082855225, + "fcm_dpo/q_t": 0.3423752188682556, + "grad_norm": 126.02400207519531, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.5748400688171387, + "logits/rejected": -0.5222221612930298, + "logps/chosen": -57.54547882080078, + "logps/ref_chosen": -57.92607879638672, + "logps/ref_rejected": -94.67920684814453, + "logps/rejected": -96.94264221191406, + "loss": 0.8904, + "margin_dpo/margin_mean": 2.6440372467041016, + "margin_dpo/margin_std": 2.386019229888916, + "step": 47 + }, + { + "KL/chosen_KL_mean": 0.08409309387207031, + "KL/mean": -1.2250878810882568, + "KL/rejected_KL_mean": -2.534271240234375, + "KL/std": 2.2343883514404297, + "epoch": 0.07048458149779736, + "fcm_dpo/beta": 0.2488497495651245, + "fcm_dpo/delta": -0.27214479446411133, + "fcm_dpo/margin": 2.618363857269287, + "fcm_dpo/q_t": 0.35341036319732666, + "grad_norm": 138.55894470214844, + "learning_rate": 3.4057971014492755e-07, + "logits/chosen": -0.5984026193618774, + "logits/rejected": -0.5413084030151367, + "logps/chosen": -57.103981018066406, + "logps/ref_chosen": -57.188072204589844, + "logps/ref_rejected": -88.0166015625, + "logps/rejected": -90.55087280273438, + "loss": 0.9335, + "margin_dpo/margin_mean": 2.618363618850708, + "margin_dpo/margin_std": 2.488217353820801, + "step": 48 + }, + { + "KL/chosen_KL_mean": 0.43454933166503906, + "KL/mean": -1.1184592247009277, + "KL/rejected_KL_mean": -2.671466827392578, + "KL/std": 2.7026281356811523, + "epoch": 0.07195301027900147, + "fcm_dpo/beta": 0.23550444841384888, + "fcm_dpo/delta": -0.3564870357513428, + "fcm_dpo/margin": 3.106010913848877, + "fcm_dpo/q_t": 0.34103497862815857, + "grad_norm": 104.59957122802734, + "learning_rate": 3.478260869565217e-07, + "logits/chosen": -0.5330841541290283, + "logits/rejected": -0.47351568937301636, + "logps/chosen": -61.2507209777832, + "logps/ref_chosen": -61.685272216796875, + "logps/ref_rejected": -83.76747131347656, + "logps/rejected": -86.4389419555664, + "loss": 0.8935, + "margin_dpo/margin_mean": 3.106010675430298, + "margin_dpo/margin_std": 3.1022186279296875, + "step": 49 + }, + { + "KL/chosen_KL_mean": 0.03325843811035156, + "KL/mean": -1.700391173362732, + "KL/rejected_KL_mean": -3.434040069580078, + "KL/std": 2.8286612033843994, + "epoch": 0.07342143906020558, + "fcm_dpo/beta": 0.21684028208255768, + "fcm_dpo/delta": -0.38183990120887756, + "fcm_dpo/margin": 3.4672958850860596, + "fcm_dpo/q_t": 0.333289235830307, + "grad_norm": 104.01911163330078, + "learning_rate": 3.5507246376811595e-07, + "logits/chosen": -0.5359183549880981, + "logits/rejected": -0.499971866607666, + "logps/chosen": -58.690879821777344, + "logps/ref_chosen": -58.72413635253906, + "logps/ref_rejected": -96.35814666748047, + "logps/rejected": -99.79219055175781, + "loss": 0.8683, + "margin_dpo/margin_mean": 3.4672961235046387, + "margin_dpo/margin_std": 3.1691863536834717, + "step": 50 + }, + { + "KL/chosen_KL_mean": -0.08742523193359375, + "KL/mean": -1.8161455392837524, + "KL/rejected_KL_mean": -3.5448684692382812, + "KL/std": 3.425395965576172, + "epoch": 0.07488986784140969, + "fcm_dpo/beta": 0.20151767134666443, + "fcm_dpo/delta": -0.32323533296585083, + "fcm_dpo/margin": 3.4574427604675293, + "fcm_dpo/q_t": 0.3550441563129425, + "grad_norm": 80.95540618896484, + "learning_rate": 3.6231884057971015e-07, + "logits/chosen": -0.4952942132949829, + "logits/rejected": -0.46139243245124817, + "logps/chosen": -61.46109390258789, + "logps/ref_chosen": -61.3736686706543, + "logps/ref_rejected": -76.00199890136719, + "logps/rejected": -79.546875, + "loss": 0.9468, + "margin_dpo/margin_mean": 3.4574432373046875, + "margin_dpo/margin_std": 4.162242889404297, + "step": 51 + }, + { + "KL/chosen_KL_mean": 0.5666332244873047, + "KL/mean": -2.1044023036956787, + "KL/rejected_KL_mean": -4.7754364013671875, + "KL/std": 4.042649269104004, + "epoch": 0.0763582966226138, + "fcm_dpo/beta": 0.181796133518219, + "fcm_dpo/delta": -0.6337956190109253, + "fcm_dpo/margin": 5.342073440551758, + "fcm_dpo/q_t": 0.2899671792984009, + "grad_norm": 79.82083129882812, + "learning_rate": 3.695652173913043e-07, + "logits/chosen": -0.5962961912155151, + "logits/rejected": -0.543999433517456, + "logps/chosen": -51.770721435546875, + "logps/ref_chosen": -52.33735656738281, + "logps/ref_rejected": -79.97391510009766, + "logps/rejected": -84.74935150146484, + "loss": 0.7499, + "margin_dpo/margin_mean": 5.342073440551758, + "margin_dpo/margin_std": 4.25220251083374, + "step": 52 + }, + { + "KL/chosen_KL_mean": -0.03797149658203125, + "KL/mean": -2.6791794300079346, + "KL/rejected_KL_mean": -5.320384979248047, + "KL/std": 4.5573835372924805, + "epoch": 0.07782672540381791, + "fcm_dpo/beta": 0.16499710083007812, + "fcm_dpo/delta": -0.5135352611541748, + "fcm_dpo/margin": 5.282422065734863, + "fcm_dpo/q_t": 0.3234487771987915, + "grad_norm": 81.50251007080078, + "learning_rate": 3.7681159420289855e-07, + "logits/chosen": -0.6104946136474609, + "logits/rejected": -0.588903546333313, + "logps/chosen": -53.352622985839844, + "logps/ref_chosen": -53.31465148925781, + "logps/ref_rejected": -91.78359985351562, + "logps/rejected": -97.1039810180664, + "loss": 0.8463, + "margin_dpo/margin_mean": 5.282422065734863, + "margin_dpo/margin_std": 5.204236030578613, + "step": 53 + }, + { + "KL/chosen_KL_mean": -0.13544654846191406, + "KL/mean": -2.47892427444458, + "KL/rejected_KL_mean": -4.822395324707031, + "KL/std": 4.284974098205566, + "epoch": 0.07929515418502203, + "fcm_dpo/beta": 0.15150442719459534, + "fcm_dpo/delta": -0.33330458402633667, + "fcm_dpo/margin": 4.68695068359375, + "fcm_dpo/q_t": 0.34295719861984253, + "grad_norm": 71.17793273925781, + "learning_rate": 3.8405797101449274e-07, + "logits/chosen": -0.5825854539871216, + "logits/rejected": -0.528401255607605, + "logps/chosen": -50.8241081237793, + "logps/ref_chosen": -50.68865966796875, + "logps/ref_rejected": -91.71539306640625, + "logps/rejected": -96.53778839111328, + "loss": 0.8869, + "margin_dpo/margin_mean": 4.68695068359375, + "margin_dpo/margin_std": 4.375544548034668, + "step": 54 + }, + { + "KL/chosen_KL_mean": -0.5525169372558594, + "KL/mean": -3.349881887435913, + "KL/rejected_KL_mean": -6.147243499755859, + "KL/std": 5.250433921813965, + "epoch": 0.08076358296622614, + "fcm_dpo/beta": 0.14030683040618896, + "fcm_dpo/delta": -0.4165322184562683, + "fcm_dpo/margin": 5.594724178314209, + "fcm_dpo/q_t": 0.33590346574783325, + "grad_norm": 67.65316772460938, + "learning_rate": 3.9130434782608694e-07, + "logits/chosen": -0.6309506893157959, + "logits/rejected": -0.5673823952674866, + "logps/chosen": -63.16775131225586, + "logps/ref_chosen": -62.615234375, + "logps/ref_rejected": -88.99349975585938, + "logps/rejected": -95.1407470703125, + "loss": 0.9007, + "margin_dpo/margin_mean": 5.594723701477051, + "margin_dpo/margin_std": 6.235048294067383, + "step": 55 + }, + { + "KL/chosen_KL_mean": -0.32973480224609375, + "KL/mean": -3.0303850173950195, + "KL/rejected_KL_mean": -5.7310333251953125, + "KL/std": 5.075455665588379, + "epoch": 0.08223201174743025, + "fcm_dpo/beta": 0.13035638630390167, + "fcm_dpo/delta": -0.32700973749160767, + "fcm_dpo/margin": 5.401305198669434, + "fcm_dpo/q_t": 0.3499238193035126, + "grad_norm": 56.44548034667969, + "learning_rate": 3.9855072463768114e-07, + "logits/chosen": -0.5875349044799805, + "logits/rejected": -0.5435941815376282, + "logps/chosen": -58.2624626159668, + "logps/ref_chosen": -57.9327278137207, + "logps/ref_rejected": -94.1744384765625, + "logps/rejected": -99.90547180175781, + "loss": 0.9377, + "margin_dpo/margin_mean": 5.401305198669434, + "margin_dpo/margin_std": 6.176411151885986, + "step": 56 + }, + { + "KL/chosen_KL_mean": -0.4023780822753906, + "KL/mean": -3.3978283405303955, + "KL/rejected_KL_mean": -6.393280029296875, + "KL/std": 5.025920867919922, + "epoch": 0.08370044052863436, + "fcm_dpo/beta": 0.12205598503351212, + "fcm_dpo/delta": -0.35622814297676086, + "fcm_dpo/margin": 5.9908952713012695, + "fcm_dpo/q_t": 0.3362714648246765, + "grad_norm": 62.98070526123047, + "learning_rate": 4.057971014492754e-07, + "logits/chosen": -0.5721724033355713, + "logits/rejected": -0.544060230255127, + "logps/chosen": -70.89765930175781, + "logps/ref_chosen": -70.49528503417969, + "logps/ref_rejected": -95.56546020507812, + "logps/rejected": -101.958740234375, + "loss": 0.8807, + "margin_dpo/margin_mean": 5.9908952713012695, + "margin_dpo/margin_std": 5.452801704406738, + "step": 57 + }, + { + "KL/chosen_KL_mean": -0.46083641052246094, + "KL/mean": -3.8815183639526367, + "KL/rejected_KL_mean": -7.3022003173828125, + "KL/std": 5.823391437530518, + "epoch": 0.08516886930983847, + "fcm_dpo/beta": 0.11291979253292084, + "fcm_dpo/delta": -0.4023542106151581, + "fcm_dpo/margin": 6.84135627746582, + "fcm_dpo/q_t": 0.33498990535736084, + "grad_norm": 63.307865142822266, + "learning_rate": 4.1304347826086954e-07, + "logits/chosen": -0.574745237827301, + "logits/rejected": -0.4966890215873718, + "logps/chosen": -62.59377670288086, + "logps/ref_chosen": -62.13294219970703, + "logps/ref_rejected": -84.61729431152344, + "logps/rejected": -91.91949462890625, + "loss": 0.8841, + "margin_dpo/margin_mean": 6.841357231140137, + "margin_dpo/margin_std": 6.957030296325684, + "step": 58 + }, + { + "KL/chosen_KL_mean": -0.9522266387939453, + "KL/mean": -4.575308799743652, + "KL/rejected_KL_mean": -8.19839096069336, + "KL/std": 6.170098304748535, + "epoch": 0.08663729809104258, + "fcm_dpo/beta": 0.10300938785076141, + "fcm_dpo/delta": -0.3802601099014282, + "fcm_dpo/margin": 7.246167182922363, + "fcm_dpo/q_t": 0.33733034133911133, + "grad_norm": 60.765480041503906, + "learning_rate": 4.2028985507246374e-07, + "logits/chosen": -0.6427372694015503, + "logits/rejected": -0.6026915311813354, + "logps/chosen": -52.88475036621094, + "logps/ref_chosen": -51.932525634765625, + "logps/ref_rejected": -88.88520050048828, + "logps/rejected": -97.08358764648438, + "loss": 0.8868, + "margin_dpo/margin_mean": 7.246167182922363, + "margin_dpo/margin_std": 7.133745193481445, + "step": 59 + }, + { + "KL/chosen_KL_mean": -1.8632774353027344, + "KL/mean": -4.822530746459961, + "KL/rejected_KL_mean": -7.781780242919922, + "KL/std": 5.658910751342773, + "epoch": 0.0881057268722467, + "fcm_dpo/beta": 0.09867256879806519, + "fcm_dpo/delta": -0.19654613733291626, + "fcm_dpo/margin": 5.918500900268555, + "fcm_dpo/q_t": 0.36642712354660034, + "grad_norm": 63.796966552734375, + "learning_rate": 4.2753623188405794e-07, + "logits/chosen": -0.6072988510131836, + "logits/rejected": -0.5472843050956726, + "logps/chosen": -62.80546569824219, + "logps/ref_chosen": -60.94218826293945, + "logps/ref_rejected": -85.39340209960938, + "logps/rejected": -93.17517852783203, + "loss": 0.9761, + "margin_dpo/margin_mean": 5.918500900268555, + "margin_dpo/margin_std": 6.41326379776001, + "step": 60 + }, + { + "KL/chosen_KL_mean": -0.8773689270019531, + "KL/mean": -4.494866371154785, + "KL/rejected_KL_mean": -8.11236572265625, + "KL/std": 7.751307487487793, + "epoch": 0.08957415565345081, + "fcm_dpo/beta": 0.09338235855102539, + "fcm_dpo/delta": -0.2975808084011078, + "fcm_dpo/margin": 7.234995365142822, + "fcm_dpo/q_t": 0.3608711063861847, + "grad_norm": 51.70878982543945, + "learning_rate": 4.3478260869565214e-07, + "logits/chosen": -0.5910314321517944, + "logits/rejected": -0.5556684732437134, + "logps/chosen": -61.510894775390625, + "logps/ref_chosen": -60.633522033691406, + "logps/ref_rejected": -89.85249328613281, + "logps/rejected": -97.96485900878906, + "loss": 0.9652, + "margin_dpo/margin_mean": 7.234993934631348, + "margin_dpo/margin_std": 9.499799728393555, + "step": 61 + }, + { + "KL/chosen_KL_mean": -1.0850257873535156, + "KL/mean": -3.8349273204803467, + "KL/rejected_KL_mean": -6.584831237792969, + "KL/std": 5.598065376281738, + "epoch": 0.09104258443465492, + "fcm_dpo/beta": 0.09132882952690125, + "fcm_dpo/delta": -0.10761071741580963, + "fcm_dpo/margin": 5.499805927276611, + "fcm_dpo/q_t": 0.3879823684692383, + "grad_norm": 50.53475570678711, + "learning_rate": 4.420289855072464e-07, + "logits/chosen": -0.6022673845291138, + "logits/rejected": -0.5683047771453857, + "logps/chosen": -57.23579788208008, + "logps/ref_chosen": -56.15077209472656, + "logps/ref_rejected": -75.56619262695312, + "logps/rejected": -82.15103149414062, + "loss": 1.0326, + "margin_dpo/margin_mean": 5.499805450439453, + "margin_dpo/margin_std": 6.748025894165039, + "step": 62 + }, + { + "KL/chosen_KL_mean": -1.885568618774414, + "KL/mean": -5.602993011474609, + "KL/rejected_KL_mean": -9.320415496826172, + "KL/std": 7.238819122314453, + "epoch": 0.09251101321585903, + "fcm_dpo/beta": 0.08644914627075195, + "fcm_dpo/delta": -0.26188862323760986, + "fcm_dpo/margin": 7.43485164642334, + "fcm_dpo/q_t": 0.35505515336990356, + "grad_norm": 50.32809829711914, + "learning_rate": 4.4927536231884053e-07, + "logits/chosen": -0.5845507383346558, + "logits/rejected": -0.5376572012901306, + "logps/chosen": -75.032958984375, + "logps/ref_chosen": -73.14739227294922, + "logps/ref_rejected": -97.61006164550781, + "logps/rejected": -106.93048095703125, + "loss": 0.9392, + "margin_dpo/margin_mean": 7.434851169586182, + "margin_dpo/margin_std": 7.581734657287598, + "step": 63 + }, + { + "KL/chosen_KL_mean": -0.5354537963867188, + "KL/mean": -4.859354496002197, + "KL/rejected_KL_mean": -9.183258056640625, + "KL/std": 7.766883850097656, + "epoch": 0.09397944199706314, + "fcm_dpo/beta": 0.08089442551136017, + "fcm_dpo/delta": -0.3278960585594177, + "fcm_dpo/margin": 8.647797584533691, + "fcm_dpo/q_t": 0.3481459617614746, + "grad_norm": 45.80192947387695, + "learning_rate": 4.5652173913043473e-07, + "logits/chosen": -0.5578076243400574, + "logits/rejected": -0.5246820449829102, + "logps/chosen": -54.534053802490234, + "logps/ref_chosen": -53.998600006103516, + "logps/ref_rejected": -93.53019714355469, + "logps/rejected": -102.71345520019531, + "loss": 0.9256, + "margin_dpo/margin_mean": 8.647798538208008, + "margin_dpo/margin_std": 9.425071716308594, + "step": 64 + }, + { + "KL/chosen_KL_mean": -2.3879737854003906, + "KL/mean": -6.748409271240234, + "KL/rejected_KL_mean": -11.108848571777344, + "KL/std": 8.675431251525879, + "epoch": 0.09544787077826726, + "fcm_dpo/beta": 0.076596200466156, + "fcm_dpo/delta": -0.2893035411834717, + "fcm_dpo/margin": 8.720873832702637, + "fcm_dpo/q_t": 0.35113364458084106, + "grad_norm": 46.513771057128906, + "learning_rate": 4.63768115942029e-07, + "logits/chosen": -0.6502401828765869, + "logits/rejected": -0.636010468006134, + "logps/chosen": -67.2239761352539, + "logps/ref_chosen": -64.83599853515625, + "logps/ref_rejected": -109.94645690917969, + "logps/rejected": -121.05531311035156, + "loss": 0.9421, + "margin_dpo/margin_mean": 8.720873832702637, + "margin_dpo/margin_std": 9.467870712280273, + "step": 65 + }, + { + "KL/chosen_KL_mean": -2.218568801879883, + "KL/mean": -6.281618118286133, + "KL/rejected_KL_mean": -10.34466552734375, + "KL/std": 7.9723615646362305, + "epoch": 0.09691629955947137, + "fcm_dpo/beta": 0.07299195230007172, + "fcm_dpo/delta": -0.20810872316360474, + "fcm_dpo/margin": 8.126091957092285, + "fcm_dpo/q_t": 0.3702622056007385, + "grad_norm": 40.38670349121094, + "learning_rate": 4.7101449275362313e-07, + "logits/chosen": -0.6466140747070312, + "logits/rejected": -0.613680362701416, + "logps/chosen": -53.66209411621094, + "logps/ref_chosen": -51.44352722167969, + "logps/ref_rejected": -75.63629913330078, + "logps/rejected": -85.98096466064453, + "loss": 0.9797, + "margin_dpo/margin_mean": 8.126091003417969, + "margin_dpo/margin_std": 9.491106986999512, + "step": 66 + }, + { + "KL/chosen_KL_mean": -1.9192867279052734, + "KL/mean": -6.12217903137207, + "KL/rejected_KL_mean": -10.3250732421875, + "KL/std": 7.945716857910156, + "epoch": 0.09838472834067548, + "fcm_dpo/beta": 0.07074415683746338, + "fcm_dpo/delta": -0.20659056305885315, + "fcm_dpo/margin": 8.405787467956543, + "fcm_dpo/q_t": 0.3689546287059784, + "grad_norm": 41.76984786987305, + "learning_rate": 4.782608695652174e-07, + "logits/chosen": -0.5952399969100952, + "logits/rejected": -0.5532902479171753, + "logps/chosen": -61.260093688964844, + "logps/ref_chosen": -59.34080505371094, + "logps/ref_rejected": -72.78728485107422, + "logps/rejected": -83.11235809326172, + "loss": 0.9742, + "margin_dpo/margin_mean": 8.405787467956543, + "margin_dpo/margin_std": 9.565013885498047, + "step": 67 + }, + { + "KL/chosen_KL_mean": -2.0800132751464844, + "KL/mean": -6.116093635559082, + "KL/rejected_KL_mean": -10.152172088623047, + "KL/std": 7.245296478271484, + "epoch": 0.09985315712187959, + "fcm_dpo/beta": 0.06832877546548843, + "fcm_dpo/delta": -0.16015967726707458, + "fcm_dpo/margin": 8.072154998779297, + "fcm_dpo/q_t": 0.37427279353141785, + "grad_norm": 39.69685363769531, + "learning_rate": 4.855072463768116e-07, + "logits/chosen": -0.6283758878707886, + "logits/rejected": -0.5697811841964722, + "logps/chosen": -67.28584289550781, + "logps/ref_chosen": -65.2058334350586, + "logps/ref_rejected": -77.20724487304688, + "logps/rejected": -87.35942077636719, + "loss": 0.9769, + "margin_dpo/margin_mean": 8.072154998779297, + "margin_dpo/margin_std": 8.083388328552246, + "step": 68 + }, + { + "KL/chosen_KL_mean": -2.5830154418945312, + "KL/mean": -7.418220520019531, + "KL/rejected_KL_mean": -12.253425598144531, + "KL/std": 8.492610931396484, + "epoch": 0.1013215859030837, + "fcm_dpo/beta": 0.06493359059095383, + "fcm_dpo/delta": -0.24463072419166565, + "fcm_dpo/margin": 9.670412063598633, + "fcm_dpo/q_t": 0.35869020223617554, + "grad_norm": 42.51207733154297, + "learning_rate": 4.927536231884058e-07, + "logits/chosen": -0.587549090385437, + "logits/rejected": -0.5634763240814209, + "logps/chosen": -62.40225601196289, + "logps/ref_chosen": -59.81924057006836, + "logps/ref_rejected": -103.38886260986328, + "logps/rejected": -115.64228820800781, + "loss": 0.9315, + "margin_dpo/margin_mean": 9.67041301727295, + "margin_dpo/margin_std": 9.319877624511719, + "step": 69 + }, + { + "KL/chosen_KL_mean": -3.936086654663086, + "KL/mean": -9.134437561035156, + "KL/rejected_KL_mean": -14.33279037475586, + "KL/std": 10.255716323852539, + "epoch": 0.1027900146842878, + "fcm_dpo/beta": 0.06210237741470337, + "fcm_dpo/delta": -0.2620813846588135, + "fcm_dpo/margin": 10.396703720092773, + "fcm_dpo/q_t": 0.3578363060951233, + "grad_norm": 42.537837982177734, + "learning_rate": 5e-07, + "logits/chosen": -0.6045395731925964, + "logits/rejected": -0.5682834386825562, + "logps/chosen": -65.86672973632812, + "logps/ref_chosen": -61.930641174316406, + "logps/ref_rejected": -91.06078338623047, + "logps/rejected": -105.39356994628906, + "loss": 0.9407, + "margin_dpo/margin_mean": 10.396702766418457, + "margin_dpo/margin_std": 10.94558048248291, + "step": 70 + }, + { + "KL/chosen_KL_mean": -3.512136459350586, + "KL/mean": -9.568258285522461, + "KL/rejected_KL_mean": -15.624378204345703, + "KL/std": 10.597580909729004, + "epoch": 0.10425844346549193, + "fcm_dpo/beta": 0.05811074376106262, + "fcm_dpo/delta": -0.32751208543777466, + "fcm_dpo/margin": 12.11224365234375, + "fcm_dpo/q_t": 0.34550318121910095, + "grad_norm": 39.08191680908203, + "learning_rate": 4.999967061337492e-07, + "logits/chosen": -0.6395025253295898, + "logits/rejected": -0.5973723530769348, + "logps/chosen": -65.2624740600586, + "logps/ref_chosen": -61.750335693359375, + "logps/ref_rejected": -97.33662414550781, + "logps/rejected": -112.96099853515625, + "loss": 0.8977, + "margin_dpo/margin_mean": 12.112241744995117, + "margin_dpo/margin_std": 11.558072090148926, + "step": 71 + }, + { + "KL/chosen_KL_mean": -4.052457809448242, + "KL/mean": -10.506906509399414, + "KL/rejected_KL_mean": -16.96135711669922, + "KL/std": 11.406668663024902, + "epoch": 0.10572687224669604, + "fcm_dpo/beta": 0.0543680340051651, + "fcm_dpo/delta": -0.32582148909568787, + "fcm_dpo/margin": 12.908900260925293, + "fcm_dpo/q_t": 0.34477001428604126, + "grad_norm": 39.94166564941406, + "learning_rate": 4.999868246217933e-07, + "logits/chosen": -0.6469070911407471, + "logits/rejected": -0.6107661128044128, + "logps/chosen": -70.10586547851562, + "logps/ref_chosen": -66.05341339111328, + "logps/ref_rejected": -95.2869873046875, + "logps/rejected": -112.24834442138672, + "loss": 0.9069, + "margin_dpo/margin_mean": 12.908900260925293, + "margin_dpo/margin_std": 13.00861930847168, + "step": 72 + }, + { + "KL/chosen_KL_mean": -5.937223434448242, + "KL/mean": -12.432035446166992, + "KL/rejected_KL_mean": -18.92684555053711, + "KL/std": 15.173410415649414, + "epoch": 0.10719530102790015, + "fcm_dpo/beta": 0.05116545781493187, + "fcm_dpo/delta": -0.28520524501800537, + "fcm_dpo/margin": 12.989622116088867, + "fcm_dpo/q_t": 0.36612752079963684, + "grad_norm": 38.19060134887695, + "learning_rate": 4.999703557245192e-07, + "logits/chosen": -0.6732739806175232, + "logits/rejected": -0.6307477951049805, + "logps/chosen": -72.19349670410156, + "logps/ref_chosen": -66.25627136230469, + "logps/ref_rejected": -90.45613098144531, + "logps/rejected": -109.38298034667969, + "loss": 1.0119, + "margin_dpo/margin_mean": 12.989622116088867, + "margin_dpo/margin_std": 18.78784942626953, + "step": 73 + }, + { + "KL/chosen_KL_mean": -6.64216423034668, + "KL/mean": -13.723569869995117, + "KL/rejected_KL_mean": -20.804977416992188, + "KL/std": 16.866836547851562, + "epoch": 0.10866372980910426, + "fcm_dpo/beta": 0.048250216990709305, + "fcm_dpo/delta": -0.3053615391254425, + "fcm_dpo/margin": 14.162809371948242, + "fcm_dpo/q_t": 0.3610179126262665, + "grad_norm": 39.493221282958984, + "learning_rate": 4.999472998758977e-07, + "logits/chosen": -0.6567627191543579, + "logits/rejected": -0.6456471681594849, + "logps/chosen": -60.067047119140625, + "logps/ref_chosen": -53.42488098144531, + "logps/ref_rejected": -95.94693756103516, + "logps/rejected": -116.75191497802734, + "loss": 0.9834, + "margin_dpo/margin_mean": 14.162809371948242, + "margin_dpo/margin_std": 20.69675636291504, + "step": 74 + }, + { + "KL/chosen_KL_mean": -6.570465087890625, + "KL/mean": -16.630451202392578, + "KL/rejected_KL_mean": -26.6904296875, + "KL/std": 17.67294692993164, + "epoch": 0.11013215859030837, + "fcm_dpo/beta": 0.04431544989347458, + "fcm_dpo/delta": -0.5371890068054199, + "fcm_dpo/margin": 20.119972229003906, + "fcm_dpo/q_t": 0.31586113572120667, + "grad_norm": 34.22724533081055, + "learning_rate": 4.999176576834721e-07, + "logits/chosen": -0.6665393114089966, + "logits/rejected": -0.656915009021759, + "logps/chosen": -58.43212890625, + "logps/ref_chosen": -51.861663818359375, + "logps/ref_rejected": -111.25398254394531, + "logps/rejected": -137.9444122314453, + "loss": 0.832, + "margin_dpo/margin_mean": 20.119970321655273, + "margin_dpo/margin_std": 19.77768325805664, + "step": 75 + }, + { + "KL/chosen_KL_mean": -8.186553955078125, + "KL/mean": -14.74972915649414, + "KL/rejected_KL_mean": -21.31290054321289, + "KL/std": 14.5498685836792, + "epoch": 0.11160058737151249, + "fcm_dpo/beta": 0.041812874376773834, + "fcm_dpo/delta": -0.1574609875679016, + "fcm_dpo/margin": 13.126352310180664, + "fcm_dpo/q_t": 0.3751528859138489, + "grad_norm": 34.15813446044922, + "learning_rate": 4.998814299283415e-07, + "logits/chosen": -0.6963686943054199, + "logits/rejected": -0.6531896591186523, + "logps/chosen": -61.45259094238281, + "logps/ref_chosen": -53.26603698730469, + "logps/ref_rejected": -78.21662902832031, + "logps/rejected": -99.52952575683594, + "loss": 1.0058, + "margin_dpo/margin_mean": 13.126352310180664, + "margin_dpo/margin_std": 15.78203010559082, + "step": 76 + }, + { + "KL/chosen_KL_mean": -7.309167861938477, + "KL/mean": -17.81867218017578, + "KL/rejected_KL_mean": -28.328174591064453, + "KL/std": 19.93675994873047, + "epoch": 0.1130690161527166, + "fcm_dpo/beta": 0.03852991759777069, + "fcm_dpo/delta": -0.4501330256462097, + "fcm_dpo/margin": 21.019004821777344, + "fcm_dpo/q_t": 0.3251643776893616, + "grad_norm": 34.96170425415039, + "learning_rate": 4.998386175651409e-07, + "logits/chosen": -0.6893630623817444, + "logits/rejected": -0.6500611305236816, + "logps/chosen": -65.40584564208984, + "logps/ref_chosen": -58.0966796875, + "logps/ref_rejected": -93.77361297607422, + "logps/rejected": -122.10179138183594, + "loss": 0.8772, + "margin_dpo/margin_mean": 21.019004821777344, + "margin_dpo/margin_std": 21.777103424072266, + "step": 77 + }, + { + "KL/chosen_KL_mean": -7.4875030517578125, + "KL/mean": -15.621191024780273, + "KL/rejected_KL_mean": -23.754878997802734, + "KL/std": 16.25094223022461, + "epoch": 0.1145374449339207, + "fcm_dpo/beta": 0.03683791682124138, + "fcm_dpo/delta": -0.21150818467140198, + "fcm_dpo/margin": 16.267370223999023, + "fcm_dpo/q_t": 0.36780738830566406, + "grad_norm": 31.654874801635742, + "learning_rate": 4.997892217220159e-07, + "logits/chosen": -0.6330477595329285, + "logits/rejected": -0.6059073209762573, + "logps/chosen": -63.101287841796875, + "logps/ref_chosen": -55.61378479003906, + "logps/ref_rejected": -84.93436431884766, + "logps/rejected": -108.68923950195312, + "loss": 0.976, + "margin_dpo/margin_mean": 16.267372131347656, + "margin_dpo/margin_std": 18.412311553955078, + "step": 78 + }, + { + "KL/chosen_KL_mean": -7.873466491699219, + "KL/mean": -16.800203323364258, + "KL/rejected_KL_mean": -25.726943969726562, + "KL/std": 18.54357147216797, + "epoch": 0.11600587371512482, + "fcm_dpo/beta": 0.03503450006246567, + "fcm_dpo/delta": -0.24067077040672302, + "fcm_dpo/margin": 17.853469848632812, + "fcm_dpo/q_t": 0.36850211024284363, + "grad_norm": 28.002782821655273, + "learning_rate": 4.997332437005931e-07, + "logits/chosen": -0.67842698097229, + "logits/rejected": -0.6490979194641113, + "logps/chosen": -63.323951721191406, + "logps/ref_chosen": -55.45048522949219, + "logps/ref_rejected": -87.64756774902344, + "logps/rejected": -113.37451171875, + "loss": 0.9887, + "margin_dpo/margin_mean": 17.85346794128418, + "margin_dpo/margin_std": 22.688926696777344, + "step": 79 + }, + { + "KL/chosen_KL_mean": -10.66299057006836, + "KL/mean": -18.957509994506836, + "KL/rejected_KL_mean": -27.252025604248047, + "KL/std": 19.518186569213867, + "epoch": 0.11747430249632893, + "fcm_dpo/beta": 0.03348912298679352, + "fcm_dpo/delta": -0.16750264167785645, + "fcm_dpo/margin": 16.589031219482422, + "fcm_dpo/q_t": 0.3835224509239197, + "grad_norm": 30.331220626831055, + "learning_rate": 4.996706849759452e-07, + "logits/chosen": -0.7170759439468384, + "logits/rejected": -0.6725642085075378, + "logps/chosen": -69.18228149414062, + "logps/ref_chosen": -58.519290924072266, + "logps/ref_rejected": -87.54750061035156, + "logps/rejected": -114.79953002929688, + "loss": 1.0334, + "margin_dpo/margin_mean": 16.589031219482422, + "margin_dpo/margin_std": 22.471614837646484, + "step": 80 + }, + { + "KL/chosen_KL_mean": -9.806392669677734, + "KL/mean": -21.405792236328125, + "KL/rejected_KL_mean": -33.00519561767578, + "KL/std": 23.604204177856445, + "epoch": 0.11894273127753303, + "fcm_dpo/beta": 0.03153174743056297, + "fcm_dpo/delta": -0.3608952760696411, + "fcm_dpo/margin": 23.198793411254883, + "fcm_dpo/q_t": 0.347603440284729, + "grad_norm": 30.84009552001953, + "learning_rate": 4.996015471965529e-07, + "logits/chosen": -0.710389256477356, + "logits/rejected": -0.679502010345459, + "logps/chosen": -76.25526428222656, + "logps/ref_chosen": -66.44886779785156, + "logps/ref_rejected": -129.66270446777344, + "logps/rejected": -162.66790771484375, + "loss": 0.9328, + "margin_dpo/margin_mean": 23.198793411254883, + "margin_dpo/margin_std": 27.952896118164062, + "step": 81 + }, + { + "KL/chosen_KL_mean": -12.119726181030273, + "KL/mean": -21.315797805786133, + "KL/rejected_KL_mean": -30.51186752319336, + "KL/std": 21.45583724975586, + "epoch": 0.12041116005873716, + "fcm_dpo/beta": 0.030675500631332397, + "fcm_dpo/delta": -0.17443646490573883, + "fcm_dpo/margin": 18.392139434814453, + "fcm_dpo/q_t": 0.38255757093429565, + "grad_norm": 33.19075012207031, + "learning_rate": 4.995258321842611e-07, + "logits/chosen": -0.6418750286102295, + "logits/rejected": -0.628775954246521, + "logps/chosen": -64.35211181640625, + "logps/ref_chosen": -52.232383728027344, + "logps/ref_rejected": -90.74325561523438, + "logps/rejected": -121.25511932373047, + "loss": 1.0751, + "margin_dpo/margin_mean": 18.392139434814453, + "margin_dpo/margin_std": 29.462291717529297, + "step": 82 + }, + { + "KL/chosen_KL_mean": -12.515398025512695, + "KL/mean": -23.42616081237793, + "KL/rejected_KL_mean": -34.3369255065918, + "KL/std": 22.522113800048828, + "epoch": 0.12187958883994127, + "fcm_dpo/beta": 0.028894957154989243, + "fcm_dpo/delta": -0.2471744269132614, + "fcm_dpo/margin": 21.821533203125, + "fcm_dpo/q_t": 0.3641561269760132, + "grad_norm": 31.546810150146484, + "learning_rate": 4.994435419342304e-07, + "logits/chosen": -0.6872934103012085, + "logits/rejected": -0.6507028937339783, + "logps/chosen": -68.34278869628906, + "logps/ref_chosen": -55.82738494873047, + "logps/ref_rejected": -103.71589660644531, + "logps/rejected": -138.05282592773438, + "loss": 0.978, + "margin_dpo/margin_mean": 21.821533203125, + "margin_dpo/margin_std": 26.563232421875, + "step": 83 + }, + { + "KL/chosen_KL_mean": -11.519216537475586, + "KL/mean": -21.042236328125, + "KL/rejected_KL_mean": -30.565250396728516, + "KL/std": 19.391069412231445, + "epoch": 0.12334801762114538, + "fcm_dpo/beta": 0.027821559458971024, + "fcm_dpo/delta": -0.13951639831066132, + "fcm_dpo/margin": 19.046039581298828, + "fcm_dpo/q_t": 0.3775210976600647, + "grad_norm": 27.340347290039062, + "learning_rate": 4.993546786148857e-07, + "logits/chosen": -0.6515509486198425, + "logits/rejected": -0.6115979552268982, + "logps/chosen": -78.69538879394531, + "logps/ref_chosen": -67.1761703491211, + "logps/ref_rejected": -87.29859924316406, + "logps/rejected": -117.86385345458984, + "loss": 0.9982, + "margin_dpo/margin_mean": 19.046039581298828, + "margin_dpo/margin_std": 19.851577758789062, + "step": 84 + }, + { + "KL/chosen_KL_mean": -12.147787094116211, + "KL/mean": -21.90998077392578, + "KL/rejected_KL_mean": -31.67218017578125, + "KL/std": 20.009681701660156, + "epoch": 0.12481644640234948, + "fcm_dpo/beta": 0.02746494486927986, + "fcm_dpo/delta": -0.14406868815422058, + "fcm_dpo/margin": 19.524394989013672, + "fcm_dpo/q_t": 0.38153764605522156, + "grad_norm": 27.906219482421875, + "learning_rate": 4.992592445678582e-07, + "logits/chosen": -0.6355807185173035, + "logits/rejected": -0.6021965742111206, + "logps/chosen": -70.55440521240234, + "logps/ref_chosen": -58.4066162109375, + "logps/ref_rejected": -78.63880157470703, + "logps/rejected": -110.31098175048828, + "loss": 1.0232, + "margin_dpo/margin_mean": 19.524394989013672, + "margin_dpo/margin_std": 23.956645965576172, + "step": 85 + }, + { + "KL/chosen_KL_mean": -15.925954818725586, + "KL/mean": -26.277530670166016, + "KL/rejected_KL_mean": -36.62910461425781, + "KL/std": 27.010652542114258, + "epoch": 0.1262848751835536, + "fcm_dpo/beta": 0.026533078402280807, + "fcm_dpo/delta": -0.15769629180431366, + "fcm_dpo/margin": 20.703153610229492, + "fcm_dpo/q_t": 0.393225759267807, + "grad_norm": 31.247163772583008, + "learning_rate": 4.991572423079235e-07, + "logits/chosen": -0.6769875288009644, + "logits/rejected": -0.6635218858718872, + "logps/chosen": -72.06341552734375, + "logps/ref_chosen": -56.13746643066406, + "logps/ref_rejected": -88.12165069580078, + "logps/rejected": -124.7507553100586, + "loss": 1.1039, + "margin_dpo/margin_mean": 20.703155517578125, + "margin_dpo/margin_std": 37.23381042480469, + "step": 86 + }, + { + "KL/chosen_KL_mean": -15.697734832763672, + "KL/mean": -27.728618621826172, + "KL/rejected_KL_mean": -39.7595100402832, + "KL/std": 26.763015747070312, + "epoch": 0.1277533039647577, + "fcm_dpo/beta": 0.025222256779670715, + "fcm_dpo/delta": -0.22208669781684875, + "fcm_dpo/margin": 24.061767578125, + "fcm_dpo/q_t": 0.3686726689338684, + "grad_norm": 26.473875045776367, + "learning_rate": 4.990486745229364e-07, + "logits/chosen": -0.7123448252677917, + "logits/rejected": -0.6856144666671753, + "logps/chosen": -71.33383178710938, + "logps/ref_chosen": -55.63609313964844, + "logps/ref_rejected": -95.46757507324219, + "logps/rejected": -135.22708129882812, + "loss": 1.0143, + "margin_dpo/margin_mean": 24.061767578125, + "margin_dpo/margin_std": 32.34611892700195, + "step": 87 + }, + { + "KL/chosen_KL_mean": -19.72933578491211, + "KL/mean": -28.825397491455078, + "KL/rejected_KL_mean": -37.921451568603516, + "KL/std": 27.96208953857422, + "epoch": 0.12922173274596183, + "fcm_dpo/beta": 0.02473517321050167, + "fcm_dpo/delta": -0.05327114462852478, + "fcm_dpo/margin": 18.192121505737305, + "fcm_dpo/q_t": 0.40377742052078247, + "grad_norm": 27.7163028717041, + "learning_rate": 4.989335440737586e-07, + "logits/chosen": -0.6769958734512329, + "logits/rejected": -0.6691812872886658, + "logps/chosen": -93.40048217773438, + "logps/ref_chosen": -73.67115020751953, + "logps/ref_rejected": -106.70849609375, + "logps/rejected": -144.62994384765625, + "loss": 1.118, + "margin_dpo/margin_mean": 18.192121505737305, + "margin_dpo/margin_std": 29.348196029663086, + "step": 88 + }, + { + "KL/chosen_KL_mean": -12.14109992980957, + "KL/mean": -22.370540618896484, + "KL/rejected_KL_mean": -32.59998321533203, + "KL/std": 23.696063995361328, + "epoch": 0.13069016152716592, + "fcm_dpo/beta": 0.024530138820409775, + "fcm_dpo/delta": -0.10719307512044907, + "fcm_dpo/margin": 20.458881378173828, + "fcm_dpo/q_t": 0.3884883522987366, + "grad_norm": 25.34421730041504, + "learning_rate": 4.988118539941847e-07, + "logits/chosen": -0.7158247232437134, + "logits/rejected": -0.6819084882736206, + "logps/chosen": -72.7660140991211, + "logps/ref_chosen": -60.624916076660156, + "logps/ref_rejected": -82.08354949951172, + "logps/rejected": -114.68353271484375, + "loss": 1.0395, + "margin_dpo/margin_mean": 20.45888328552246, + "margin_dpo/margin_std": 26.786773681640625, + "step": 89 + }, + { + "KL/chosen_KL_mean": -15.632135391235352, + "KL/mean": -29.54084014892578, + "KL/rejected_KL_mean": -43.449546813964844, + "KL/std": 32.48262023925781, + "epoch": 0.13215859030837004, + "fcm_dpo/beta": 0.02354896441102028, + "fcm_dpo/delta": -0.2722369432449341, + "fcm_dpo/margin": 27.817413330078125, + "fcm_dpo/q_t": 0.3705536723136902, + "grad_norm": 27.808216094970703, + "learning_rate": 4.986836074908615e-07, + "logits/chosen": -0.6693460941314697, + "logits/rejected": -0.6799524426460266, + "logps/chosen": -68.91744995117188, + "logps/ref_chosen": -53.285308837890625, + "logps/ref_rejected": -111.54470825195312, + "logps/rejected": -154.9942626953125, + "loss": 1.0206, + "margin_dpo/margin_mean": 27.817413330078125, + "margin_dpo/margin_std": 40.34447479248047, + "step": 90 + }, + { + "KL/chosen_KL_mean": -16.3182430267334, + "KL/mean": -27.830875396728516, + "KL/rejected_KL_mean": -39.34351348876953, + "KL/std": 26.04579734802246, + "epoch": 0.13362701908957417, + "fcm_dpo/beta": 0.022640112787485123, + "fcm_dpo/delta": -0.1277696192264557, + "fcm_dpo/margin": 23.0252685546875, + "fcm_dpo/q_t": 0.38871896266937256, + "grad_norm": 25.545448303222656, + "learning_rate": 4.985488079432037e-07, + "logits/chosen": -0.6979465484619141, + "logits/rejected": -0.6646697521209717, + "logps/chosen": -78.12120056152344, + "logps/ref_chosen": -61.802955627441406, + "logps/ref_rejected": -87.87395477294922, + "logps/rejected": -127.21746826171875, + "loss": 1.059, + "margin_dpo/margin_mean": 23.025266647338867, + "margin_dpo/margin_std": 33.728050231933594, + "step": 91 + }, + { + "KL/chosen_KL_mean": -14.809152603149414, + "KL/mean": -26.451236724853516, + "KL/rejected_KL_mean": -38.093318939208984, + "KL/std": 26.83241081237793, + "epoch": 0.13509544787077826, + "fcm_dpo/beta": 0.022130444645881653, + "fcm_dpo/delta": -0.12148790061473846, + "fcm_dpo/margin": 23.284168243408203, + "fcm_dpo/q_t": 0.38861751556396484, + "grad_norm": 23.78729248046875, + "learning_rate": 4.984074589033043e-07, + "logits/chosen": -0.7112252712249756, + "logits/rejected": -0.6874991655349731, + "logps/chosen": -66.44992065429688, + "logps/ref_chosen": -51.640769958496094, + "logps/ref_rejected": -77.88117980957031, + "logps/rejected": -115.97450256347656, + "loss": 1.0518, + "margin_dpo/margin_mean": 23.284168243408203, + "margin_dpo/margin_std": 32.566123962402344, + "step": 92 + }, + { + "KL/chosen_KL_mean": -16.61697006225586, + "KL/mean": -28.332927703857422, + "KL/rejected_KL_mean": -40.04888153076172, + "KL/std": 25.330059051513672, + "epoch": 0.13656387665198239, + "fcm_dpo/beta": 0.021535798907279968, + "fcm_dpo/delta": -0.11011452972888947, + "fcm_dpo/margin": 23.431915283203125, + "fcm_dpo/q_t": 0.3877296447753906, + "grad_norm": 24.3586368560791, + "learning_rate": 4.982595640958425e-07, + "logits/chosen": -0.7280140519142151, + "logits/rejected": -0.676377534866333, + "logps/chosen": -69.14620971679688, + "logps/ref_chosen": -52.529239654541016, + "logps/ref_rejected": -77.16075134277344, + "logps/rejected": -117.20962524414062, + "loss": 1.0277, + "margin_dpo/margin_mean": 23.431915283203125, + "margin_dpo/margin_std": 29.139026641845703, + "step": 93 + }, + { + "KL/chosen_KL_mean": -17.951906204223633, + "KL/mean": -31.419082641601562, + "KL/rejected_KL_mean": -44.88625717163086, + "KL/std": 29.03601837158203, + "epoch": 0.13803230543318648, + "fcm_dpo/beta": 0.020746299996972084, + "fcm_dpo/delta": -0.1698264628648758, + "fcm_dpo/margin": 26.934350967407227, + "fcm_dpo/q_t": 0.3750302195549011, + "grad_norm": 24.17359733581543, + "learning_rate": 4.98105127417984e-07, + "logits/chosen": -0.6785788536071777, + "logits/rejected": -0.6629636287689209, + "logps/chosen": -79.17451477050781, + "logps/ref_chosen": -61.22261047363281, + "logps/ref_rejected": -99.59902954101562, + "logps/rejected": -144.48529052734375, + "loss": 0.9928, + "margin_dpo/margin_mean": 26.934350967407227, + "margin_dpo/margin_std": 30.304607391357422, + "step": 94 + }, + { + "KL/chosen_KL_mean": -16.976377487182617, + "KL/mean": -27.94607162475586, + "KL/rejected_KL_mean": -38.91576385498047, + "KL/std": 27.511489868164062, + "epoch": 0.1395007342143906, + "fcm_dpo/beta": 0.02045309543609619, + "fcm_dpo/delta": -0.05145730823278427, + "fcm_dpo/margin": 21.939393997192383, + "fcm_dpo/q_t": 0.3971632122993469, + "grad_norm": 22.47222900390625, + "learning_rate": 4.979441529392784e-07, + "logits/chosen": -0.7069982290267944, + "logits/rejected": -0.6784754991531372, + "logps/chosen": -69.5000228881836, + "logps/ref_chosen": -52.523643493652344, + "logps/ref_rejected": -75.8803482055664, + "logps/rejected": -114.79611206054688, + "loss": 1.0678, + "margin_dpo/margin_mean": 21.939393997192383, + "margin_dpo/margin_std": 28.80935287475586, + "step": 95 + }, + { + "KL/chosen_KL_mean": -16.412687301635742, + "KL/mean": -31.368961334228516, + "KL/rejected_KL_mean": -46.32524108886719, + "KL/std": 31.75434112548828, + "epoch": 0.14096916299559473, + "fcm_dpo/beta": 0.019721299409866333, + "fcm_dpo/delta": -0.20427075028419495, + "fcm_dpo/margin": 29.912555694580078, + "fcm_dpo/q_t": 0.37095409631729126, + "grad_norm": 23.20262336730957, + "learning_rate": 4.977766449015534e-07, + "logits/chosen": -0.7112823128700256, + "logits/rejected": -0.6814401149749756, + "logps/chosen": -78.56965637207031, + "logps/ref_chosen": -62.15697479248047, + "logps/ref_rejected": -96.59601593017578, + "logps/rejected": -142.9212646484375, + "loss": 0.9801, + "margin_dpo/margin_mean": 29.912555694580078, + "margin_dpo/margin_std": 34.870269775390625, + "step": 96 + }, + { + "KL/chosen_KL_mean": -17.793996810913086, + "KL/mean": -29.294153213500977, + "KL/rejected_KL_mean": -40.7943115234375, + "KL/std": 26.314815521240234, + "epoch": 0.14243759177679882, + "fcm_dpo/beta": 0.019657842814922333, + "fcm_dpo/delta": -0.05561104789376259, + "fcm_dpo/margin": 23.000316619873047, + "fcm_dpo/q_t": 0.395273894071579, + "grad_norm": 23.92318344116211, + "learning_rate": 4.976026077188012e-07, + "logits/chosen": -0.6561405658721924, + "logits/rejected": -0.6128396987915039, + "logps/chosen": -72.44036102294922, + "logps/ref_chosen": -54.646366119384766, + "logps/ref_rejected": -76.96475219726562, + "logps/rejected": -117.75906372070312, + "loss": 1.0535, + "margin_dpo/margin_mean": 23.000316619873047, + "margin_dpo/margin_std": 26.310195922851562, + "step": 97 + }, + { + "KL/chosen_KL_mean": -21.98511505126953, + "KL/mean": -35.2052001953125, + "KL/rejected_KL_mean": -48.42529296875, + "KL/std": 30.05870819091797, + "epoch": 0.14390602055800295, + "fcm_dpo/beta": 0.019086042419075966, + "fcm_dpo/delta": -0.11044582724571228, + "fcm_dpo/margin": 26.44017791748047, + "fcm_dpo/q_t": 0.3849552869796753, + "grad_norm": 24.65069580078125, + "learning_rate": 4.974220459770639e-07, + "logits/chosen": -0.6879181265830994, + "logits/rejected": -0.6715967059135437, + "logps/chosen": -87.24374389648438, + "logps/ref_chosen": -65.25862884521484, + "logps/ref_rejected": -96.5274887084961, + "logps/rejected": -144.95278930664062, + "loss": 1.0538, + "margin_dpo/margin_mean": 26.44017791748047, + "margin_dpo/margin_std": 36.05833053588867, + "step": 98 + }, + { + "KL/chosen_KL_mean": -17.362274169921875, + "KL/mean": -33.471343994140625, + "KL/rejected_KL_mean": -49.580406188964844, + "KL/std": 32.57749938964844, + "epoch": 0.14537444933920704, + "fcm_dpo/beta": 0.01838843896985054, + "fcm_dpo/delta": -0.20529845356941223, + "fcm_dpo/margin": 32.21813201904297, + "fcm_dpo/q_t": 0.373318076133728, + "grad_norm": 21.651269912719727, + "learning_rate": 4.972349644343108e-07, + "logits/chosen": -0.6547946929931641, + "logits/rejected": -0.6550130248069763, + "logps/chosen": -63.00075912475586, + "logps/ref_chosen": -45.638484954833984, + "logps/ref_rejected": -86.43793487548828, + "logps/rejected": -136.01834106445312, + "loss": 0.9905, + "margin_dpo/margin_mean": 32.21813201904297, + "margin_dpo/margin_std": 40.488311767578125, + "step": 99 + }, + { + "KL/chosen_KL_mean": -19.682992935180664, + "KL/mean": -29.148677825927734, + "KL/rejected_KL_mean": -38.6143684387207, + "KL/std": 26.299453735351562, + "epoch": 0.14684287812041116, + "fcm_dpo/beta": 0.01846114918589592, + "fcm_dpo/delta": 0.05179622396826744, + "fcm_dpo/margin": 18.93136978149414, + "fcm_dpo/q_t": 0.42105787992477417, + "grad_norm": 24.000566482543945, + "learning_rate": 4.970413680203148e-07, + "logits/chosen": -0.6635209321975708, + "logits/rejected": -0.6204158663749695, + "logps/chosen": -77.2769775390625, + "logps/ref_chosen": -57.59397888183594, + "logps/ref_rejected": -74.06021118164062, + "logps/rejected": -112.67457580566406, + "loss": 1.1631, + "margin_dpo/margin_mean": 18.93136978149414, + "margin_dpo/margin_std": 33.863746643066406, + "step": 100 + }, + { + "KL/chosen_KL_mean": -24.127986907958984, + "KL/mean": -35.636356353759766, + "KL/rejected_KL_mean": -47.14472198486328, + "KL/std": 32.51232147216797, + "epoch": 0.14831130690161526, + "fcm_dpo/beta": 0.018270574510097504, + "fcm_dpo/delta": -0.022218167781829834, + "fcm_dpo/margin": 23.0167293548584, + "fcm_dpo/q_t": 0.4101085960865021, + "grad_norm": 23.815645217895508, + "learning_rate": 4.968412618365215e-07, + "logits/chosen": -0.696588933467865, + "logits/rejected": -0.6663883924484253, + "logps/chosen": -85.77684020996094, + "logps/ref_chosen": -61.64885330200195, + "logps/ref_rejected": -83.18968200683594, + "logps/rejected": -130.33441162109375, + "loss": 1.125, + "margin_dpo/margin_mean": 23.01673126220703, + "margin_dpo/margin_std": 39.10064697265625, + "step": 101 + }, + { + "KL/chosen_KL_mean": -26.22239112854004, + "KL/mean": -34.66426086425781, + "KL/rejected_KL_mean": -43.10613250732422, + "KL/std": 30.4959659576416, + "epoch": 0.14977973568281938, + "fcm_dpo/beta": 0.018309336155653, + "fcm_dpo/delta": -0.024285031482577324, + "fcm_dpo/margin": 16.883745193481445, + "fcm_dpo/q_t": 0.43124186992645264, + "grad_norm": 26.869401931762695, + "learning_rate": 4.966346511559149e-07, + "logits/chosen": -0.7078909873962402, + "logits/rejected": -0.6638644933700562, + "logps/chosen": -90.30126953125, + "logps/ref_chosen": -64.0788803100586, + "logps/ref_rejected": -68.18707275390625, + "logps/rejected": -111.293212890625, + "loss": 1.2061, + "margin_dpo/margin_mean": 16.883745193481445, + "margin_dpo/margin_std": 35.99235534667969, + "step": 102 + }, + { + "KL/chosen_KL_mean": -20.61368179321289, + "KL/mean": -37.49169158935547, + "KL/rejected_KL_mean": -54.369693756103516, + "KL/std": 33.84938049316406, + "epoch": 0.1512481644640235, + "fcm_dpo/beta": 0.017673712223768234, + "fcm_dpo/delta": -0.2095203697681427, + "fcm_dpo/margin": 33.75600814819336, + "fcm_dpo/q_t": 0.37030476331710815, + "grad_norm": 23.006120681762695, + "learning_rate": 4.964215414228785e-07, + "logits/chosen": -0.6496819257736206, + "logits/rejected": -0.6131845116615295, + "logps/chosen": -81.9129638671875, + "logps/ref_chosen": -61.299278259277344, + "logps/ref_rejected": -93.57270812988281, + "logps/rejected": -147.94241333007812, + "loss": 0.9829, + "margin_dpo/margin_mean": 33.756011962890625, + "margin_dpo/margin_std": 40.593528747558594, + "step": 103 + }, + { + "KL/chosen_KL_mean": -22.588258743286133, + "KL/mean": -38.630088806152344, + "KL/rejected_KL_mean": -54.67192077636719, + "KL/std": 37.43622589111328, + "epoch": 0.1527165932452276, + "fcm_dpo/beta": 0.017140310257673264, + "fcm_dpo/delta": -0.15842567384243011, + "fcm_dpo/margin": 32.083656311035156, + "fcm_dpo/q_t": 0.3846966028213501, + "grad_norm": 22.44390296936035, + "learning_rate": 4.96201938253052e-07, + "logits/chosen": -0.7081250548362732, + "logits/rejected": -0.6775051355361938, + "logps/chosen": -76.96102905273438, + "logps/ref_chosen": -54.372772216796875, + "logps/ref_rejected": -89.5647201538086, + "logps/rejected": -144.23663330078125, + "loss": 1.0402, + "margin_dpo/margin_mean": 32.083656311035156, + "margin_dpo/margin_std": 45.53196334838867, + "step": 104 + }, + { + "KL/chosen_KL_mean": -21.200937271118164, + "KL/mean": -44.030094146728516, + "KL/rejected_KL_mean": -66.8592529296875, + "KL/std": 36.557212829589844, + "epoch": 0.15418502202643172, + "fcm_dpo/beta": 0.016101296991109848, + "fcm_dpo/delta": -0.36145851016044617, + "fcm_dpo/margin": 45.65831756591797, + "fcm_dpo/q_t": 0.33412182331085205, + "grad_norm": 23.292463302612305, + "learning_rate": 4.959758474331832e-07, + "logits/chosen": -0.6853151321411133, + "logits/rejected": -0.6624854803085327, + "logps/chosen": -75.83988189697266, + "logps/ref_chosen": -54.638946533203125, + "logps/ref_rejected": -97.97351837158203, + "logps/rejected": -164.832763671875, + "loss": 0.8698, + "margin_dpo/margin_mean": 45.65831756591797, + "margin_dpo/margin_std": 40.13270568847656, + "step": 105 + }, + { + "KL/chosen_KL_mean": -23.751272201538086, + "KL/mean": -38.02503967285156, + "KL/rejected_KL_mean": -52.298805236816406, + "KL/std": 31.860212326049805, + "epoch": 0.15565345080763582, + "fcm_dpo/beta": 0.015641074627637863, + "fcm_dpo/delta": -0.04885800927877426, + "fcm_dpo/margin": 28.547529220581055, + "fcm_dpo/q_t": 0.3972783088684082, + "grad_norm": 22.089712142944336, + "learning_rate": 4.957432749209755e-07, + "logits/chosen": -0.6288084983825684, + "logits/rejected": -0.5959875583648682, + "logps/chosen": -78.58416748046875, + "logps/ref_chosen": -54.83289337158203, + "logps/ref_rejected": -85.22461700439453, + "logps/rejected": -137.52342224121094, + "loss": 1.0582, + "margin_dpo/margin_mean": 28.547531127929688, + "margin_dpo/margin_std": 34.79780197143555, + "step": 106 + }, + { + "KL/chosen_KL_mean": -28.478961944580078, + "KL/mean": -44.62788009643555, + "KL/rejected_KL_mean": -60.77680206298828, + "KL/std": 38.920860290527344, + "epoch": 0.15712187958883994, + "fcm_dpo/beta": 0.015334920957684517, + "fcm_dpo/delta": -0.10102778673171997, + "fcm_dpo/margin": 32.29783630371094, + "fcm_dpo/q_t": 0.3877396583557129, + "grad_norm": 21.205881118774414, + "learning_rate": 4.955042268449307e-07, + "logits/chosen": -0.6800429821014404, + "logits/rejected": -0.6334393620491028, + "logps/chosen": -98.18677520751953, + "logps/ref_chosen": -69.70780944824219, + "logps/ref_rejected": -94.73950958251953, + "logps/rejected": -155.5163116455078, + "loss": 1.0433, + "margin_dpo/margin_mean": 32.29783630371094, + "margin_dpo/margin_std": 41.090171813964844, + "step": 107 + }, + { + "KL/chosen_KL_mean": -25.73700523376465, + "KL/mean": -44.92538833618164, + "KL/rejected_KL_mean": -64.11376953125, + "KL/std": 42.71458435058594, + "epoch": 0.15859030837004406, + "fcm_dpo/beta": 0.014912154525518417, + "fcm_dpo/delta": -0.18271130323410034, + "fcm_dpo/margin": 38.376766204833984, + "fcm_dpo/q_t": 0.3810121417045593, + "grad_norm": 21.520112991333008, + "learning_rate": 4.952587095041881e-07, + "logits/chosen": -0.6687978506088257, + "logits/rejected": -0.646253228187561, + "logps/chosen": -81.74688720703125, + "logps/ref_chosen": -56.0098876953125, + "logps/ref_rejected": -95.79601287841797, + "logps/rejected": -159.9097900390625, + "loss": 1.039, + "margin_dpo/margin_mean": 38.376766204833984, + "margin_dpo/margin_std": 55.44823455810547, + "step": 108 + }, + { + "KL/chosen_KL_mean": -24.07698631286621, + "KL/mean": -44.76786804199219, + "KL/rejected_KL_mean": -65.45874786376953, + "KL/std": 41.37312316894531, + "epoch": 0.16005873715124816, + "fcm_dpo/beta": 0.014279071241617203, + "fcm_dpo/delta": -0.20386075973510742, + "fcm_dpo/margin": 41.38175964355469, + "fcm_dpo/q_t": 0.3684207797050476, + "grad_norm": 21.89470100402832, + "learning_rate": 4.95006729368358e-07, + "logits/chosen": -0.6142909526824951, + "logits/rejected": -0.5930590629577637, + "logps/chosen": -86.96247863769531, + "logps/ref_chosen": -62.88549041748047, + "logps/ref_rejected": -98.68573760986328, + "logps/rejected": -164.1444854736328, + "loss": 0.9868, + "margin_dpo/margin_mean": 41.38175964355469, + "margin_dpo/margin_std": 48.550384521484375, + "step": 109 + }, + { + "KL/chosen_KL_mean": -24.62155532836914, + "KL/mean": -43.333526611328125, + "KL/rejected_KL_mean": -62.04550552368164, + "KL/std": 41.19173049926758, + "epoch": 0.16152716593245228, + "fcm_dpo/beta": 0.013750969432294369, + "fcm_dpo/delta": -0.12534289062023163, + "fcm_dpo/margin": 37.42394256591797, + "fcm_dpo/q_t": 0.3864942193031311, + "grad_norm": 19.2232608795166, + "learning_rate": 4.947482930773511e-07, + "logits/chosen": -0.608720064163208, + "logits/rejected": -0.5711803436279297, + "logps/chosen": -83.375244140625, + "logps/ref_chosen": -58.753684997558594, + "logps/ref_rejected": -79.75001525878906, + "logps/rejected": -141.79551696777344, + "loss": 1.0488, + "margin_dpo/margin_mean": 37.42394256591797, + "margin_dpo/margin_std": 49.388160705566406, + "step": 110 + }, + { + "KL/chosen_KL_mean": -28.314008712768555, + "KL/mean": -49.46540832519531, + "KL/rejected_KL_mean": -70.61680603027344, + "KL/std": 46.69132995605469, + "epoch": 0.16299559471365638, + "fcm_dpo/beta": 0.01339393574744463, + "fcm_dpo/delta": -0.17819947004318237, + "fcm_dpo/margin": 42.30280303955078, + "fcm_dpo/q_t": 0.37644529342651367, + "grad_norm": 21.545623779296875, + "learning_rate": 4.944834074412042e-07, + "logits/chosen": -0.6583347320556641, + "logits/rejected": -0.6357636451721191, + "logps/chosen": -96.9381103515625, + "logps/ref_chosen": -68.62410736083984, + "logps/ref_rejected": -98.42886352539062, + "logps/rejected": -169.04566955566406, + "loss": 1.0303, + "margin_dpo/margin_mean": 42.30280303955078, + "margin_dpo/margin_std": 57.269569396972656, + "step": 111 + }, + { + "KL/chosen_KL_mean": -26.175884246826172, + "KL/mean": -39.6971435546875, + "KL/rejected_KL_mean": -53.2183952331543, + "KL/std": 32.65059280395508, + "epoch": 0.1644640234948605, + "fcm_dpo/beta": 0.0133673045784235, + "fcm_dpo/delta": 0.03976195305585861, + "fcm_dpo/margin": 27.04251480102539, + "fcm_dpo/q_t": 0.41706275939941406, + "grad_norm": 19.833799362182617, + "learning_rate": 4.942120794399002e-07, + "logits/chosen": -0.6356754302978516, + "logits/rejected": -0.5966525077819824, + "logps/chosen": -76.42552185058594, + "logps/ref_chosen": -50.24964141845703, + "logps/ref_rejected": -64.77442932128906, + "logps/rejected": -117.99282836914062, + "loss": 1.1238, + "margin_dpo/margin_mean": 27.042512893676758, + "margin_dpo/margin_std": 39.28888702392578, + "step": 112 + }, + { + "KL/chosen_KL_mean": -32.902503967285156, + "KL/mean": -47.536865234375, + "KL/rejected_KL_mean": -62.17121887207031, + "KL/std": 34.14165496826172, + "epoch": 0.16593245227606462, + "fcm_dpo/beta": 0.013465975411236286, + "fcm_dpo/delta": 0.006106908433139324, + "fcm_dpo/margin": 29.26871109008789, + "fcm_dpo/q_t": 0.40915369987487793, + "grad_norm": 20.532014846801758, + "learning_rate": 4.939343162231841e-07, + "logits/chosen": -0.5950823426246643, + "logits/rejected": -0.5499939322471619, + "logps/chosen": -99.6154556274414, + "logps/ref_chosen": -66.71295166015625, + "logps/ref_rejected": -77.96870422363281, + "logps/rejected": -140.13992309570312, + "loss": 1.0911, + "margin_dpo/margin_mean": 29.26871109008789, + "margin_dpo/margin_std": 37.83106231689453, + "step": 113 + }, + { + "KL/chosen_KL_mean": -30.239316940307617, + "KL/mean": -53.44865417480469, + "KL/rejected_KL_mean": -76.65798950195312, + "KL/std": 51.340576171875, + "epoch": 0.16740088105726872, + "fcm_dpo/beta": 0.012956779450178146, + "fcm_dpo/delta": -0.21660971641540527, + "fcm_dpo/margin": 46.41866683959961, + "fcm_dpo/q_t": 0.3740847706794739, + "grad_norm": 21.725406646728516, + "learning_rate": 4.936501251103751e-07, + "logits/chosen": -0.6112878918647766, + "logits/rejected": -0.5779776573181152, + "logps/chosen": -88.02439880371094, + "logps/ref_chosen": -57.78507995605469, + "logps/ref_rejected": -87.10966491699219, + "logps/rejected": -163.7676544189453, + "loss": 0.9983, + "margin_dpo/margin_mean": 46.41866683959961, + "margin_dpo/margin_std": 62.333763122558594, + "step": 114 + }, + { + "KL/chosen_KL_mean": -39.5991325378418, + "KL/mean": -55.670440673828125, + "KL/rejected_KL_mean": -71.74173736572266, + "KL/std": 49.06171417236328, + "epoch": 0.16886930983847284, + "fcm_dpo/beta": 0.01290312223136425, + "fcm_dpo/delta": -0.015496611595153809, + "fcm_dpo/margin": 32.142608642578125, + "fcm_dpo/q_t": 0.41342228651046753, + "grad_norm": 31.30410385131836, + "learning_rate": 4.933595135901732e-07, + "logits/chosen": -0.6504275798797607, + "logits/rejected": -0.6302141547203064, + "logps/chosen": -105.18177795410156, + "logps/ref_chosen": -65.5826416015625, + "logps/ref_rejected": -98.56552124023438, + "logps/rejected": -170.3072509765625, + "loss": 1.163, + "margin_dpo/margin_mean": 32.142608642578125, + "margin_dpo/margin_std": 64.65020751953125, + "step": 115 + }, + { + "KL/chosen_KL_mean": -29.908945083618164, + "KL/mean": -47.60071563720703, + "KL/rejected_KL_mean": -65.29248809814453, + "KL/std": 41.23687744140625, + "epoch": 0.17033773861967694, + "fcm_dpo/beta": 0.012806812301278114, + "fcm_dpo/delta": -0.05582479387521744, + "fcm_dpo/margin": 35.38352966308594, + "fcm_dpo/q_t": 0.397646963596344, + "grad_norm": 21.841028213500977, + "learning_rate": 4.930624893204624e-07, + "logits/chosen": -0.6186249256134033, + "logits/rejected": -0.608126163482666, + "logps/chosen": -81.30925750732422, + "logps/ref_chosen": -51.40031433105469, + "logps/ref_rejected": -80.5218505859375, + "logps/rejected": -145.8143310546875, + "loss": 1.0577, + "margin_dpo/margin_mean": 35.38352966308594, + "margin_dpo/margin_std": 44.40971374511719, + "step": 116 + }, + { + "KL/chosen_KL_mean": -38.531761169433594, + "KL/mean": -52.92847442626953, + "KL/rejected_KL_mean": -67.3251953125, + "KL/std": 44.12018585205078, + "epoch": 0.17180616740088106, + "fcm_dpo/beta": 0.012780335731804371, + "fcm_dpo/delta": 0.03322294354438782, + "fcm_dpo/margin": 28.793424606323242, + "fcm_dpo/q_t": 0.41756001114845276, + "grad_norm": 28.672874450683594, + "learning_rate": 4.927590601281083e-07, + "logits/chosen": -0.590896487236023, + "logits/rejected": -0.5536011457443237, + "logps/chosen": -107.83016967773438, + "logps/ref_chosen": -69.29840850830078, + "logps/ref_rejected": -66.583984375, + "logps/rejected": -133.9091796875, + "loss": 1.1456, + "margin_dpo/margin_mean": 28.793426513671875, + "margin_dpo/margin_std": 50.167694091796875, + "step": 117 + }, + { + "KL/chosen_KL_mean": -30.69570541381836, + "KL/mean": -48.43621826171875, + "KL/rejected_KL_mean": -66.17672729492188, + "KL/std": 40.15031814575195, + "epoch": 0.17327459618208516, + "fcm_dpo/beta": 0.01270340196788311, + "fcm_dpo/delta": -0.05310482531785965, + "fcm_dpo/margin": 35.48102569580078, + "fcm_dpo/q_t": 0.3987045884132385, + "grad_norm": 21.261783599853516, + "learning_rate": 4.924492340087524e-07, + "logits/chosen": -0.6285079717636108, + "logits/rejected": -0.6085944771766663, + "logps/chosen": -86.33668518066406, + "logps/ref_chosen": -55.6409797668457, + "logps/ref_rejected": -75.66905975341797, + "logps/rejected": -141.84579467773438, + "loss": 1.0621, + "margin_dpo/margin_mean": 35.48102569580078, + "margin_dpo/margin_std": 45.933509826660156, + "step": 118 + }, + { + "KL/chosen_KL_mean": -42.2201042175293, + "KL/mean": -59.702537536621094, + "KL/rejected_KL_mean": -77.18496704101562, + "KL/std": 45.075523376464844, + "epoch": 0.17474302496328928, + "fcm_dpo/beta": 0.012506300583481789, + "fcm_dpo/delta": -0.04011544585227966, + "fcm_dpo/margin": 34.964866638183594, + "fcm_dpo/q_t": 0.4051211178302765, + "grad_norm": 23.512651443481445, + "learning_rate": 4.92133019126601e-07, + "logits/chosen": -0.5862429738044739, + "logits/rejected": -0.5710224509239197, + "logps/chosen": -115.73030090332031, + "logps/ref_chosen": -73.51019287109375, + "logps/ref_rejected": -102.977294921875, + "logps/rejected": -180.16226196289062, + "loss": 1.1028, + "margin_dpo/margin_mean": 34.964866638183594, + "margin_dpo/margin_std": 54.86541748046875, + "step": 119 + }, + { + "KL/chosen_KL_mean": -43.40005111694336, + "KL/mean": -67.5398941040039, + "KL/rejected_KL_mean": -91.67974853515625, + "KL/std": 56.29936218261719, + "epoch": 0.1762114537444934, + "fcm_dpo/beta": 0.012185569852590561, + "fcm_dpo/delta": -0.20024745166301727, + "fcm_dpo/margin": 48.279685974121094, + "fcm_dpo/q_t": 0.3711671531200409, + "grad_norm": 22.41628074645996, + "learning_rate": 4.918104238142103e-07, + "logits/chosen": -0.629610002040863, + "logits/rejected": -0.5978201031684875, + "logps/chosen": -120.1808853149414, + "logps/ref_chosen": -76.78083801269531, + "logps/ref_rejected": -108.02374267578125, + "logps/rejected": -199.7034912109375, + "loss": 0.9945, + "margin_dpo/margin_mean": 48.27968215942383, + "margin_dpo/margin_std": 59.22111892700195, + "step": 120 + }, + { + "KL/chosen_KL_mean": -41.76482391357422, + "KL/mean": -68.51100158691406, + "KL/rejected_KL_mean": -95.25717163085938, + "KL/std": 55.03094482421875, + "epoch": 0.1776798825256975, + "fcm_dpo/beta": 0.011589834466576576, + "fcm_dpo/delta": -0.23640641570091248, + "fcm_dpo/margin": 53.492347717285156, + "fcm_dpo/q_t": 0.36686164140701294, + "grad_norm": 25.108898162841797, + "learning_rate": 4.91481456572267e-07, + "logits/chosen": -0.6011873483657837, + "logits/rejected": -0.5975435972213745, + "logps/chosen": -103.55471801757812, + "logps/ref_chosen": -61.789894104003906, + "logps/ref_rejected": -109.99456787109375, + "logps/rejected": -205.25173950195312, + "loss": 0.996, + "margin_dpo/margin_mean": 53.492347717285156, + "margin_dpo/margin_std": 68.95616912841797, + "step": 121 + }, + { + "KL/chosen_KL_mean": -37.74582290649414, + "KL/mean": -71.00506591796875, + "KL/rejected_KL_mean": -104.26431274414062, + "KL/std": 62.45606994628906, + "epoch": 0.17914831130690162, + "fcm_dpo/beta": 0.010934034362435341, + "fcm_dpo/delta": -0.352782666683197, + "fcm_dpo/margin": 66.51848602294922, + "fcm_dpo/q_t": 0.3402714133262634, + "grad_norm": 23.686704635620117, + "learning_rate": 4.911461260693638e-07, + "logits/chosen": -0.5722811222076416, + "logits/rejected": -0.5880405306816101, + "logps/chosen": -84.64803314208984, + "logps/ref_chosen": -46.9022102355957, + "logps/ref_rejected": -106.71418762207031, + "logps/rejected": -210.97850036621094, + "loss": 0.893, + "margin_dpo/margin_mean": 66.51847839355469, + "margin_dpo/margin_std": 65.36463928222656, + "step": 122 + }, + { + "KL/chosen_KL_mean": -41.76447296142578, + "KL/mean": -63.59715270996094, + "KL/rejected_KL_mean": -85.42982482910156, + "KL/std": 53.64918518066406, + "epoch": 0.18061674008810572, + "fcm_dpo/beta": 0.010566072538495064, + "fcm_dpo/delta": -0.06529982388019562, + "fcm_dpo/margin": 43.66535186767578, + "fcm_dpo/q_t": 0.40092289447784424, + "grad_norm": 20.792469024658203, + "learning_rate": 4.908044411417711e-07, + "logits/chosen": -0.5638459920883179, + "logits/rejected": -0.5461542010307312, + "logps/chosen": -103.10311126708984, + "logps/ref_chosen": -61.33863830566406, + "logps/ref_rejected": -87.775390625, + "logps/rejected": -173.20523071289062, + "loss": 1.1066, + "margin_dpo/margin_mean": 43.66535186767578, + "margin_dpo/margin_std": 71.86073303222656, + "step": 123 + }, + { + "KL/chosen_KL_mean": -47.43387222290039, + "KL/mean": -79.43309020996094, + "KL/rejected_KL_mean": -111.43231201171875, + "KL/std": 73.24830627441406, + "epoch": 0.18208516886930984, + "fcm_dpo/beta": 0.010147863999009132, + "fcm_dpo/delta": -0.2676956057548523, + "fcm_dpo/margin": 63.99842834472656, + "fcm_dpo/q_t": 0.3697102665901184, + "grad_norm": 22.633270263671875, + "learning_rate": 4.904564107932048e-07, + "logits/chosen": -0.5460699796676636, + "logits/rejected": -0.5479286909103394, + "logps/chosen": -118.8822021484375, + "logps/ref_chosen": -71.44833374023438, + "logps/ref_rejected": -117.58056640625, + "logps/rejected": -229.01287841796875, + "loss": 1.0168, + "margin_dpo/margin_mean": 63.998435974121094, + "margin_dpo/margin_std": 92.30806732177734, + "step": 124 + }, + { + "KL/chosen_KL_mean": -39.13515090942383, + "KL/mean": -67.69670104980469, + "KL/rejected_KL_mean": -96.25823974609375, + "KL/std": 61.70295333862305, + "epoch": 0.18355359765051396, + "fcm_dpo/beta": 0.009806671179831028, + "fcm_dpo/delta": -0.1694386899471283, + "fcm_dpo/margin": 57.12309646606445, + "fcm_dpo/q_t": 0.3796992301940918, + "grad_norm": 19.055896759033203, + "learning_rate": 4.90102044194588e-07, + "logits/chosen": -0.5038829445838928, + "logits/rejected": -0.5049155950546265, + "logps/chosen": -89.2720947265625, + "logps/ref_chosen": -50.136940002441406, + "logps/ref_rejected": -83.98861694335938, + "logps/rejected": -180.24685668945312, + "loss": 1.0252, + "margin_dpo/margin_mean": 57.12309646606445, + "margin_dpo/margin_std": 76.22639465332031, + "step": 125 + }, + { + "KL/chosen_KL_mean": -42.27728271484375, + "KL/mean": -68.87530517578125, + "KL/rejected_KL_mean": -95.47331237792969, + "KL/std": 55.436668395996094, + "epoch": 0.18502202643171806, + "fcm_dpo/beta": 0.009523214772343636, + "fcm_dpo/delta": -0.11256399005651474, + "fcm_dpo/margin": 53.19603729248047, + "fcm_dpo/q_t": 0.38864102959632874, + "grad_norm": 20.268415451049805, + "learning_rate": 4.897413506838102e-07, + "logits/chosen": -0.5412114858627319, + "logits/rejected": -0.5357469320297241, + "logps/chosen": -97.94435119628906, + "logps/ref_chosen": -55.66706848144531, + "logps/ref_rejected": -98.1297607421875, + "logps/rejected": -193.6030731201172, + "loss": 1.0422, + "margin_dpo/margin_mean": 53.1960334777832, + "margin_dpo/margin_std": 70.52363586425781, + "step": 126 + }, + { + "KL/chosen_KL_mean": -41.10186767578125, + "KL/mean": -60.70246887207031, + "KL/rejected_KL_mean": -80.30307006835938, + "KL/std": 48.769588470458984, + "epoch": 0.18649045521292218, + "fcm_dpo/beta": 0.009529907256364822, + "fcm_dpo/delta": 0.027420198544859886, + "fcm_dpo/margin": 39.201202392578125, + "fcm_dpo/q_t": 0.4137781858444214, + "grad_norm": 20.927669525146484, + "learning_rate": 4.89374339765481e-07, + "logits/chosen": -0.5445564985275269, + "logits/rejected": -0.5252886414527893, + "logps/chosen": -97.65654754638672, + "logps/ref_chosen": -56.55467987060547, + "logps/ref_rejected": -76.7957763671875, + "logps/rejected": -157.09884643554688, + "loss": 1.128, + "margin_dpo/margin_mean": 39.201202392578125, + "margin_dpo/margin_std": 61.48206329345703, + "step": 127 + }, + { + "KL/chosen_KL_mean": -43.87337875366211, + "KL/mean": -64.43997192382812, + "KL/rejected_KL_mean": -85.00656127929688, + "KL/std": 57.35568618774414, + "epoch": 0.18795888399412627, + "fcm_dpo/beta": 0.009586036205291748, + "fcm_dpo/delta": 0.005548093467950821, + "fcm_dpo/margin": 41.13318634033203, + "fcm_dpo/q_t": 0.41241586208343506, + "grad_norm": 30.452770233154297, + "learning_rate": 4.890010211106795e-07, + "logits/chosen": -0.5140960812568665, + "logits/rejected": -0.4917343258857727, + "logps/chosen": -101.99433898925781, + "logps/ref_chosen": -58.12095642089844, + "logps/ref_rejected": -76.43896484375, + "logps/rejected": -161.44552612304688, + "loss": 1.1401, + "margin_dpo/margin_mean": 41.13318634033203, + "margin_dpo/margin_std": 71.68881225585938, + "step": 128 + }, + { + "KL/chosen_KL_mean": -53.54546356201172, + "KL/mean": -75.27021789550781, + "KL/rejected_KL_mean": -96.9949722290039, + "KL/std": 63.507652282714844, + "epoch": 0.1894273127753304, + "fcm_dpo/beta": 0.009535422548651695, + "fcm_dpo/delta": -0.014929811470210552, + "fcm_dpo/margin": 43.44950866699219, + "fcm_dpo/q_t": 0.41339975595474243, + "grad_norm": 21.074941635131836, + "learning_rate": 4.88621404556699e-07, + "logits/chosen": -0.5416771769523621, + "logits/rejected": -0.5308274626731873, + "logps/chosen": -120.46183776855469, + "logps/ref_chosen": -66.91637420654297, + "logps/ref_rejected": -96.6422119140625, + "logps/rejected": -193.63717651367188, + "loss": 1.1471, + "margin_dpo/margin_mean": 43.44950866699219, + "margin_dpo/margin_std": 82.56187438964844, + "step": 129 + }, + { + "KL/chosen_KL_mean": -39.77076721191406, + "KL/mean": -72.65290069580078, + "KL/rejected_KL_mean": -105.5350341796875, + "KL/std": 65.77041625976562, + "epoch": 0.19089574155653452, + "fcm_dpo/beta": 0.009305297397077084, + "fcm_dpo/delta": -0.2256622165441513, + "fcm_dpo/margin": 65.76427459716797, + "fcm_dpo/q_t": 0.36885032057762146, + "grad_norm": 20.190004348754883, + "learning_rate": 4.882355001067891e-07, + "logits/chosen": -0.47879669070243835, + "logits/rejected": -0.4725271463394165, + "logps/chosen": -84.4376220703125, + "logps/ref_chosen": -44.66685104370117, + "logps/ref_rejected": -82.78165435791016, + "logps/rejected": -188.31668090820312, + "loss": 0.9939, + "margin_dpo/margin_mean": 65.76426696777344, + "margin_dpo/margin_std": 80.05824279785156, + "step": 130 + }, + { + "KL/chosen_KL_mean": -35.21397399902344, + "KL/mean": -67.79527282714844, + "KL/rejected_KL_mean": -100.37657165527344, + "KL/std": 64.40512084960938, + "epoch": 0.19236417033773862, + "fcm_dpo/beta": 0.00883854366838932, + "fcm_dpo/delta": -0.18700018525123596, + "fcm_dpo/margin": 65.16259002685547, + "fcm_dpo/q_t": 0.36890602111816406, + "grad_norm": 29.07965850830078, + "learning_rate": 4.878433179298909e-07, + "logits/chosen": -0.49699753522872925, + "logits/rejected": -0.5037678480148315, + "logps/chosen": -80.13856506347656, + "logps/ref_chosen": -44.924591064453125, + "logps/ref_rejected": -88.44401550292969, + "logps/rejected": -188.82058715820312, + "loss": 0.9793, + "margin_dpo/margin_mean": 65.16259002685547, + "margin_dpo/margin_std": 70.59186553955078, + "step": 131 + }, + { + "KL/chosen_KL_mean": -47.16490173339844, + "KL/mean": -74.09092712402344, + "KL/rejected_KL_mean": -101.01695251464844, + "KL/std": 64.85047912597656, + "epoch": 0.19383259911894274, + "fcm_dpo/beta": 0.008657930418848991, + "fcm_dpo/delta": -0.06971244513988495, + "fcm_dpo/margin": 53.85203552246094, + "fcm_dpo/q_t": 0.40050774812698364, + "grad_norm": 19.968883514404297, + "learning_rate": 4.874448683603694e-07, + "logits/chosen": -0.5425466299057007, + "logits/rejected": -0.54119473695755, + "logps/chosen": -106.16598510742188, + "logps/ref_chosen": -59.00108337402344, + "logps/ref_rejected": -87.89215087890625, + "logps/rejected": -188.9091033935547, + "loss": 1.0858, + "margin_dpo/margin_mean": 53.85203552246094, + "margin_dpo/margin_std": 84.09346008300781, + "step": 132 + }, + { + "KL/chosen_KL_mean": -55.64082336425781, + "KL/mean": -79.35708618164062, + "KL/rejected_KL_mean": -103.07334899902344, + "KL/std": 58.46957015991211, + "epoch": 0.19530102790014683, + "fcm_dpo/beta": 0.008616752922534943, + "fcm_dpo/delta": -0.009173337370157242, + "fcm_dpo/margin": 47.432525634765625, + "fcm_dpo/q_t": 0.4102787375450134, + "grad_norm": 25.95366096496582, + "learning_rate": 4.870401618977415e-07, + "logits/chosen": -0.5186644792556763, + "logits/rejected": -0.5046030879020691, + "logps/chosen": -122.24532318115234, + "logps/ref_chosen": -66.60449981689453, + "logps/ref_rejected": -96.33355712890625, + "logps/rejected": -199.40692138671875, + "loss": 1.1096, + "margin_dpo/margin_mean": 47.432525634765625, + "margin_dpo/margin_std": 74.17190551757812, + "step": 133 + }, + { + "KL/chosen_KL_mean": -44.13493347167969, + "KL/mean": -69.59406280517578, + "KL/rejected_KL_mean": -95.05320739746094, + "KL/std": 57.12010955810547, + "epoch": 0.19676945668135096, + "fcm_dpo/beta": 0.008605021983385086, + "fcm_dpo/delta": -0.039981499314308167, + "fcm_dpo/margin": 50.918270111083984, + "fcm_dpo/q_t": 0.4016202986240387, + "grad_norm": 19.049375534057617, + "learning_rate": 4.866292092063986e-07, + "logits/chosen": -0.47495368123054504, + "logits/rejected": -0.45979058742523193, + "logps/chosen": -96.20419311523438, + "logps/ref_chosen": -52.06925582885742, + "logps/ref_rejected": -87.6545181274414, + "logps/rejected": -182.70773315429688, + "loss": 1.0667, + "margin_dpo/margin_mean": 50.91827392578125, + "margin_dpo/margin_std": 66.06755065917969, + "step": 134 + }, + { + "KL/chosen_KL_mean": -48.97834014892578, + "KL/mean": -85.58039855957031, + "KL/rejected_KL_mean": -122.18243408203125, + "KL/std": 75.99504089355469, + "epoch": 0.19823788546255505, + "fcm_dpo/beta": 0.008282874710857868, + "fcm_dpo/delta": -0.21973907947540283, + "fcm_dpo/margin": 73.2041015625, + "fcm_dpo/q_t": 0.37036457657814026, + "grad_norm": 22.302026748657227, + "learning_rate": 4.862120211153265e-07, + "logits/chosen": -0.4845237731933594, + "logits/rejected": -0.5190806984901428, + "logps/chosen": -99.33219909667969, + "logps/ref_chosen": -50.353858947753906, + "logps/ref_rejected": -115.97975158691406, + "logps/rejected": -238.1621856689453, + "loss": 0.993, + "margin_dpo/margin_mean": 73.2041015625, + "margin_dpo/margin_std": 91.65379333496094, + "step": 135 + }, + { + "KL/chosen_KL_mean": -58.40410614013672, + "KL/mean": -82.88113403320312, + "KL/rejected_KL_mean": -107.358154296875, + "KL/std": 69.1063461303711, + "epoch": 0.19970631424375918, + "fcm_dpo/beta": 0.008144080638885498, + "fcm_dpo/delta": 0.0006435923278331757, + "fcm_dpo/margin": 48.954044342041016, + "fcm_dpo/q_t": 0.4184736907482147, + "grad_norm": 20.428884506225586, + "learning_rate": 4.857886086178193e-07, + "logits/chosen": -0.4917562007904053, + "logits/rejected": -0.48317497968673706, + "logps/chosen": -123.47661590576172, + "logps/ref_chosen": -65.072509765625, + "logps/ref_rejected": -96.32122802734375, + "logps/rejected": -203.67938232421875, + "loss": 1.1412, + "margin_dpo/margin_mean": 48.95404052734375, + "margin_dpo/margin_std": 88.62368774414062, + "step": 136 + }, + { + "KL/chosen_KL_mean": -56.22892761230469, + "KL/mean": -94.69110107421875, + "KL/rejected_KL_mean": -133.1532745361328, + "KL/std": 91.96360778808594, + "epoch": 0.2011747430249633, + "fcm_dpo/beta": 0.007917901501059532, + "fcm_dpo/delta": -0.22291553020477295, + "fcm_dpo/margin": 76.92433166503906, + "fcm_dpo/q_t": 0.37597256898880005, + "grad_norm": 18.094011306762695, + "learning_rate": 4.853589828711902e-07, + "logits/chosen": -0.44450414180755615, + "logits/rejected": -0.47170525789260864, + "logps/chosen": -104.988037109375, + "logps/ref_chosen": -48.759117126464844, + "logps/ref_rejected": -113.86376953125, + "logps/rejected": -247.0170440673828, + "loss": 1.0223, + "margin_dpo/margin_mean": 76.9243392944336, + "margin_dpo/margin_std": 109.81398010253906, + "step": 137 + }, + { + "KL/chosen_KL_mean": -58.91520690917969, + "KL/mean": -87.9725341796875, + "KL/rejected_KL_mean": -117.02986145019531, + "KL/std": 70.14852905273438, + "epoch": 0.2026431718061674, + "fcm_dpo/beta": 0.007785219699144363, + "fcm_dpo/delta": -0.054885830730199814, + "fcm_dpo/margin": 58.114654541015625, + "fcm_dpo/q_t": 0.3965170383453369, + "grad_norm": 21.79376220703125, + "learning_rate": 4.849231551964771e-07, + "logits/chosen": -0.4243091940879822, + "logits/rejected": -0.41211992502212524, + "logps/chosen": -119.43486022949219, + "logps/ref_chosen": -60.519649505615234, + "logps/ref_rejected": -93.19694519042969, + "logps/rejected": -210.226806640625, + "loss": 1.0558, + "margin_dpo/margin_mean": 58.114654541015625, + "margin_dpo/margin_std": 71.69284057617188, + "step": 138 + }, + { + "KL/chosen_KL_mean": -49.914390563964844, + "KL/mean": -84.76184844970703, + "KL/rejected_KL_mean": -119.60931396484375, + "KL/std": 66.84112548828125, + "epoch": 0.20411160058737152, + "fcm_dpo/beta": 0.0076102884486317635, + "fcm_dpo/delta": -0.13750019669532776, + "fcm_dpo/margin": 69.69491577148438, + "fcm_dpo/q_t": 0.382676362991333, + "grad_norm": 18.322908401489258, + "learning_rate": 4.844811370781446e-07, + "logits/chosen": -0.44147640466690063, + "logits/rejected": -0.4316656291484833, + "logps/chosen": -96.80577087402344, + "logps/ref_chosen": -46.89138412475586, + "logps/ref_rejected": -79.72798156738281, + "logps/rejected": -199.33729553222656, + "loss": 1.0187, + "margin_dpo/margin_mean": 69.6949234008789, + "margin_dpo/margin_std": 86.69469451904297, + "step": 139 + }, + { + "KL/chosen_KL_mean": -60.419158935546875, + "KL/mean": -92.12008666992188, + "KL/rejected_KL_mean": -123.82099914550781, + "KL/std": 74.5600814819336, + "epoch": 0.2055800293685756, + "fcm_dpo/beta": 0.00744934706017375, + "fcm_dpo/delta": -0.075960174202919, + "fcm_dpo/margin": 63.40184783935547, + "fcm_dpo/q_t": 0.3951931893825531, + "grad_norm": 22.85267448425293, + "learning_rate": 4.840329401637809e-07, + "logits/chosen": -0.44698405265808105, + "logits/rejected": -0.43398311734199524, + "logps/chosen": -119.39387512207031, + "logps/ref_chosen": -58.97471618652344, + "logps/ref_rejected": -83.28410339355469, + "logps/rejected": -207.1051025390625, + "loss": 1.0674, + "margin_dpo/margin_mean": 63.40184783935547, + "margin_dpo/margin_std": 89.21878051757812, + "step": 140 + }, + { + "KL/chosen_KL_mean": -68.77699279785156, + "KL/mean": -98.60934448242188, + "KL/rejected_KL_mean": -128.44168090820312, + "KL/std": 82.50788879394531, + "epoch": 0.20704845814977973, + "fcm_dpo/beta": 0.007387247867882252, + "fcm_dpo/delta": -0.04261501878499985, + "fcm_dpo/margin": 59.66469955444336, + "fcm_dpo/q_t": 0.4012778103351593, + "grad_norm": 27.14394760131836, + "learning_rate": 4.83578576263792e-07, + "logits/chosen": -0.43428778648376465, + "logits/rejected": -0.4219193756580353, + "logps/chosen": -143.8526611328125, + "logps/ref_chosen": -75.07566833496094, + "logps/ref_rejected": -98.1922607421875, + "logps/rejected": -226.63394165039062, + "loss": 1.1057, + "margin_dpo/margin_mean": 59.664695739746094, + "margin_dpo/margin_std": 95.55046844482422, + "step": 141 + }, + { + "KL/chosen_KL_mean": -70.15496826171875, + "KL/mean": -104.54232025146484, + "KL/rejected_KL_mean": -138.9296875, + "KL/std": 90.10287475585938, + "epoch": 0.20851688693098386, + "fcm_dpo/beta": 0.007280835881829262, + "fcm_dpo/delta": -0.10598242282867432, + "fcm_dpo/margin": 68.77471923828125, + "fcm_dpo/q_t": 0.39226555824279785, + "grad_norm": 28.602947235107422, + "learning_rate": 4.83118057351089e-07, + "logits/chosen": -0.4074459671974182, + "logits/rejected": -0.4063273072242737, + "logps/chosen": -128.18289184570312, + "logps/ref_chosen": -58.027931213378906, + "logps/ref_rejected": -94.58222961425781, + "logps/rejected": -233.51190185546875, + "loss": 1.0853, + "margin_dpo/margin_mean": 68.77471923828125, + "margin_dpo/margin_std": 106.35022735595703, + "step": 142 + }, + { + "KL/chosen_KL_mean": -73.86743927001953, + "KL/mean": -95.766845703125, + "KL/rejected_KL_mean": -117.66624450683594, + "KL/std": 79.29611206054688, + "epoch": 0.20998531571218795, + "fcm_dpo/beta": 0.007270464673638344, + "fcm_dpo/delta": 0.08424904197454453, + "fcm_dpo/margin": 43.79881286621094, + "fcm_dpo/q_t": 0.43221598863601685, + "grad_norm": 24.429250717163086, + "learning_rate": 4.826513955607734e-07, + "logits/chosen": -0.41874316334724426, + "logits/rejected": -0.41285938024520874, + "logps/chosen": -131.46389770507812, + "logps/ref_chosen": -57.59645080566406, + "logps/ref_rejected": -78.99957275390625, + "logps/rejected": -196.6658172607422, + "loss": 1.1982, + "margin_dpo/margin_mean": 43.79881286621094, + "margin_dpo/margin_std": 92.73600769042969, + "step": 143 + }, + { + "KL/chosen_KL_mean": -64.49729919433594, + "KL/mean": -91.391357421875, + "KL/rejected_KL_mean": -118.28541564941406, + "KL/std": 66.07506561279297, + "epoch": 0.21145374449339208, + "fcm_dpo/beta": 0.007330943364650011, + "fcm_dpo/delta": 0.005903269629925489, + "fcm_dpo/margin": 53.788116455078125, + "fcm_dpo/q_t": 0.41074827313423157, + "grad_norm": 20.96710205078125, + "learning_rate": 4.821786031898176e-07, + "logits/chosen": -0.42471039295196533, + "logits/rejected": -0.41297537088394165, + "logps/chosen": -124.40365600585938, + "logps/ref_chosen": -59.90636444091797, + "logps/ref_rejected": -82.00025939941406, + "logps/rejected": -200.28567504882812, + "loss": 1.1061, + "margin_dpo/margin_mean": 53.788116455078125, + "margin_dpo/margin_std": 77.68215942382812, + "step": 144 + }, + { + "KL/chosen_KL_mean": -61.503116607666016, + "KL/mean": -90.38899993896484, + "KL/rejected_KL_mean": -119.27488708496094, + "KL/std": 66.21023559570312, + "epoch": 0.21292217327459617, + "fcm_dpo/beta": 0.007309791631996632, + "fcm_dpo/delta": -0.02330685406923294, + "fcm_dpo/margin": 57.771766662597656, + "fcm_dpo/q_t": 0.40421411395072937, + "grad_norm": 24.383813858032227, + "learning_rate": 4.816996926967401e-07, + "logits/chosen": -0.4303405284881592, + "logits/rejected": -0.41304582357406616, + "logps/chosen": -118.1037826538086, + "logps/ref_chosen": -56.60066604614258, + "logps/ref_rejected": -77.86631774902344, + "logps/rejected": -197.14120483398438, + "loss": 1.0875, + "margin_dpo/margin_mean": 57.771766662597656, + "margin_dpo/margin_std": 80.48458862304688, + "step": 145 + }, + { + "KL/chosen_KL_mean": -84.24576568603516, + "KL/mean": -106.54432678222656, + "KL/rejected_KL_mean": -128.8428955078125, + "KL/std": 72.23680877685547, + "epoch": 0.2143906020558003, + "fcm_dpo/beta": 0.007366587873548269, + "fcm_dpo/delta": 0.07394760102033615, + "fcm_dpo/margin": 44.597129821777344, + "fcm_dpo/q_t": 0.4255162477493286, + "grad_norm": 27.966459274291992, + "learning_rate": 4.812146767012779e-07, + "logits/chosen": -0.4398476481437683, + "logits/rejected": -0.4156040847301483, + "logps/chosen": -150.2462158203125, + "logps/ref_chosen": -66.00045013427734, + "logps/ref_rejected": -81.70278930664062, + "logps/rejected": -210.54568481445312, + "loss": 1.1839, + "margin_dpo/margin_mean": 44.597129821777344, + "margin_dpo/margin_std": 87.12408447265625, + "step": 146 + }, + { + "KL/chosen_KL_mean": -61.255210876464844, + "KL/mean": -91.42532348632812, + "KL/rejected_KL_mean": -121.59544372558594, + "KL/std": 72.65022277832031, + "epoch": 0.21585903083700442, + "fcm_dpo/beta": 0.007349137216806412, + "fcm_dpo/delta": -0.045649539679288864, + "fcm_dpo/margin": 60.3402214050293, + "fcm_dpo/q_t": 0.40209323167800903, + "grad_norm": 19.574729919433594, + "learning_rate": 4.807235679840536e-07, + "logits/chosen": -0.4582536816596985, + "logits/rejected": -0.43836987018585205, + "logps/chosen": -114.66069793701172, + "logps/ref_chosen": -53.405487060546875, + "logps/ref_rejected": -71.39060974121094, + "logps/rejected": -192.98605346679688, + "loss": 1.09, + "margin_dpo/margin_mean": 60.34022521972656, + "margin_dpo/margin_std": 90.08858489990234, + "step": 147 + }, + { + "KL/chosen_KL_mean": -59.720096588134766, + "KL/mean": -85.48670959472656, + "KL/rejected_KL_mean": -111.25332641601562, + "KL/std": 71.65913391113281, + "epoch": 0.2173274596182085, + "fcm_dpo/beta": 0.007287460379302502, + "fcm_dpo/delta": -0.0821787416934967, + "fcm_dpo/margin": 51.533233642578125, + "fcm_dpo/q_t": 0.4167160391807556, + "grad_norm": 19.111360549926758, + "learning_rate": 4.802263794862384e-07, + "logits/chosen": -0.5154159665107727, + "logits/rejected": -0.5085688829421997, + "logps/chosen": -124.65718078613281, + "logps/ref_chosen": -64.93708038330078, + "logps/ref_rejected": -103.09384155273438, + "logps/rejected": -214.34716796875, + "loss": 1.1251, + "margin_dpo/margin_mean": 51.533233642578125, + "margin_dpo/margin_std": 75.76239776611328, + "step": 148 + }, + { + "KL/chosen_KL_mean": -56.87557601928711, + "KL/mean": -89.17583465576172, + "KL/rejected_KL_mean": -121.4760971069336, + "KL/std": 65.12533569335938, + "epoch": 0.21879588839941264, + "fcm_dpo/beta": 0.007102725096046925, + "fcm_dpo/delta": -0.0634830892086029, + "fcm_dpo/margin": 64.60050964355469, + "fcm_dpo/q_t": 0.39505213499069214, + "grad_norm": 17.82282257080078, + "learning_rate": 4.797231243092118e-07, + "logits/chosen": -0.49969860911369324, + "logits/rejected": -0.4846518933773041, + "logps/chosen": -115.34933471679688, + "logps/ref_chosen": -58.47376251220703, + "logps/ref_rejected": -99.31474304199219, + "logps/rejected": -220.79083251953125, + "loss": 1.0534, + "margin_dpo/margin_mean": 64.60050964355469, + "margin_dpo/margin_std": 77.06610870361328, + "step": 149 + }, + { + "KL/chosen_KL_mean": -50.41044235229492, + "KL/mean": -81.95865631103516, + "KL/rejected_KL_mean": -113.50686645507812, + "KL/std": 76.46592712402344, + "epoch": 0.22026431718061673, + "fcm_dpo/beta": 0.007035818882286549, + "fcm_dpo/delta": -0.04736195132136345, + "fcm_dpo/margin": 63.096431732177734, + "fcm_dpo/q_t": 0.40432196855545044, + "grad_norm": 18.326509475708008, + "learning_rate": 4.792138157142157e-07, + "logits/chosen": -0.45320773124694824, + "logits/rejected": -0.456167995929718, + "logps/chosen": -96.11625671386719, + "logps/ref_chosen": -45.705810546875, + "logps/ref_rejected": -83.34759521484375, + "logps/rejected": -196.85446166992188, + "loss": 1.0814, + "margin_dpo/margin_mean": 63.096431732177734, + "margin_dpo/margin_std": 91.71922302246094, + "step": 150 + }, + { + "KL/chosen_KL_mean": -62.595176696777344, + "KL/mean": -94.06363677978516, + "KL/rejected_KL_mean": -125.53208923339844, + "KL/std": 71.80149841308594, + "epoch": 0.22173274596182085, + "fcm_dpo/beta": 0.007020828314125538, + "fcm_dpo/delta": -0.04382166266441345, + "fcm_dpo/margin": 62.936920166015625, + "fcm_dpo/q_t": 0.3986474275588989, + "grad_norm": 21.404926300048828, + "learning_rate": 4.786984671220053e-07, + "logits/chosen": -0.5180387496948242, + "logits/rejected": -0.4879586100578308, + "logps/chosen": -133.166015625, + "logps/ref_chosen": -70.57083129882812, + "logps/ref_rejected": -100.46382141113281, + "logps/rejected": -225.99591064453125, + "loss": 1.0626, + "margin_dpo/margin_mean": 62.936920166015625, + "margin_dpo/margin_std": 78.73812866210938, + "step": 151 + }, + { + "KL/chosen_KL_mean": -54.41718292236328, + "KL/mean": -92.68287658691406, + "KL/rejected_KL_mean": -130.9485626220703, + "KL/std": 73.81169128417969, + "epoch": 0.22320117474302498, + "fcm_dpo/beta": 0.006885044276714325, + "fcm_dpo/delta": -0.13378563523292542, + "fcm_dpo/margin": 76.53138732910156, + "fcm_dpo/q_t": 0.3824828267097473, + "grad_norm": 20.40810203552246, + "learning_rate": 4.78177092112495e-07, + "logits/chosen": -0.4824361205101013, + "logits/rejected": -0.47918662428855896, + "logps/chosen": -114.58157348632812, + "logps/ref_chosen": -60.16438674926758, + "logps/ref_rejected": -106.14045715332031, + "logps/rejected": -237.08901977539062, + "loss": 1.017, + "margin_dpo/margin_mean": 76.53138732910156, + "margin_dpo/margin_std": 90.15208435058594, + "step": 152 + }, + { + "KL/chosen_KL_mean": -55.965492248535156, + "KL/mean": -88.55425262451172, + "KL/rejected_KL_mean": -121.14301300048828, + "KL/std": 80.69013977050781, + "epoch": 0.22466960352422907, + "fcm_dpo/beta": 0.006790122948586941, + "fcm_dpo/delta": -0.044522788375616074, + "fcm_dpo/margin": 65.17752075195312, + "fcm_dpo/q_t": 0.40387213230133057, + "grad_norm": 15.532352447509766, + "learning_rate": 4.776497044244016e-07, + "logits/chosen": -0.49872875213623047, + "logits/rejected": -0.4938894510269165, + "logps/chosen": -112.28076934814453, + "logps/ref_chosen": -56.315277099609375, + "logps/ref_rejected": -85.65583801269531, + "logps/rejected": -206.79884338378906, + "loss": 1.0909, + "margin_dpo/margin_mean": 65.17752075195312, + "margin_dpo/margin_std": 99.14810180664062, + "step": 153 + }, + { + "KL/chosen_KL_mean": -66.57502746582031, + "KL/mean": -97.78536987304688, + "KL/rejected_KL_mean": -128.99571228027344, + "KL/std": 80.61070251464844, + "epoch": 0.2261380323054332, + "fcm_dpo/beta": 0.006765860132873058, + "fcm_dpo/delta": -0.023557795211672783, + "fcm_dpo/margin": 62.42070007324219, + "fcm_dpo/q_t": 0.4070885479450226, + "grad_norm": 18.9123592376709, + "learning_rate": 4.771163179548808e-07, + "logits/chosen": -0.4788493514060974, + "logits/rejected": -0.48109960556030273, + "logps/chosen": -129.31759643554688, + "logps/ref_chosen": -62.74256896972656, + "logps/ref_rejected": -104.24420166015625, + "logps/rejected": -233.2399139404297, + "loss": 1.1206, + "margin_dpo/margin_mean": 62.42070388793945, + "margin_dpo/margin_std": 102.98345947265625, + "step": 154 + }, + { + "KL/chosen_KL_mean": -61.065223693847656, + "KL/mean": -92.69122314453125, + "KL/rejected_KL_mean": -124.31721496582031, + "KL/std": 75.25825500488281, + "epoch": 0.2276064610866373, + "fcm_dpo/beta": 0.006722897756844759, + "fcm_dpo/delta": -0.026448355987668037, + "fcm_dpo/margin": 63.251991271972656, + "fcm_dpo/q_t": 0.4044472575187683, + "grad_norm": 19.345184326171875, + "learning_rate": 4.7657694675916247e-07, + "logits/chosen": -0.4656848907470703, + "logits/rejected": -0.44397997856140137, + "logps/chosen": -121.7184066772461, + "logps/ref_chosen": -60.65318298339844, + "logps/ref_rejected": -77.49220275878906, + "logps/rejected": -201.80941772460938, + "loss": 1.0946, + "margin_dpo/margin_mean": 63.25199890136719, + "margin_dpo/margin_std": 93.20378112792969, + "step": 155 + }, + { + "KL/chosen_KL_mean": -85.85633850097656, + "KL/mean": -104.20632934570312, + "KL/rejected_KL_mean": -122.55632019042969, + "KL/std": 80.0552978515625, + "epoch": 0.2290748898678414, + "fcm_dpo/beta": 0.0067663900554180145, + "fcm_dpo/delta": 0.05330243334174156, + "fcm_dpo/margin": 36.699989318847656, + "fcm_dpo/q_t": 0.44376257061958313, + "grad_norm": 29.024635314941406, + "learning_rate": 4.7603160505017893e-07, + "logits/chosen": -0.41135138273239136, + "logits/rejected": -0.40221792459487915, + "logps/chosen": -155.34820556640625, + "logps/ref_chosen": -69.49188232421875, + "logps/ref_rejected": -77.16929626464844, + "logps/rejected": -199.72561645507812, + "loss": 1.2725, + "margin_dpo/margin_mean": 36.699989318847656, + "margin_dpo/margin_std": 105.47511291503906, + "step": 156 + }, + { + "KL/chosen_KL_mean": -78.11371612548828, + "KL/mean": -118.10308837890625, + "KL/rejected_KL_mean": -158.09246826171875, + "KL/std": 87.00740051269531, + "epoch": 0.2305433186490455, + "fcm_dpo/beta": 0.0065932744182646275, + "fcm_dpo/delta": -0.13586004078388214, + "fcm_dpo/margin": 79.97874450683594, + "fcm_dpo/q_t": 0.37946271896362305, + "grad_norm": 23.550922393798828, + "learning_rate": 4.7548030719819154e-07, + "logits/chosen": -0.40428751707077026, + "logits/rejected": -0.4115862250328064, + "logps/chosen": -139.48214721679688, + "logps/ref_chosen": -61.368438720703125, + "logps/ref_rejected": -107.64636993408203, + "logps/rejected": -265.73883056640625, + "loss": 1.0299, + "margin_dpo/margin_mean": 79.97874450683594, + "margin_dpo/margin_std": 98.83998107910156, + "step": 157 + }, + { + "KL/chosen_KL_mean": -78.0158462524414, + "KL/mean": -120.59271240234375, + "KL/rejected_KL_mean": -163.16958618164062, + "KL/std": 107.82058715820312, + "epoch": 0.23201174743024963, + "fcm_dpo/beta": 0.006434428971260786, + "fcm_dpo/delta": -0.15647649765014648, + "fcm_dpo/margin": 85.15372467041016, + "fcm_dpo/q_t": 0.38724130392074585, + "grad_norm": 20.167179107666016, + "learning_rate": 4.7492306773041136e-07, + "logits/chosen": -0.3908860683441162, + "logits/rejected": -0.40809518098831177, + "logps/chosen": -135.62876892089844, + "logps/ref_chosen": -57.612918853759766, + "logps/ref_rejected": -113.6946792602539, + "logps/rejected": -276.8642578125, + "loss": 1.0558, + "margin_dpo/margin_mean": 85.15372467041016, + "margin_dpo/margin_std": 129.78738403320312, + "step": 158 + }, + { + "KL/chosen_KL_mean": -88.34893798828125, + "KL/mean": -117.44178771972656, + "KL/rejected_KL_mean": -146.5346221923828, + "KL/std": 95.12521362304688, + "epoch": 0.23348017621145375, + "fcm_dpo/beta": 0.006435505114495754, + "fcm_dpo/delta": 0.026020796969532967, + "fcm_dpo/margin": 58.185665130615234, + "fcm_dpo/q_t": 0.4166509807109833, + "grad_norm": 22.233327865600586, + "learning_rate": 4.743599013306165e-07, + "logits/chosen": -0.3991559147834778, + "logits/rejected": -0.36633995175361633, + "logps/chosen": -169.90928649902344, + "logps/ref_chosen": -81.56034851074219, + "logps/ref_rejected": -88.89871215820312, + "logps/rejected": -235.43331909179688, + "loss": 1.1488, + "margin_dpo/margin_mean": 58.1856689453125, + "margin_dpo/margin_std": 102.5018310546875, + "step": 159 + }, + { + "KL/chosen_KL_mean": -90.17693328857422, + "KL/mean": -129.38868713378906, + "KL/rejected_KL_mean": -168.6004638671875, + "KL/std": 101.24613952636719, + "epoch": 0.23494860499265785, + "fcm_dpo/beta": 0.00629377830773592, + "fcm_dpo/delta": -0.09926701337099075, + "fcm_dpo/margin": 78.42352294921875, + "fcm_dpo/q_t": 0.3964502215385437, + "grad_norm": 23.24005126953125, + "learning_rate": 4.737908228387656e-07, + "logits/chosen": -0.3764195144176483, + "logits/rejected": -0.3670395612716675, + "logps/chosen": -155.90780639648438, + "logps/ref_chosen": -65.73088073730469, + "logps/ref_rejected": -97.21781921386719, + "logps/rejected": -265.8182678222656, + "loss": 1.0906, + "margin_dpo/margin_mean": 78.42352294921875, + "margin_dpo/margin_std": 126.56271362304688, + "step": 160 + }, + { + "KL/chosen_KL_mean": -76.84190368652344, + "KL/mean": -111.3232650756836, + "KL/rejected_KL_mean": -145.80462646484375, + "KL/std": 80.283203125, + "epoch": 0.23641703377386197, + "fcm_dpo/beta": 0.006256973836570978, + "fcm_dpo/delta": -0.03292801231145859, + "fcm_dpo/margin": 68.96272277832031, + "fcm_dpo/q_t": 0.40471774339675903, + "grad_norm": 21.411426544189453, + "learning_rate": 4.7321584725060594e-07, + "logits/chosen": -0.34909725189208984, + "logits/rejected": -0.34455615282058716, + "logps/chosen": -129.27838134765625, + "logps/ref_chosen": -52.43647003173828, + "logps/ref_rejected": -83.43095397949219, + "logps/rejected": -229.23558044433594, + "loss": 1.0934, + "margin_dpo/margin_mean": 68.96272277832031, + "margin_dpo/margin_std": 102.0528564453125, + "step": 161 + }, + { + "KL/chosen_KL_mean": -74.41575622558594, + "KL/mean": -108.84408569335938, + "KL/rejected_KL_mean": -143.27243041992188, + "KL/std": 89.80424499511719, + "epoch": 0.23788546255506607, + "fcm_dpo/beta": 0.006169519387185574, + "fcm_dpo/delta": -0.02764543890953064, + "fcm_dpo/margin": 68.85668182373047, + "fcm_dpo/q_t": 0.4066374897956848, + "grad_norm": 22.422332763671875, + "learning_rate": 4.7263498971727905e-07, + "logits/chosen": -0.44204244017601013, + "logits/rejected": -0.4256909489631653, + "logps/chosen": -137.02633666992188, + "logps/ref_chosen": -62.6105842590332, + "logps/ref_rejected": -89.39057922363281, + "logps/rejected": -232.6630096435547, + "loss": 1.1078, + "margin_dpo/margin_mean": 68.85668182373047, + "margin_dpo/margin_std": 106.42919158935547, + "step": 162 + }, + { + "KL/chosen_KL_mean": -83.02301025390625, + "KL/mean": -116.34445190429688, + "KL/rejected_KL_mean": -149.6658935546875, + "KL/std": 89.37168884277344, + "epoch": 0.2393538913362702, + "fcm_dpo/beta": 0.006201374344527721, + "fcm_dpo/delta": -0.013848692178726196, + "fcm_dpo/margin": 66.64288330078125, + "fcm_dpo/q_t": 0.40968990325927734, + "grad_norm": 20.55537223815918, + "learning_rate": 4.720482655449212e-07, + "logits/chosen": -0.37833207845687866, + "logits/rejected": -0.3598299026489258, + "logps/chosen": -138.04464721679688, + "logps/ref_chosen": -55.021629333496094, + "logps/ref_rejected": -75.418212890625, + "logps/rejected": -225.0841064453125, + "loss": 1.1151, + "margin_dpo/margin_mean": 66.64288330078125, + "margin_dpo/margin_std": 107.60564422607422, + "step": 163 + }, + { + "KL/chosen_KL_mean": -75.54098510742188, + "KL/mean": -116.61768341064453, + "KL/rejected_KL_mean": -157.69436645507812, + "KL/std": 87.67422485351562, + "epoch": 0.24082232011747431, + "fcm_dpo/beta": 0.006058148108422756, + "fcm_dpo/delta": -0.10455699265003204, + "fcm_dpo/margin": 82.15338134765625, + "fcm_dpo/q_t": 0.38800323009490967, + "grad_norm": 23.66642189025879, + "learning_rate": 4.714556901942599e-07, + "logits/chosen": -0.38808923959732056, + "logits/rejected": -0.3748926520347595, + "logps/chosen": -131.18165588378906, + "logps/ref_chosen": -55.64066696166992, + "logps/ref_rejected": -79.66463470458984, + "logps/rejected": -237.3590087890625, + "loss": 1.0355, + "margin_dpo/margin_mean": 82.15338134765625, + "margin_dpo/margin_std": 101.20204162597656, + "step": 164 + }, + { + "KL/chosen_KL_mean": -81.94873046875, + "KL/mean": -108.24546813964844, + "KL/rejected_KL_mean": -134.54220581054688, + "KL/std": 74.16279602050781, + "epoch": 0.2422907488986784, + "fcm_dpo/beta": 0.006125118583440781, + "fcm_dpo/delta": 0.08051982522010803, + "fcm_dpo/margin": 52.59346008300781, + "fcm_dpo/q_t": 0.42784789204597473, + "grad_norm": 24.392139434814453, + "learning_rate": 4.708572792802069e-07, + "logits/chosen": -0.3788298964500427, + "logits/rejected": -0.3494594097137451, + "logps/chosen": -143.25941467285156, + "logps/ref_chosen": -61.310691833496094, + "logps/ref_rejected": -73.67060852050781, + "logps/rejected": -208.21279907226562, + "loss": 1.1772, + "margin_dpo/margin_mean": 52.59346008300781, + "margin_dpo/margin_std": 99.10749053955078, + "step": 165 + }, + { + "KL/chosen_KL_mean": -72.17887115478516, + "KL/mean": -121.31666564941406, + "KL/rejected_KL_mean": -170.4544677734375, + "KL/std": 108.35174560546875, + "epoch": 0.24375917767988253, + "fcm_dpo/beta": 0.00596030056476593, + "fcm_dpo/delta": -0.19864240288734436, + "fcm_dpo/margin": 98.27557373046875, + "fcm_dpo/q_t": 0.3804228901863098, + "grad_norm": 17.77814292907715, + "learning_rate": 4.702530485714461e-07, + "logits/chosen": -0.3884061574935913, + "logits/rejected": -0.39925825595855713, + "logps/chosen": -123.1624755859375, + "logps/ref_chosen": -50.98360061645508, + "logps/ref_rejected": -98.09512329101562, + "logps/rejected": -268.5495910644531, + "loss": 1.0179, + "margin_dpo/margin_mean": 98.27557373046875, + "margin_dpo/margin_std": 137.25851440429688, + "step": 166 + }, + { + "KL/chosen_KL_mean": -73.13114929199219, + "KL/mean": -124.726806640625, + "KL/rejected_KL_mean": -176.3224639892578, + "KL/std": 98.56492614746094, + "epoch": 0.24522760646108663, + "fcm_dpo/beta": 0.0057468172162771225, + "fcm_dpo/delta": -0.20526599884033203, + "fcm_dpo/margin": 103.19131469726562, + "fcm_dpo/q_t": 0.36781153082847595, + "grad_norm": 18.200578689575195, + "learning_rate": 4.6964301399001877e-07, + "logits/chosen": -0.36232346296310425, + "logits/rejected": -0.36501890420913696, + "logps/chosen": -123.55524444580078, + "logps/ref_chosen": -50.424095153808594, + "logps/ref_rejected": -96.03042602539062, + "logps/rejected": -272.3529052734375, + "loss": 0.9736, + "margin_dpo/margin_mean": 103.19131469726562, + "margin_dpo/margin_std": 114.00679016113281, + "step": 167 + }, + { + "KL/chosen_KL_mean": -79.71585083007812, + "KL/mean": -117.56804656982422, + "KL/rejected_KL_mean": -155.42022705078125, + "KL/std": 91.46189880371094, + "epoch": 0.24669603524229075, + "fcm_dpo/beta": 0.005651239771395922, + "fcm_dpo/delta": -0.029224606230854988, + "fcm_dpo/margin": 75.70437622070312, + "fcm_dpo/q_t": 0.40402811765670776, + "grad_norm": 19.609725952148438, + "learning_rate": 4.690271916109034e-07, + "logits/chosen": -0.3544921278953552, + "logits/rejected": -0.3437988758087158, + "logps/chosen": -129.17868041992188, + "logps/ref_chosen": -49.462825775146484, + "logps/ref_rejected": -75.30855560302734, + "logps/rejected": -230.72879028320312, + "loss": 1.0797, + "margin_dpo/margin_mean": 75.70437622070312, + "margin_dpo/margin_std": 102.8135986328125, + "step": 168 + }, + { + "KL/chosen_KL_mean": -82.12629699707031, + "KL/mean": -115.36107635498047, + "KL/rejected_KL_mean": -148.59585571289062, + "KL/std": 91.35723876953125, + "epoch": 0.24816446402349487, + "fcm_dpo/beta": 0.005572349298745394, + "fcm_dpo/delta": -0.07357925921678543, + "fcm_dpo/margin": 66.46955108642578, + "fcm_dpo/q_t": 0.42019665241241455, + "grad_norm": 19.77760124206543, + "learning_rate": 4.6840559766159235e-07, + "logits/chosen": -0.3844687044620514, + "logits/rejected": -0.36855146288871765, + "logps/chosen": -141.92974853515625, + "logps/ref_chosen": -59.803443908691406, + "logps/ref_rejected": -83.34574890136719, + "logps/rejected": -231.94158935546875, + "loss": 1.1611, + "margin_dpo/margin_mean": 66.46955108642578, + "margin_dpo/margin_std": 125.47772216796875, + "step": 169 + }, + { + "KL/chosen_KL_mean": -72.95841979980469, + "KL/mean": -112.25492095947266, + "KL/rejected_KL_mean": -151.55142211914062, + "KL/std": 86.3768310546875, + "epoch": 0.24963289280469897, + "fcm_dpo/beta": 0.005510912276804447, + "fcm_dpo/delta": -0.03551424294710159, + "fcm_dpo/margin": 78.5929946899414, + "fcm_dpo/q_t": 0.4006960988044739, + "grad_norm": 18.546783447265625, + "learning_rate": 4.6777824852166437e-07, + "logits/chosen": -0.3328009247779846, + "logits/rejected": -0.321723610162735, + "logps/chosen": -122.4301986694336, + "logps/ref_chosen": -49.471771240234375, + "logps/ref_rejected": -75.91734313964844, + "logps/rejected": -227.46875, + "loss": 1.077, + "margin_dpo/margin_mean": 78.5929946899414, + "margin_dpo/margin_std": 103.26647186279297, + "step": 170 + }, + { + "KL/chosen_KL_mean": -106.71308135986328, + "KL/mean": -138.3514404296875, + "KL/rejected_KL_mean": -169.98980712890625, + "KL/std": 100.98884582519531, + "epoch": 0.2511013215859031, + "fcm_dpo/beta": 0.005571361631155014, + "fcm_dpo/delta": 0.04921392351388931, + "fcm_dpo/margin": 63.276710510253906, + "fcm_dpo/q_t": 0.425686776638031, + "grad_norm": 27.314945220947266, + "learning_rate": 4.6714516072235273e-07, + "logits/chosen": -0.33173030614852905, + "logits/rejected": -0.3102639317512512, + "logps/chosen": -191.21240234375, + "logps/ref_chosen": -84.49931335449219, + "logps/ref_rejected": -109.38209533691406, + "logps/rejected": -279.37188720703125, + "loss": 1.1782, + "margin_dpo/margin_mean": 63.276710510253906, + "margin_dpo/margin_std": 130.07742309570312, + "step": 171 + }, + { + "KL/chosen_KL_mean": -93.982177734375, + "KL/mean": -127.27529907226562, + "KL/rejected_KL_mean": -160.56842041015625, + "KL/std": 97.7683334350586, + "epoch": 0.2525697503671072, + "fcm_dpo/beta": 0.005604305304586887, + "fcm_dpo/delta": 0.027874935418367386, + "fcm_dpo/margin": 66.58624267578125, + "fcm_dpo/q_t": 0.4164371192455292, + "grad_norm": 19.571752548217773, + "learning_rate": 4.6650635094610966e-07, + "logits/chosen": -0.3909180760383606, + "logits/rejected": -0.3718334138393402, + "logps/chosen": -162.6361083984375, + "logps/ref_chosen": -68.65391540527344, + "logps/ref_rejected": -85.43667602539062, + "logps/rejected": -246.00509643554688, + "loss": 1.1342, + "margin_dpo/margin_mean": 66.58624267578125, + "margin_dpo/margin_std": 109.93568420410156, + "step": 172 + }, + { + "KL/chosen_KL_mean": -85.7948989868164, + "KL/mean": -119.3344497680664, + "KL/rejected_KL_mean": -152.87399291992188, + "KL/std": 91.15321350097656, + "epoch": 0.2540381791483113, + "fcm_dpo/beta": 0.00565545866265893, + "fcm_dpo/delta": 0.021153416484594345, + "fcm_dpo/margin": 67.0791015625, + "fcm_dpo/q_t": 0.4138815701007843, + "grad_norm": 19.52805519104004, + "learning_rate": 4.6586183602616687e-07, + "logits/chosen": -0.38386523723602295, + "logits/rejected": -0.353518009185791, + "logps/chosen": -148.8457794189453, + "logps/ref_chosen": -63.050880432128906, + "logps/ref_rejected": -78.68392181396484, + "logps/rejected": -231.55792236328125, + "loss": 1.1093, + "margin_dpo/margin_mean": 67.0791015625, + "margin_dpo/margin_std": 95.22209167480469, + "step": 173 + }, + { + "KL/chosen_KL_mean": -82.10379791259766, + "KL/mean": -121.74862670898438, + "KL/rejected_KL_mean": -161.3934783935547, + "KL/std": 96.38128662109375, + "epoch": 0.2555066079295154, + "fcm_dpo/beta": 0.005634433589875698, + "fcm_dpo/delta": -0.04934954643249512, + "fcm_dpo/margin": 79.28968048095703, + "fcm_dpo/q_t": 0.4018397331237793, + "grad_norm": 26.33484649658203, + "learning_rate": 4.652116329460919e-07, + "logits/chosen": -0.32500776648521423, + "logits/rejected": -0.3427908718585968, + "logps/chosen": -135.46676635742188, + "logps/ref_chosen": -53.36296844482422, + "logps/ref_rejected": -101.91120910644531, + "logps/rejected": -263.3046875, + "loss": 1.0871, + "margin_dpo/margin_mean": 79.28968048095703, + "margin_dpo/margin_std": 115.50275421142578, + "step": 174 + }, + { + "KL/chosen_KL_mean": -75.51565551757812, + "KL/mean": -129.7423095703125, + "KL/rejected_KL_mean": -183.96896362304688, + "KL/std": 102.85943603515625, + "epoch": 0.25697503671071953, + "fcm_dpo/beta": 0.00541552621871233, + "fcm_dpo/delta": -0.19940567016601562, + "fcm_dpo/margin": 108.45330810546875, + "fcm_dpo/q_t": 0.3663579821586609, + "grad_norm": 27.85973358154297, + "learning_rate": 4.645557588393406e-07, + "logits/chosen": -0.32464098930358887, + "logits/rejected": -0.31119775772094727, + "logps/chosen": -120.93342590332031, + "logps/ref_chosen": -45.417762756347656, + "logps/ref_rejected": -89.50579833984375, + "logps/rejected": -273.47479248046875, + "loss": 0.9587, + "margin_dpo/margin_mean": 108.45330810546875, + "margin_dpo/margin_std": 108.14402770996094, + "step": 175 + }, + { + "KL/chosen_KL_mean": -81.18791198730469, + "KL/mean": -126.93669891357422, + "KL/rejected_KL_mean": -172.68548583984375, + "KL/std": 102.40462493896484, + "epoch": 0.25844346549192365, + "fcm_dpo/beta": 0.00530798826366663, + "fcm_dpo/delta": -0.08994344621896744, + "fcm_dpo/margin": 91.49755859375, + "fcm_dpo/q_t": 0.3935086727142334, + "grad_norm": 21.088232040405273, + "learning_rate": 4.638942309888058e-07, + "logits/chosen": -0.3157244324684143, + "logits/rejected": -0.3328602910041809, + "logps/chosen": -131.6407470703125, + "logps/ref_chosen": -50.452842712402344, + "logps/ref_rejected": -95.5589599609375, + "logps/rejected": -268.24444580078125, + "loss": 1.0459, + "margin_dpo/margin_mean": 91.49755859375, + "margin_dpo/margin_std": 118.77714538574219, + "step": 176 + }, + { + "KL/chosen_KL_mean": -93.40528869628906, + "KL/mean": -138.24319458007812, + "KL/rejected_KL_mean": -183.0811309814453, + "KL/std": 109.86962127685547, + "epoch": 0.2599118942731278, + "fcm_dpo/beta": 0.005232410505414009, + "fcm_dpo/delta": -0.07259676605463028, + "fcm_dpo/margin": 89.67583465576172, + "fcm_dpo/q_t": 0.3961235284805298, + "grad_norm": 26.937484741210938, + "learning_rate": 4.6322706682636137e-07, + "logits/chosen": -0.3570261597633362, + "logits/rejected": -0.3483562469482422, + "logps/chosen": -154.6217498779297, + "logps/ref_chosen": -61.216468811035156, + "logps/ref_rejected": -95.89378356933594, + "logps/rejected": -278.97491455078125, + "loss": 1.0546, + "margin_dpo/margin_mean": 89.67582702636719, + "margin_dpo/margin_std": 117.8981704711914, + "step": 177 + }, + { + "KL/chosen_KL_mean": -102.56449890136719, + "KL/mean": -159.57308959960938, + "KL/rejected_KL_mean": -216.5816650390625, + "KL/std": 129.33131408691406, + "epoch": 0.26138032305433184, + "fcm_dpo/beta": 0.005035985726863146, + "fcm_dpo/delta": -0.18584051728248596, + "fcm_dpo/margin": 114.01716613769531, + "fcm_dpo/q_t": 0.37551695108413696, + "grad_norm": 24.992889404296875, + "learning_rate": 4.6255428393240354e-07, + "logits/chosen": -0.2407420575618744, + "logits/rejected": -0.23280589282512665, + "logps/chosen": -160.82928466796875, + "logps/ref_chosen": -58.26478958129883, + "logps/ref_rejected": -105.3653335571289, + "logps/rejected": -321.947021484375, + "loss": 0.9997, + "margin_dpo/margin_mean": 114.01716613769531, + "margin_dpo/margin_std": 141.47775268554688, + "step": 178 + }, + { + "KL/chosen_KL_mean": -103.99703979492188, + "KL/mean": -144.8743438720703, + "KL/rejected_KL_mean": -185.75167846679688, + "KL/std": 110.77649688720703, + "epoch": 0.26284875183553597, + "fcm_dpo/beta": 0.004978477954864502, + "fcm_dpo/delta": -0.007790856063365936, + "fcm_dpo/margin": 81.7546157836914, + "fcm_dpo/q_t": 0.4102519154548645, + "grad_norm": 26.21021270751953, + "learning_rate": 4.6187590003538724e-07, + "logits/chosen": -0.2729560434818268, + "logits/rejected": -0.2793646454811096, + "logps/chosen": -165.05535888671875, + "logps/ref_chosen": -61.05832290649414, + "logps/ref_rejected": -90.52782440185547, + "logps/rejected": -276.27947998046875, + "loss": 1.1302, + "margin_dpo/margin_mean": 81.75462341308594, + "margin_dpo/margin_std": 138.115966796875, + "step": 179 + }, + { + "KL/chosen_KL_mean": -89.33973693847656, + "KL/mean": -141.10313415527344, + "KL/rejected_KL_mean": -192.86651611328125, + "KL/std": 98.31314086914062, + "epoch": 0.2643171806167401, + "fcm_dpo/beta": 0.004909820854663849, + "fcm_dpo/delta": -0.1142577975988388, + "fcm_dpo/margin": 103.52678680419922, + "fcm_dpo/q_t": 0.38421761989593506, + "grad_norm": 17.1671142578125, + "learning_rate": 4.611919330113591e-07, + "logits/chosen": -0.3321910500526428, + "logits/rejected": -0.32747143507003784, + "logps/chosen": -143.68246459960938, + "logps/ref_chosen": -54.34272003173828, + "logps/ref_rejected": -98.21183776855469, + "logps/rejected": -291.078369140625, + "loss": 1.0234, + "margin_dpo/margin_mean": 103.52677917480469, + "margin_dpo/margin_std": 122.91793060302734, + "step": 180 + }, + { + "KL/chosen_KL_mean": -77.30790710449219, + "KL/mean": -109.57101440429688, + "KL/rejected_KL_mean": -141.83412170410156, + "KL/std": 90.15087890625, + "epoch": 0.2657856093979442, + "fcm_dpo/beta": 0.004953712224960327, + "fcm_dpo/delta": 0.08287452906370163, + "fcm_dpo/margin": 64.52621459960938, + "fcm_dpo/q_t": 0.4273186922073364, + "grad_norm": 17.40216064453125, + "learning_rate": 4.605024008834863e-07, + "logits/chosen": -0.3179657459259033, + "logits/rejected": -0.290554940700531, + "logps/chosen": -132.30836486816406, + "logps/ref_chosen": -55.000457763671875, + "logps/ref_rejected": -61.656166076660156, + "logps/rejected": -203.49029541015625, + "loss": 1.1666, + "margin_dpo/margin_mean": 64.52621459960938, + "margin_dpo/margin_std": 114.01302337646484, + "step": 181 + }, + { + "KL/chosen_KL_mean": -73.5950927734375, + "KL/mean": -129.3390350341797, + "KL/rejected_KL_mean": -185.08297729492188, + "KL/std": 109.08509063720703, + "epoch": 0.26725403817914833, + "fcm_dpo/beta": 0.004840575158596039, + "fcm_dpo/delta": -0.1485544741153717, + "fcm_dpo/margin": 111.48786926269531, + "fcm_dpo/q_t": 0.37746167182922363, + "grad_norm": 17.18657112121582, + "learning_rate": 4.598073218215817e-07, + "logits/chosen": -0.30293479561805725, + "logits/rejected": -0.3121221959590912, + "logps/chosen": -114.70294952392578, + "logps/ref_chosen": -41.107852935791016, + "logps/ref_rejected": -89.5215835571289, + "logps/rejected": -274.60455322265625, + "loss": 1.0114, + "margin_dpo/margin_mean": 111.48786926269531, + "margin_dpo/margin_std": 131.83238220214844, + "step": 182 + }, + { + "KL/chosen_KL_mean": -110.05368041992188, + "KL/mean": -140.24612426757812, + "KL/rejected_KL_mean": -170.43858337402344, + "KL/std": 90.4064712524414, + "epoch": 0.2687224669603524, + "fcm_dpo/beta": 0.004783437587320805, + "fcm_dpo/delta": -0.041149888187646866, + "fcm_dpo/margin": 60.38490295410156, + "fcm_dpo/q_t": 0.43268686532974243, + "grad_norm": 18.724519729614258, + "learning_rate": 4.5910671414162484e-07, + "logits/chosen": -0.31450870633125305, + "logits/rejected": -0.30454862117767334, + "logps/chosen": -167.5782470703125, + "logps/ref_chosen": -57.52456283569336, + "logps/ref_rejected": -75.97572326660156, + "logps/rejected": -246.414306640625, + "loss": 1.1804, + "margin_dpo/margin_mean": 60.38490295410156, + "margin_dpo/margin_std": 99.7965087890625, + "step": 183 + }, + { + "KL/chosen_KL_mean": -93.04067993164062, + "KL/mean": -124.70458984375, + "KL/rejected_KL_mean": -156.3684844970703, + "KL/std": 85.67855834960938, + "epoch": 0.2701908957415565, + "fcm_dpo/beta": 0.00478300591930747, + "fcm_dpo/delta": -0.0009023167076520622, + "fcm_dpo/margin": 63.327796936035156, + "fcm_dpo/q_t": 0.43101605772972107, + "grad_norm": 19.71858024597168, + "learning_rate": 4.5840059630527985e-07, + "logits/chosen": -0.34772413969039917, + "logits/rejected": -0.337999165058136, + "logps/chosen": -151.58563232421875, + "logps/ref_chosen": -58.544952392578125, + "logps/ref_rejected": -76.63406372070312, + "logps/rejected": -233.0025634765625, + "loss": 1.1707, + "margin_dpo/margin_mean": 63.32780075073242, + "margin_dpo/margin_std": 107.8141098022461, + "step": 184 + }, + { + "KL/chosen_KL_mean": -101.89752960205078, + "KL/mean": -127.2352523803711, + "KL/rejected_KL_mean": -152.57298278808594, + "KL/std": 100.57600402832031, + "epoch": 0.27165932452276065, + "fcm_dpo/beta": 0.004893806297332048, + "fcm_dpo/delta": 0.1559191346168518, + "fcm_dpo/margin": 50.67546081542969, + "fcm_dpo/q_t": 0.446666955947876, + "grad_norm": 19.57623291015625, + "learning_rate": 4.5768898691940836e-07, + "logits/chosen": -0.31452715396881104, + "logits/rejected": -0.29128819704055786, + "logps/chosen": -163.92337036132812, + "logps/ref_chosen": -62.025848388671875, + "logps/ref_rejected": -73.7625961303711, + "logps/rejected": -226.3355712890625, + "loss": 1.2331, + "margin_dpo/margin_mean": 50.67546081542969, + "margin_dpo/margin_std": 120.71915435791016, + "step": 185 + }, + { + "KL/chosen_KL_mean": -93.28840637207031, + "KL/mean": -141.59420776367188, + "KL/rejected_KL_mean": -189.89999389648438, + "KL/std": 101.8707046508789, + "epoch": 0.27312775330396477, + "fcm_dpo/beta": 0.004873909987509251, + "fcm_dpo/delta": -0.0744955912232399, + "fcm_dpo/margin": 96.61160278320312, + "fcm_dpo/q_t": 0.3932849168777466, + "grad_norm": 28.687524795532227, + "learning_rate": 4.5697190473557947e-07, + "logits/chosen": -0.36662542819976807, + "logits/rejected": -0.3481537103652954, + "logps/chosen": -162.64187622070312, + "logps/ref_chosen": -69.35346984863281, + "logps/ref_rejected": -88.07244873046875, + "logps/rejected": -277.97247314453125, + "loss": 1.043, + "margin_dpo/margin_mean": 96.6115951538086, + "margin_dpo/margin_std": 116.60765075683594, + "step": 186 + }, + { + "KL/chosen_KL_mean": -87.90306091308594, + "KL/mean": -127.93620300292969, + "KL/rejected_KL_mean": -167.9693603515625, + "KL/std": 96.8105697631836, + "epoch": 0.2745961820851689, + "fcm_dpo/beta": 0.004888523370027542, + "fcm_dpo/delta": 0.008520994335412979, + "fcm_dpo/margin": 80.06629943847656, + "fcm_dpo/q_t": 0.41026172041893005, + "grad_norm": 22.329120635986328, + "learning_rate": 4.5624936864957555e-07, + "logits/chosen": -0.3108750581741333, + "logits/rejected": -0.3043569326400757, + "logps/chosen": -140.65951538085938, + "logps/ref_chosen": -52.7564582824707, + "logps/ref_rejected": -81.96910095214844, + "logps/rejected": -249.93846130371094, + "loss": 1.0963, + "margin_dpo/margin_mean": 80.06629943847656, + "margin_dpo/margin_std": 105.62159729003906, + "step": 187 + }, + { + "KL/chosen_KL_mean": -83.09587097167969, + "KL/mean": -132.18377685546875, + "KL/rejected_KL_mean": -181.27166748046875, + "KL/std": 108.80839538574219, + "epoch": 0.27606461086637296, + "fcm_dpo/beta": 0.004818486049771309, + "fcm_dpo/delta": -0.07663469016551971, + "fcm_dpo/margin": 98.17579650878906, + "fcm_dpo/q_t": 0.3935472369194031, + "grad_norm": 30.24648094177246, + "learning_rate": 4.5552139770089454e-07, + "logits/chosen": -0.29885581135749817, + "logits/rejected": -0.3061617612838745, + "logps/chosen": -132.5113525390625, + "logps/ref_chosen": -49.415489196777344, + "logps/ref_rejected": -89.54043579101562, + "logps/rejected": -270.81207275390625, + "loss": 1.0432, + "margin_dpo/margin_mean": 98.17579650878906, + "margin_dpo/margin_std": 119.95960998535156, + "step": 188 + }, + { + "KL/chosen_KL_mean": -98.87287902832031, + "KL/mean": -138.66372680664062, + "KL/rejected_KL_mean": -178.45455932617188, + "KL/std": 109.66006469726562, + "epoch": 0.2775330396475771, + "fcm_dpo/beta": 0.004812294617295265, + "fcm_dpo/delta": 0.017675260081887245, + "fcm_dpo/margin": 79.58168029785156, + "fcm_dpo/q_t": 0.41616469621658325, + "grad_norm": 26.0447998046875, + "learning_rate": 4.5478801107224794e-07, + "logits/chosen": -0.3316497206687927, + "logits/rejected": -0.3185557723045349, + "logps/chosen": -151.27183532714844, + "logps/ref_chosen": -52.39896011352539, + "logps/ref_rejected": -72.16735076904297, + "logps/rejected": -250.62191772460938, + "loss": 1.1307, + "margin_dpo/margin_mean": 79.58168029785156, + "margin_dpo/margin_std": 134.16441345214844, + "step": 189 + }, + { + "KL/chosen_KL_mean": -106.78004455566406, + "KL/mean": -154.5462646484375, + "KL/rejected_KL_mean": -202.3125, + "KL/std": 118.31991577148438, + "epoch": 0.2790014684287812, + "fcm_dpo/beta": 0.004810405895113945, + "fcm_dpo/delta": -0.0634998232126236, + "fcm_dpo/margin": 95.53245544433594, + "fcm_dpo/q_t": 0.39865702390670776, + "grad_norm": 18.914052963256836, + "learning_rate": 4.5404922808905543e-07, + "logits/chosen": -0.3244064450263977, + "logits/rejected": -0.31318405270576477, + "logps/chosen": -171.46310424804688, + "logps/ref_chosen": -64.68305969238281, + "logps/ref_rejected": -102.55052185058594, + "logps/rejected": -304.863037109375, + "loss": 1.0853, + "margin_dpo/margin_mean": 95.53245544433594, + "margin_dpo/margin_std": 137.739990234375, + "step": 190 + }, + { + "KL/chosen_KL_mean": -99.22306823730469, + "KL/mean": -168.31130981445312, + "KL/rejected_KL_mean": -237.39950561523438, + "KL/std": 138.23770141601562, + "epoch": 0.28046989720998533, + "fcm_dpo/beta": 0.004560886882245541, + "fcm_dpo/delta": -0.24714502692222595, + "fcm_dpo/margin": 138.17645263671875, + "fcm_dpo/q_t": 0.3622073531150818, + "grad_norm": 21.69601058959961, + "learning_rate": 4.5330506821893565e-07, + "logits/chosen": -0.3293009102344513, + "logits/rejected": -0.30673933029174805, + "logps/chosen": -167.88194274902344, + "logps/ref_chosen": -68.65887451171875, + "logps/ref_rejected": -110.1396713256836, + "logps/rejected": -347.5391845703125, + "loss": 0.955, + "margin_dpo/margin_mean": 138.17645263671875, + "margin_dpo/margin_std": 153.68263244628906, + "step": 191 + }, + { + "KL/chosen_KL_mean": -127.37928009033203, + "KL/mean": -173.00198364257812, + "KL/rejected_KL_mean": -218.62469482421875, + "KL/std": 119.46993255615234, + "epoch": 0.28193832599118945, + "fcm_dpo/beta": 0.0045256055891513824, + "fcm_dpo/delta": -0.013612883165478706, + "fcm_dpo/margin": 91.24540710449219, + "fcm_dpo/q_t": 0.4095039367675781, + "grad_norm": 25.06287956237793, + "learning_rate": 4.5255555107119336e-07, + "logits/chosen": -0.29608154296875, + "logits/rejected": -0.2969720959663391, + "logps/chosen": -197.106201171875, + "logps/ref_chosen": -69.72691345214844, + "logps/ref_rejected": -103.32135009765625, + "logps/rejected": -321.946044921875, + "loss": 1.1165, + "margin_dpo/margin_mean": 91.24540710449219, + "margin_dpo/margin_std": 148.260009765625, + "step": 192 + }, + { + "KL/chosen_KL_mean": -125.8011474609375, + "KL/mean": -152.323486328125, + "KL/rejected_KL_mean": -178.84580993652344, + "KL/std": 109.30430603027344, + "epoch": 0.2834067547723935, + "fcm_dpo/beta": 0.0045287711545825005, + "fcm_dpo/delta": 0.041177622973918915, + "fcm_dpo/margin": 53.04465103149414, + "fcm_dpo/q_t": 0.44289711117744446, + "grad_norm": 29.155603408813477, + "learning_rate": 4.5180069639630236e-07, + "logits/chosen": -0.3240417540073395, + "logits/rejected": -0.319375216960907, + "logps/chosen": -185.99163818359375, + "logps/ref_chosen": -60.19049835205078, + "logps/ref_rejected": -76.40755462646484, + "logps/rejected": -255.25335693359375, + "loss": 1.2557, + "margin_dpo/margin_mean": 53.04465103149414, + "margin_dpo/margin_std": 140.07958984375, + "step": 193 + }, + { + "KL/chosen_KL_mean": -77.91348266601562, + "KL/mean": -120.96083068847656, + "KL/rejected_KL_mean": -164.00819396972656, + "KL/std": 90.18110656738281, + "epoch": 0.28487518355359764, + "fcm_dpo/beta": 0.004540526773780584, + "fcm_dpo/delta": 0.00930863805115223, + "fcm_dpo/margin": 86.09469604492188, + "fcm_dpo/q_t": 0.40839725732803345, + "grad_norm": 16.880617141723633, + "learning_rate": 4.510405240853854e-07, + "logits/chosen": -0.2117328941822052, + "logits/rejected": -0.19627614319324493, + "logps/chosen": -115.75386047363281, + "logps/ref_chosen": -37.84037399291992, + "logps/ref_rejected": -60.684783935546875, + "logps/rejected": -224.69296264648438, + "loss": 1.0819, + "margin_dpo/margin_mean": 86.09468841552734, + "margin_dpo/margin_std": 99.02046203613281, + "step": 194 + }, + { + "KL/chosen_KL_mean": -123.40607452392578, + "KL/mean": -170.14013671875, + "KL/rejected_KL_mean": -216.87420654296875, + "KL/std": 114.27099609375, + "epoch": 0.28634361233480177, + "fcm_dpo/beta": 0.004536244552582502, + "fcm_dpo/delta": -0.02507840283215046, + "fcm_dpo/margin": 93.46810913085938, + "fcm_dpo/q_t": 0.40300172567367554, + "grad_norm": 23.332624435424805, + "learning_rate": 4.5027505416968985e-07, + "logits/chosen": -0.2611733078956604, + "logits/rejected": -0.2804575562477112, + "logps/chosen": -178.29763793945312, + "logps/ref_chosen": -54.891571044921875, + "logps/ref_rejected": -96.77095794677734, + "logps/rejected": -313.6451416015625, + "loss": 1.0732, + "margin_dpo/margin_mean": 93.46810913085938, + "margin_dpo/margin_std": 118.67242431640625, + "step": 195 + }, + { + "KL/chosen_KL_mean": -94.62376403808594, + "KL/mean": -147.49710083007812, + "KL/rejected_KL_mean": -200.3704376220703, + "KL/std": 114.38645935058594, + "epoch": 0.2878120411160059, + "fcm_dpo/beta": 0.004456968978047371, + "fcm_dpo/delta": -0.07591746747493744, + "fcm_dpo/margin": 105.74667358398438, + "fcm_dpo/q_t": 0.3952023983001709, + "grad_norm": 17.2235107421875, + "learning_rate": 4.495043068200599e-07, + "logits/chosen": -0.2857983708381653, + "logits/rejected": -0.2708747684955597, + "logps/chosen": -147.8690185546875, + "logps/ref_chosen": -53.245243072509766, + "logps/ref_rejected": -76.05294799804688, + "logps/rejected": -276.42340087890625, + "loss": 1.0618, + "margin_dpo/margin_mean": 105.74667358398438, + "margin_dpo/margin_std": 138.9565887451172, + "step": 196 + }, + { + "KL/chosen_KL_mean": -97.66607666015625, + "KL/mean": -138.1141357421875, + "KL/rejected_KL_mean": -178.56219482421875, + "KL/std": 99.23837280273438, + "epoch": 0.28928046989721, + "fcm_dpo/beta": 0.0045026084408164024, + "fcm_dpo/delta": 0.03682290017604828, + "fcm_dpo/margin": 80.89613342285156, + "fcm_dpo/q_t": 0.4166555404663086, + "grad_norm": 18.91534996032715, + "learning_rate": 4.4872830234640493e-07, + "logits/chosen": -0.28415030241012573, + "logits/rejected": -0.2793928384780884, + "logps/chosen": -158.08641052246094, + "logps/ref_chosen": -60.42033386230469, + "logps/ref_rejected": -77.20890808105469, + "logps/rejected": -255.7711181640625, + "loss": 1.1147, + "margin_dpo/margin_mean": 80.89613342285156, + "margin_dpo/margin_std": 111.98675537109375, + "step": 197 + }, + { + "KL/chosen_KL_mean": -110.67520141601562, + "KL/mean": -162.27679443359375, + "KL/rejected_KL_mean": -213.87840270996094, + "KL/std": 122.85459899902344, + "epoch": 0.2907488986784141, + "fcm_dpo/beta": 0.004457796923816204, + "fcm_dpo/delta": -0.06293203681707382, + "fcm_dpo/margin": 103.20319366455078, + "fcm_dpo/q_t": 0.3982738256454468, + "grad_norm": 22.420948028564453, + "learning_rate": 4.479470611971645e-07, + "logits/chosen": -0.31583186984062195, + "logits/rejected": -0.31620723009109497, + "logps/chosen": -165.71139526367188, + "logps/ref_chosen": -55.03618621826172, + "logps/ref_rejected": -97.24325561523438, + "logps/rejected": -311.12164306640625, + "loss": 1.0625, + "margin_dpo/margin_mean": 103.20319366455078, + "margin_dpo/margin_std": 139.06460571289062, + "step": 198 + }, + { + "KL/chosen_KL_mean": -104.47000885009766, + "KL/mean": -155.94883728027344, + "KL/rejected_KL_mean": -207.42767333984375, + "KL/std": 111.98977661132812, + "epoch": 0.2922173274596182, + "fcm_dpo/beta": 0.004380636848509312, + "fcm_dpo/delta": -0.054408542811870575, + "fcm_dpo/margin": 102.95765686035156, + "fcm_dpo/q_t": 0.39817678928375244, + "grad_norm": 23.365877151489258, + "learning_rate": 4.471606039587695e-07, + "logits/chosen": -0.3535653352737427, + "logits/rejected": -0.3379266858100891, + "logps/chosen": -161.298828125, + "logps/ref_chosen": -56.828826904296875, + "logps/ref_rejected": -84.64820861816406, + "logps/rejected": -292.07586669921875, + "loss": 1.0706, + "margin_dpo/margin_mean": 102.95765686035156, + "margin_dpo/margin_std": 137.37387084960938, + "step": 199 + }, + { + "KL/chosen_KL_mean": -102.97698974609375, + "KL/mean": -154.0308837890625, + "KL/rejected_KL_mean": -205.0847625732422, + "KL/std": 120.30370330810547, + "epoch": 0.2936857562408223, + "fcm_dpo/beta": 0.004349041730165482, + "fcm_dpo/delta": -0.046485088765621185, + "fcm_dpo/margin": 102.1077880859375, + "fcm_dpo/q_t": 0.4016646146774292, + "grad_norm": 20.43732452392578, + "learning_rate": 4.4636895135509966e-07, + "logits/chosen": -0.28155362606048584, + "logits/rejected": -0.2662222385406494, + "logps/chosen": -156.04405212402344, + "logps/ref_chosen": -53.06706237792969, + "logps/ref_rejected": -80.60843658447266, + "logps/rejected": -285.6932067871094, + "loss": 1.0923, + "margin_dpo/margin_mean": 102.1077880859375, + "margin_dpo/margin_std": 154.43467712402344, + "step": 200 + }, + { + "KL/chosen_KL_mean": -107.32299041748047, + "KL/mean": -158.21717834472656, + "KL/rejected_KL_mean": -209.11135864257812, + "KL/std": 126.88838195800781, + "epoch": 0.29515418502202645, + "fcm_dpo/beta": 0.004335206001996994, + "fcm_dpo/delta": -0.04319122061133385, + "fcm_dpo/margin": 101.78836059570312, + "fcm_dpo/q_t": 0.4008065462112427, + "grad_norm": 20.02793312072754, + "learning_rate": 4.455721242469372e-07, + "logits/chosen": -0.3679494261741638, + "logits/rejected": -0.36475256085395813, + "logps/chosen": -182.7252197265625, + "logps/ref_chosen": -75.4022216796875, + "logps/ref_rejected": -114.80821990966797, + "logps/rejected": -323.9195861816406, + "loss": 1.0816, + "margin_dpo/margin_mean": 101.78836059570312, + "margin_dpo/margin_std": 143.44985961914062, + "step": 201 + }, + { + "KL/chosen_KL_mean": -111.05018615722656, + "KL/mean": -146.92918395996094, + "KL/rejected_KL_mean": -182.8081817626953, + "KL/std": 109.39289855957031, + "epoch": 0.2966226138032305, + "fcm_dpo/beta": 0.004375634714961052, + "fcm_dpo/delta": 0.08873856067657471, + "fcm_dpo/margin": 71.75799560546875, + "fcm_dpo/q_t": 0.43042024970054626, + "grad_norm": 20.026168823242188, + "learning_rate": 4.4477014363141755e-07, + "logits/chosen": -0.2920665740966797, + "logits/rejected": -0.30537718534469604, + "logps/chosen": -161.15150451660156, + "logps/ref_chosen": -50.101318359375, + "logps/ref_rejected": -86.98503112792969, + "logps/rejected": -269.793212890625, + "loss": 1.1858, + "margin_dpo/margin_mean": 71.75798797607422, + "margin_dpo/margin_std": 141.61204528808594, + "step": 202 + }, + { + "KL/chosen_KL_mean": -109.14225769042969, + "KL/mean": -152.8557586669922, + "KL/rejected_KL_mean": -196.5692138671875, + "KL/std": 111.0545654296875, + "epoch": 0.29809104258443464, + "fcm_dpo/beta": 0.004399011377245188, + "fcm_dpo/delta": 0.016021015122532845, + "fcm_dpo/margin": 87.4269790649414, + "fcm_dpo/q_t": 0.41227254271507263, + "grad_norm": 20.92191505432129, + "learning_rate": 4.439630306414758e-07, + "logits/chosen": -0.3324393033981323, + "logits/rejected": -0.321586549282074, + "logps/chosen": -169.751953125, + "logps/ref_chosen": -60.60969543457031, + "logps/ref_rejected": -85.89596557617188, + "logps/rejected": -282.4652099609375, + "loss": 1.1052, + "margin_dpo/margin_mean": 87.4269790649414, + "margin_dpo/margin_std": 122.52427673339844, + "step": 203 + }, + { + "KL/chosen_KL_mean": -122.31649780273438, + "KL/mean": -163.0527801513672, + "KL/rejected_KL_mean": -203.78907775878906, + "KL/std": 121.6878662109375, + "epoch": 0.29955947136563876, + "fcm_dpo/beta": 0.004431103356182575, + "fcm_dpo/delta": 0.04046226292848587, + "fcm_dpo/margin": 81.47259521484375, + "fcm_dpo/q_t": 0.4206123650074005, + "grad_norm": 26.054628372192383, + "learning_rate": 4.431508065452897e-07, + "logits/chosen": -0.4261574149131775, + "logits/rejected": -0.38839346170425415, + "logps/chosen": -202.4814453125, + "logps/ref_chosen": -80.16496276855469, + "logps/ref_rejected": -87.69590759277344, + "logps/rejected": -291.4849853515625, + "loss": 1.1504, + "margin_dpo/margin_mean": 81.47258758544922, + "margin_dpo/margin_std": 142.71099853515625, + "step": 204 + }, + { + "KL/chosen_KL_mean": -117.57252502441406, + "KL/mean": -171.2581024169922, + "KL/rejected_KL_mean": -224.94366455078125, + "KL/std": 124.95415496826172, + "epoch": 0.3010279001468429, + "fcm_dpo/beta": 0.004357962869107723, + "fcm_dpo/delta": -0.07328492403030396, + "fcm_dpo/margin": 107.37113189697266, + "fcm_dpo/q_t": 0.3927791714668274, + "grad_norm": 19.602901458740234, + "learning_rate": 4.4233349274571974e-07, + "logits/chosen": -0.34010183811187744, + "logits/rejected": -0.3110367953777313, + "logps/chosen": -176.95726013183594, + "logps/ref_chosen": -59.384735107421875, + "logps/ref_rejected": -85.12505340576172, + "logps/rejected": -310.0687255859375, + "loss": 1.0606, + "margin_dpo/margin_mean": 107.37113952636719, + "margin_dpo/margin_std": 136.42298889160156, + "step": 205 + }, + { + "KL/chosen_KL_mean": -108.13024139404297, + "KL/mean": -166.66561889648438, + "KL/rejected_KL_mean": -225.20098876953125, + "KL/std": 116.73199462890625, + "epoch": 0.302496328928047, + "fcm_dpo/beta": 0.004292918369174004, + "fcm_dpo/delta": -0.10862280428409576, + "fcm_dpo/margin": 117.07073974609375, + "fcm_dpo/q_t": 0.38319119811058044, + "grad_norm": 25.08537483215332, + "learning_rate": 4.415111107797445e-07, + "logits/chosen": -0.25357377529144287, + "logits/rejected": -0.25648266077041626, + "logps/chosen": -155.09474182128906, + "logps/ref_chosen": -46.964500427246094, + "logps/ref_rejected": -98.9534912109375, + "logps/rejected": -324.15447998046875, + "loss": 1.0155, + "margin_dpo/margin_mean": 117.07073974609375, + "margin_dpo/margin_std": 127.296875, + "step": 206 + }, + { + "KL/chosen_KL_mean": -99.36531829833984, + "KL/mean": -167.42587280273438, + "KL/rejected_KL_mean": -235.48643493652344, + "KL/std": 135.0901641845703, + "epoch": 0.3039647577092511, + "fcm_dpo/beta": 0.004175534471869469, + "fcm_dpo/delta": -0.1783892959356308, + "fcm_dpo/margin": 136.12110900878906, + "fcm_dpo/q_t": 0.37481075525283813, + "grad_norm": 22.83678436279297, + "learning_rate": 4.4068368231789365e-07, + "logits/chosen": -0.3456140458583832, + "logits/rejected": -0.3190155029296875, + "logps/chosen": -155.42156982421875, + "logps/ref_chosen": -56.05625915527344, + "logps/ref_rejected": -84.44779968261719, + "logps/rejected": -319.9342346191406, + "loss": 0.9899, + "margin_dpo/margin_mean": 136.12110900878906, + "margin_dpo/margin_std": 157.92230224609375, + "step": 207 + }, + { + "KL/chosen_KL_mean": -152.72695922851562, + "KL/mean": -205.685791015625, + "KL/rejected_KL_mean": -258.64459228515625, + "KL/std": 128.31884765625, + "epoch": 0.3054331864904552, + "fcm_dpo/beta": 0.004102812148630619, + "fcm_dpo/delta": -0.03629569336771965, + "fcm_dpo/margin": 105.91764068603516, + "fcm_dpo/q_t": 0.40152066946029663, + "grad_norm": 25.079753875732422, + "learning_rate": 4.398512291636768e-07, + "logits/chosen": -0.3910176157951355, + "logits/rejected": -0.3746778964996338, + "logps/chosen": -219.79457092285156, + "logps/ref_chosen": -67.06761169433594, + "logps/ref_rejected": -94.28689575195312, + "logps/rejected": -352.9315185546875, + "loss": 1.0928, + "margin_dpo/margin_mean": 105.91764831542969, + "margin_dpo/margin_std": 156.15579223632812, + "step": 208 + }, + { + "KL/chosen_KL_mean": -129.83139038085938, + "KL/mean": -176.5919952392578, + "KL/rejected_KL_mean": -223.35260009765625, + "KL/std": 117.19390869140625, + "epoch": 0.3069016152716593, + "fcm_dpo/beta": 0.004114994779229164, + "fcm_dpo/delta": 0.015758566558361053, + "fcm_dpo/margin": 93.5212173461914, + "fcm_dpo/q_t": 0.4131506383419037, + "grad_norm": 30.509929656982422, + "learning_rate": 4.3901377325300857e-07, + "logits/chosen": -0.26792603731155396, + "logits/rejected": -0.2554609179496765, + "logps/chosen": -186.01309204101562, + "logps/ref_chosen": -56.18169403076172, + "logps/ref_rejected": -80.94152069091797, + "logps/rejected": -304.29412841796875, + "loss": 1.1281, + "margin_dpo/margin_mean": 93.52120971679688, + "margin_dpo/margin_std": 149.02239990234375, + "step": 209 + }, + { + "KL/chosen_KL_mean": -116.4477767944336, + "KL/mean": -170.14013671875, + "KL/rejected_KL_mean": -223.83248901367188, + "KL/std": 119.5206069946289, + "epoch": 0.30837004405286345, + "fcm_dpo/beta": 0.004107258282601833, + "fcm_dpo/delta": -0.04311756044626236, + "fcm_dpo/margin": 107.38471984863281, + "fcm_dpo/q_t": 0.40103164315223694, + "grad_norm": 24.466625213623047, + "learning_rate": 4.381713366536311e-07, + "logits/chosen": -0.30964159965515137, + "logits/rejected": -0.3021623492240906, + "logps/chosen": -162.81959533691406, + "logps/ref_chosen": -46.371822357177734, + "logps/ref_rejected": -76.68162536621094, + "logps/rejected": -300.51409912109375, + "loss": 1.0767, + "margin_dpo/margin_mean": 107.38471221923828, + "margin_dpo/margin_std": 146.1619873046875, + "step": 210 + }, + { + "KL/chosen_KL_mean": -167.79052734375, + "KL/mean": -213.89144897460938, + "KL/rejected_KL_mean": -259.99237060546875, + "KL/std": 136.38101196289062, + "epoch": 0.30983847283406757, + "fcm_dpo/beta": 0.0040979161858558655, + "fcm_dpo/delta": 0.02301332727074623, + "fcm_dpo/margin": 92.20182800292969, + "fcm_dpo/q_t": 0.4184762239456177, + "grad_norm": 33.73249816894531, + "learning_rate": 4.373239415645323e-07, + "logits/chosen": -0.3087081015110016, + "logits/rejected": -0.26862210035324097, + "logps/chosen": -246.72286987304688, + "logps/ref_chosen": -78.93235778808594, + "logps/ref_rejected": -86.82098388671875, + "logps/rejected": -346.8133544921875, + "loss": 1.1425, + "margin_dpo/margin_mean": 92.20182800292969, + "margin_dpo/margin_std": 158.70635986328125, + "step": 211 + }, + { + "KL/chosen_KL_mean": -138.05943298339844, + "KL/mean": -203.9874267578125, + "KL/rejected_KL_mean": -269.9154052734375, + "KL/std": 145.75244140625, + "epoch": 0.31130690161527164, + "fcm_dpo/beta": 0.003989426419138908, + "fcm_dpo/delta": -0.13511215150356293, + "fcm_dpo/margin": 131.85598754882812, + "fcm_dpo/q_t": 0.382382869720459, + "grad_norm": 24.812414169311523, + "learning_rate": 4.3647161031536086e-07, + "logits/chosen": -0.26592007279396057, + "logits/rejected": -0.25487691164016724, + "logps/chosen": -196.25643920898438, + "logps/ref_chosen": -58.19701385498047, + "logps/ref_rejected": -103.05785369873047, + "logps/rejected": -372.9732666015625, + "loss": 1.0304, + "margin_dpo/margin_mean": 131.85598754882812, + "margin_dpo/margin_std": 161.92364501953125, + "step": 212 + }, + { + "KL/chosen_KL_mean": -129.4147186279297, + "KL/mean": -192.4105224609375, + "KL/rejected_KL_mean": -255.40631103515625, + "KL/std": 128.76019287109375, + "epoch": 0.31277533039647576, + "fcm_dpo/beta": 0.003918571397662163, + "fcm_dpo/delta": -0.09918186068534851, + "fcm_dpo/margin": 125.9916000366211, + "fcm_dpo/q_t": 0.3877994418144226, + "grad_norm": 32.999141693115234, + "learning_rate": 4.3561436536583774e-07, + "logits/chosen": -0.3247559368610382, + "logits/rejected": -0.30033838748931885, + "logps/chosen": -196.92742919921875, + "logps/ref_chosen": -67.51271057128906, + "logps/ref_rejected": -93.91471862792969, + "logps/rejected": -349.321044921875, + "loss": 1.0348, + "margin_dpo/margin_mean": 125.9916000366211, + "margin_dpo/margin_std": 152.36477661132812, + "step": 213 + }, + { + "KL/chosen_KL_mean": -111.72816467285156, + "KL/mean": -168.243896484375, + "KL/rejected_KL_mean": -224.75961303710938, + "KL/std": 126.36701965332031, + "epoch": 0.3142437591776799, + "fcm_dpo/beta": 0.0038879900239408016, + "fcm_dpo/delta": -0.04128566384315491, + "fcm_dpo/margin": 113.03146362304688, + "fcm_dpo/q_t": 0.40068867802619934, + "grad_norm": 22.442670822143555, + "learning_rate": 4.3475222930516473e-07, + "logits/chosen": -0.2424134612083435, + "logits/rejected": -0.24728670716285706, + "logps/chosen": -153.3330535888672, + "logps/ref_chosen": -41.604888916015625, + "logps/ref_rejected": -77.51741027832031, + "logps/rejected": -302.27703857421875, + "loss": 1.0691, + "margin_dpo/margin_mean": 113.03147888183594, + "margin_dpo/margin_std": 146.84898376464844, + "step": 214 + }, + { + "KL/chosen_KL_mean": -130.1924591064453, + "KL/mean": -189.68450927734375, + "KL/rejected_KL_mean": -249.17657470703125, + "KL/std": 129.92074584960938, + "epoch": 0.315712187958884, + "fcm_dpo/beta": 0.0038366110529750586, + "fcm_dpo/delta": -0.05947209149599075, + "fcm_dpo/margin": 118.9841079711914, + "fcm_dpo/q_t": 0.3948417901992798, + "grad_norm": 26.03775405883789, + "learning_rate": 4.3388522485142885e-07, + "logits/chosen": -0.27569520473480225, + "logits/rejected": -0.2664262354373932, + "logps/chosen": -183.4717254638672, + "logps/ref_chosen": -53.279266357421875, + "logps/ref_rejected": -89.96464538574219, + "logps/rejected": -339.1412353515625, + "loss": 1.044, + "margin_dpo/margin_mean": 118.9841079711914, + "margin_dpo/margin_std": 135.40081787109375, + "step": 215 + }, + { + "KL/chosen_KL_mean": -134.6547088623047, + "KL/mean": -192.75794982910156, + "KL/rejected_KL_mean": -250.8612060546875, + "KL/std": 135.34701538085938, + "epoch": 0.31718061674008813, + "fcm_dpo/beta": 0.0038237408734858036, + "fcm_dpo/delta": -0.046559788286685944, + "fcm_dpo/margin": 116.20650482177734, + "fcm_dpo/q_t": 0.4005330502986908, + "grad_norm": 25.67644691467285, + "learning_rate": 4.330133748510036e-07, + "logits/chosen": -0.2784517705440521, + "logits/rejected": -0.26232653856277466, + "logps/chosen": -183.54251098632812, + "logps/ref_chosen": -48.887794494628906, + "logps/ref_rejected": -77.19892883300781, + "logps/rejected": -328.06011962890625, + "loss": 1.085, + "margin_dpo/margin_mean": 116.20650482177734, + "margin_dpo/margin_std": 166.7809295654297, + "step": 216 + }, + { + "KL/chosen_KL_mean": -133.6094970703125, + "KL/mean": -202.16543579101562, + "KL/rejected_KL_mean": -270.72137451171875, + "KL/std": 137.3214111328125, + "epoch": 0.3186490455212922, + "fcm_dpo/beta": 0.0037270013708621264, + "fcm_dpo/delta": -0.11725132167339325, + "fcm_dpo/margin": 137.11187744140625, + "fcm_dpo/q_t": 0.3843996822834015, + "grad_norm": 21.137720108032227, + "learning_rate": 4.3213670227794757e-07, + "logits/chosen": -0.26740846037864685, + "logits/rejected": -0.2628672122955322, + "logps/chosen": -183.45480346679688, + "logps/ref_chosen": -49.845306396484375, + "logps/ref_rejected": -100.07832336425781, + "logps/rejected": -370.7996826171875, + "loss": 1.0138, + "margin_dpo/margin_mean": 137.11187744140625, + "margin_dpo/margin_std": 154.92494201660156, + "step": 217 + }, + { + "KL/chosen_KL_mean": -141.71827697753906, + "KL/mean": -194.88088989257812, + "KL/rejected_KL_mean": -248.04351806640625, + "KL/std": 136.5504150390625, + "epoch": 0.3201174743024963, + "fcm_dpo/beta": 0.0037083416245877743, + "fcm_dpo/delta": 0.0059033287689089775, + "fcm_dpo/margin": 106.32524108886719, + "fcm_dpo/q_t": 0.4114874601364136, + "grad_norm": 22.63395881652832, + "learning_rate": 4.3125523023339815e-07, + "logits/chosen": -0.2704193592071533, + "logits/rejected": -0.26462244987487793, + "logps/chosen": -200.29495239257812, + "logps/ref_chosen": -58.576683044433594, + "logps/ref_rejected": -87.84639739990234, + "logps/rejected": -335.8899230957031, + "loss": 1.1123, + "margin_dpo/margin_mean": 106.32524108886719, + "margin_dpo/margin_std": 159.38711547851562, + "step": 218 + }, + { + "KL/chosen_KL_mean": -149.95846557617188, + "KL/mean": -195.880859375, + "KL/rejected_KL_mean": -241.8032684326172, + "KL/std": 140.34046936035156, + "epoch": 0.32158590308370044, + "fcm_dpo/beta": 0.0037643599789589643, + "fcm_dpo/delta": 0.05545644462108612, + "fcm_dpo/margin": 91.84481048583984, + "fcm_dpo/q_t": 0.4227282404899597, + "grad_norm": 27.498462677001953, + "learning_rate": 4.303689819449636e-07, + "logits/chosen": -0.34284111857414246, + "logits/rejected": -0.33793115615844727, + "logps/chosen": -211.04232788085938, + "logps/ref_chosen": -61.083858489990234, + "logps/ref_rejected": -85.83042907714844, + "logps/rejected": -327.6336975097656, + "loss": 1.1678, + "margin_dpo/margin_mean": 91.84481048583984, + "margin_dpo/margin_std": 171.24391174316406, + "step": 219 + }, + { + "KL/chosen_KL_mean": -168.154296875, + "KL/mean": -206.7792205810547, + "KL/rejected_KL_mean": -245.40414428710938, + "KL/std": 120.42225646972656, + "epoch": 0.32305433186490456, + "fcm_dpo/beta": 0.00381092494353652, + "fcm_dpo/delta": 0.10894529521465302, + "fcm_dpo/margin": 77.24984741210938, + "fcm_dpo/q_t": 0.4311019778251648, + "grad_norm": 24.416948318481445, + "learning_rate": 4.2947798076611047e-07, + "logits/chosen": -0.2805694043636322, + "logits/rejected": -0.2573145031929016, + "logps/chosen": -238.18557739257812, + "logps/ref_chosen": -70.03128051757812, + "logps/ref_rejected": -87.68551635742188, + "logps/rejected": -333.08966064453125, + "loss": 1.1685, + "margin_dpo/margin_mean": 77.24984741210938, + "margin_dpo/margin_std": 125.33375549316406, + "step": 220 + }, + { + "KL/chosen_KL_mean": -144.7303466796875, + "KL/mean": -229.44322204589844, + "KL/rejected_KL_mean": -314.1560974121094, + "KL/std": 154.45883178710938, + "epoch": 0.3245227606461087, + "fcm_dpo/beta": 0.0037145623937249184, + "fcm_dpo/delta": -0.24419276416301727, + "fcm_dpo/margin": 169.42575073242188, + "fcm_dpo/q_t": 0.3563760221004486, + "grad_norm": 25.376220703125, + "learning_rate": 4.285822501755485e-07, + "logits/chosen": -0.28012099862098694, + "logits/rejected": -0.28704455494880676, + "logps/chosen": -196.8850555419922, + "logps/ref_chosen": -52.15470886230469, + "logps/ref_rejected": -106.46768188476562, + "logps/rejected": -420.623779296875, + "loss": 0.9329, + "margin_dpo/margin_mean": 169.42575073242188, + "margin_dpo/margin_std": 157.756103515625, + "step": 221 + }, + { + "KL/chosen_KL_mean": -154.47286987304688, + "KL/mean": -217.7156982421875, + "KL/rejected_KL_mean": -280.95849609375, + "KL/std": 141.71084594726562, + "epoch": 0.32599118942731276, + "fcm_dpo/beta": 0.0036375990603119135, + "fcm_dpo/delta": -0.06309865415096283, + "fcm_dpo/margin": 126.48562622070312, + "fcm_dpo/q_t": 0.39524269104003906, + "grad_norm": 21.781951904296875, + "learning_rate": 4.276818137766118e-07, + "logits/chosen": -0.3290114402770996, + "logits/rejected": -0.3324123024940491, + "logps/chosen": -215.4439697265625, + "logps/ref_chosen": -60.971099853515625, + "logps/ref_rejected": -100.00115203857422, + "logps/rejected": -380.95965576171875, + "loss": 1.0556, + "margin_dpo/margin_mean": 126.4856185913086, + "margin_dpo/margin_std": 158.17788696289062, + "step": 222 + }, + { + "KL/chosen_KL_mean": -162.42262268066406, + "KL/mean": -216.47817993164062, + "KL/rejected_KL_mean": -270.53375244140625, + "KL/std": 139.66900634765625, + "epoch": 0.3274596182085169, + "fcm_dpo/beta": 0.0036115439143031836, + "fcm_dpo/delta": 0.009921977296471596, + "fcm_dpo/margin": 108.11112213134766, + "fcm_dpo/q_t": 0.4122130274772644, + "grad_norm": 25.37874412536621, + "learning_rate": 4.2677669529663686e-07, + "logits/chosen": -0.24600395560264587, + "logits/rejected": -0.2434278130531311, + "logps/chosen": -215.06320190429688, + "logps/ref_chosen": -52.64057540893555, + "logps/ref_rejected": -82.82502746582031, + "logps/rejected": -353.3587646484375, + "loss": 1.1264, + "margin_dpo/margin_mean": 108.11112213134766, + "margin_dpo/margin_std": 176.22784423828125, + "step": 223 + }, + { + "KL/chosen_KL_mean": -141.04718017578125, + "KL/mean": -202.54745483398438, + "KL/rejected_KL_mean": -264.0477294921875, + "KL/std": 154.926513671875, + "epoch": 0.328928046989721, + "fcm_dpo/beta": 0.003568105399608612, + "fcm_dpo/delta": -0.04215101897716522, + "fcm_dpo/margin": 123.00054168701172, + "fcm_dpo/q_t": 0.4034996032714844, + "grad_norm": 23.84183120727539, + "learning_rate": 4.2586691858633747e-07, + "logits/chosen": -0.3231106102466583, + "logits/rejected": -0.3093082904815674, + "logps/chosen": -189.64259338378906, + "logps/ref_chosen": -48.59541320800781, + "logps/ref_rejected": -77.11648559570312, + "logps/rejected": -341.1642150878906, + "loss": 1.0901, + "margin_dpo/margin_mean": 123.00054168701172, + "margin_dpo/margin_std": 179.27581787109375, + "step": 224 + }, + { + "KL/chosen_KL_mean": -163.38140869140625, + "KL/mean": -235.73553466796875, + "KL/rejected_KL_mean": -308.089599609375, + "KL/std": 149.63555908203125, + "epoch": 0.3303964757709251, + "fcm_dpo/beta": 0.003522678278386593, + "fcm_dpo/delta": -0.11579211056232452, + "fcm_dpo/margin": 144.70822143554688, + "fcm_dpo/q_t": 0.3856618404388428, + "grad_norm": 22.832763671875, + "learning_rate": 4.249525076191759e-07, + "logits/chosen": -0.30160531401634216, + "logits/rejected": -0.29380398988723755, + "logps/chosen": -221.38186645507812, + "logps/ref_chosen": -58.000465393066406, + "logps/ref_rejected": -99.90291595458984, + "logps/rejected": -407.9925231933594, + "loss": 1.0322, + "margin_dpo/margin_mean": 144.70822143554688, + "margin_dpo/margin_std": 182.02337646484375, + "step": 225 + }, + { + "KL/chosen_KL_mean": -134.9498291015625, + "KL/mean": -192.45901489257812, + "KL/rejected_KL_mean": -249.96820068359375, + "KL/std": 144.02420043945312, + "epoch": 0.33186490455212925, + "fcm_dpo/beta": 0.0034828565549105406, + "fcm_dpo/delta": -0.001416236162185669, + "fcm_dpo/margin": 115.01838684082031, + "fcm_dpo/q_t": 0.4106452763080597, + "grad_norm": 30.9562931060791, + "learning_rate": 4.2403348649073167e-07, + "logits/chosen": -0.36006784439086914, + "logits/rejected": -0.32199037075042725, + "logps/chosen": -193.8486328125, + "logps/ref_chosen": -58.898799896240234, + "logps/ref_rejected": -78.68775939941406, + "logps/rejected": -328.65594482421875, + "loss": 1.1053, + "margin_dpo/margin_mean": 115.01838684082031, + "margin_dpo/margin_std": 166.55606079101562, + "step": 226 + }, + { + "KL/chosen_KL_mean": -160.70245361328125, + "KL/mean": -233.0044403076172, + "KL/rejected_KL_mean": -305.3064270019531, + "KL/std": 166.17343139648438, + "epoch": 0.3333333333333333, + "fcm_dpo/beta": 0.0034378478303551674, + "fcm_dpo/delta": -0.10290348529815674, + "fcm_dpo/margin": 144.60397338867188, + "fcm_dpo/q_t": 0.38717547059059143, + "grad_norm": 22.616573333740234, + "learning_rate": 4.2310987941806615e-07, + "logits/chosen": -0.36182135343551636, + "logits/rejected": -0.3515620529651642, + "logps/chosen": -219.77462768554688, + "logps/ref_chosen": -59.072181701660156, + "logps/ref_rejected": -99.41236877441406, + "logps/rejected": -404.71881103515625, + "loss": 1.0293, + "margin_dpo/margin_mean": 144.60397338867188, + "margin_dpo/margin_std": 172.94851684570312, + "step": 227 + }, + { + "KL/chosen_KL_mean": -162.9698028564453, + "KL/mean": -214.10031127929688, + "KL/rejected_KL_mean": -265.2308349609375, + "KL/std": 134.146728515625, + "epoch": 0.33480176211453744, + "fcm_dpo/beta": 0.0034628671128302813, + "fcm_dpo/delta": 0.0474376454949379, + "fcm_dpo/margin": 102.26102447509766, + "fcm_dpo/q_t": 0.418906033039093, + "grad_norm": 26.848163604736328, + "learning_rate": 4.2218171073908463e-07, + "logits/chosen": -0.31502634286880493, + "logits/rejected": -0.2975466251373291, + "logps/chosen": -228.861083984375, + "logps/ref_chosen": -65.89128875732422, + "logps/ref_rejected": -91.04875183105469, + "logps/rejected": -356.2795715332031, + "loss": 1.1398, + "margin_dpo/margin_mean": 102.26102447509766, + "margin_dpo/margin_std": 164.67837524414062, + "step": 228 + }, + { + "KL/chosen_KL_mean": -167.70614624023438, + "KL/mean": -222.6710968017578, + "KL/rejected_KL_mean": -277.63604736328125, + "KL/std": 156.47640991210938, + "epoch": 0.33627019089574156, + "fcm_dpo/beta": 0.003477250225841999, + "fcm_dpo/delta": 0.018432918936014175, + "fcm_dpo/margin": 109.92990112304688, + "fcm_dpo/q_t": 0.41217368841171265, + "grad_norm": 31.090105056762695, + "learning_rate": 4.212490049118951e-07, + "logits/chosen": -0.40321260690689087, + "logits/rejected": -0.37525972723960876, + "logps/chosen": -238.4125213623047, + "logps/ref_chosen": -70.70637512207031, + "logps/ref_rejected": -84.52741241455078, + "logps/rejected": -362.1634521484375, + "loss": 1.1155, + "margin_dpo/margin_mean": 109.92990112304688, + "margin_dpo/margin_std": 164.13958740234375, + "step": 229 + }, + { + "KL/chosen_KL_mean": -126.01810455322266, + "KL/mean": -208.7505340576172, + "KL/rejected_KL_mean": -291.48297119140625, + "KL/std": 146.91603088378906, + "epoch": 0.3377386196769457, + "fcm_dpo/beta": 0.0033868225291371346, + "fcm_dpo/delta": -0.17020674049854279, + "fcm_dpo/margin": 165.46487426757812, + "fcm_dpo/q_t": 0.37030667066574097, + "grad_norm": 24.862947463989258, + "learning_rate": 4.203117865141635e-07, + "logits/chosen": -0.31466907262802124, + "logits/rejected": -0.31940126419067383, + "logps/chosen": -165.30010986328125, + "logps/ref_chosen": -39.282005310058594, + "logps/ref_rejected": -85.62191009521484, + "logps/rejected": -377.1048889160156, + "loss": 0.9708, + "margin_dpo/margin_mean": 165.46487426757812, + "margin_dpo/margin_std": 161.74786376953125, + "step": 230 + }, + { + "KL/chosen_KL_mean": -148.27967834472656, + "KL/mean": -205.89512634277344, + "KL/rejected_KL_mean": -263.51055908203125, + "KL/std": 131.91383361816406, + "epoch": 0.3392070484581498, + "fcm_dpo/beta": 0.003367940429598093, + "fcm_dpo/delta": 0.01239101029932499, + "fcm_dpo/margin": 115.23086547851562, + "fcm_dpo/q_t": 0.4120003581047058, + "grad_norm": 23.897953033447266, + "learning_rate": 4.1937008024246625e-07, + "logits/chosen": -0.3472822308540344, + "logits/rejected": -0.31850665807724, + "logps/chosen": -211.55612182617188, + "logps/ref_chosen": -63.27644348144531, + "logps/ref_rejected": -74.1239013671875, + "logps/rejected": -337.63446044921875, + "loss": 1.0961, + "margin_dpo/margin_mean": 115.23086547851562, + "margin_dpo/margin_std": 152.61228942871094, + "step": 231 + }, + { + "KL/chosen_KL_mean": -182.43360900878906, + "KL/mean": -230.69378662109375, + "KL/rejected_KL_mean": -278.9539794921875, + "KL/std": 157.08425903320312, + "epoch": 0.3406754772393539, + "fcm_dpo/beta": 0.0034012598916888237, + "fcm_dpo/delta": 0.07421056926250458, + "fcm_dpo/margin": 96.52035522460938, + "fcm_dpo/q_t": 0.4272800087928772, + "grad_norm": 25.275489807128906, + "learning_rate": 4.1842391091163933e-07, + "logits/chosen": -0.36489853262901306, + "logits/rejected": -0.3447696268558502, + "logps/chosen": -253.182373046875, + "logps/ref_chosen": -70.74876403808594, + "logps/ref_rejected": -83.97706604003906, + "logps/rejected": -362.9310302734375, + "loss": 1.1552, + "margin_dpo/margin_mean": 96.52035522460938, + "margin_dpo/margin_std": 163.61196899414062, + "step": 232 + }, + { + "KL/chosen_KL_mean": -166.96878051757812, + "KL/mean": -240.413330078125, + "KL/rejected_KL_mean": -313.8578796386719, + "KL/std": 165.35609436035156, + "epoch": 0.342143906020558, + "fcm_dpo/beta": 0.003372794948518276, + "fcm_dpo/delta": -0.10033433884382248, + "fcm_dpo/margin": 146.88909912109375, + "fcm_dpo/q_t": 0.3924492597579956, + "grad_norm": 26.219318389892578, + "learning_rate": 4.174733034541245e-07, + "logits/chosen": -0.31235527992248535, + "logits/rejected": -0.3153640925884247, + "logps/chosen": -221.85171508789062, + "logps/ref_chosen": -54.8829345703125, + "logps/ref_rejected": -107.4800796508789, + "logps/rejected": -421.33795166015625, + "loss": 1.0631, + "margin_dpo/margin_mean": 146.88909912109375, + "margin_dpo/margin_std": 210.21575927734375, + "step": 233 + }, + { + "KL/chosen_KL_mean": -165.90899658203125, + "KL/mean": -241.48690795898438, + "KL/rejected_KL_mean": -317.0648193359375, + "KL/std": 148.5336151123047, + "epoch": 0.3436123348017621, + "fcm_dpo/beta": 0.0032793269492685795, + "fcm_dpo/delta": -0.1023728996515274, + "fcm_dpo/margin": 151.15582275390625, + "fcm_dpo/q_t": 0.38733774423599243, + "grad_norm": 41.00167465209961, + "learning_rate": 4.165182829193126e-07, + "logits/chosen": -0.3150627017021179, + "logits/rejected": -0.34308189153671265, + "logps/chosen": -210.00350952148438, + "logps/ref_chosen": -44.094520568847656, + "logps/ref_rejected": -100.00663757324219, + "logps/rejected": -417.07147216796875, + "loss": 1.0245, + "margin_dpo/margin_mean": 151.15582275390625, + "margin_dpo/margin_std": 169.50576782226562, + "step": 234 + }, + { + "KL/chosen_KL_mean": -197.6452178955078, + "KL/mean": -246.17535400390625, + "KL/rejected_KL_mean": -294.70550537109375, + "KL/std": 142.49789428710938, + "epoch": 0.34508076358296624, + "fcm_dpo/beta": 0.0033345932606607676, + "fcm_dpo/delta": 0.07840821146965027, + "fcm_dpo/margin": 97.06028747558594, + "fcm_dpo/q_t": 0.42566415667533875, + "grad_norm": 30.126893997192383, + "learning_rate": 4.1555887447288255e-07, + "logits/chosen": -0.36974847316741943, + "logits/rejected": -0.35245949029922485, + "logps/chosen": -259.88311767578125, + "logps/ref_chosen": -62.237911224365234, + "logps/ref_rejected": -90.39506530761719, + "logps/rejected": -385.1005859375, + "loss": 1.166, + "margin_dpo/margin_mean": 97.06028747558594, + "margin_dpo/margin_std": 170.45330810546875, + "step": 235 + }, + { + "KL/chosen_KL_mean": -141.45745849609375, + "KL/mean": -219.01951599121094, + "KL/rejected_KL_mean": -296.58160400390625, + "KL/std": 149.04501342773438, + "epoch": 0.3465491923641703, + "fcm_dpo/beta": 0.003280568402260542, + "fcm_dpo/delta": -0.11472684144973755, + "fcm_dpo/margin": 155.12411499023438, + "fcm_dpo/q_t": 0.3806627690792084, + "grad_norm": 48.95918273925781, + "learning_rate": 4.1459510339613946e-07, + "logits/chosen": -0.3421390652656555, + "logits/rejected": -0.341775506734848, + "logps/chosen": -190.79881286621094, + "logps/ref_chosen": -49.34136199951172, + "logps/ref_rejected": -103.51162719726562, + "logps/rejected": -400.09320068359375, + "loss": 0.9902, + "margin_dpo/margin_mean": 155.12413024902344, + "margin_dpo/margin_std": 140.13827514648438, + "step": 236 + }, + { + "KL/chosen_KL_mean": -187.8695068359375, + "KL/mean": -248.0149383544922, + "KL/rejected_KL_mean": -308.1603698730469, + "KL/std": 149.92160034179688, + "epoch": 0.34801762114537443, + "fcm_dpo/beta": 0.0032692216336727142, + "fcm_dpo/delta": 0.006964612752199173, + "fcm_dpo/margin": 120.29085540771484, + "fcm_dpo/q_t": 0.41055381298065186, + "grad_norm": 27.945327758789062, + "learning_rate": 4.136269950853473e-07, + "logits/chosen": -0.3814457356929779, + "logits/rejected": -0.3774615526199341, + "logps/chosen": -242.03762817382812, + "logps/ref_chosen": -54.168121337890625, + "logps/ref_rejected": -94.78036499023438, + "logps/rejected": -402.94073486328125, + "loss": 1.1038, + "margin_dpo/margin_mean": 120.29085540771484, + "margin_dpo/margin_std": 172.14694213867188, + "step": 237 + }, + { + "KL/chosen_KL_mean": -166.24346923828125, + "KL/mean": -225.47650146484375, + "KL/rejected_KL_mean": -284.70953369140625, + "KL/std": 151.5367889404297, + "epoch": 0.34948604992657856, + "fcm_dpo/beta": 0.0032576932571828365, + "fcm_dpo/delta": 0.01427885890007019, + "fcm_dpo/margin": 118.46604919433594, + "fcm_dpo/q_t": 0.41405510902404785, + "grad_norm": 25.66066551208496, + "learning_rate": 4.126545750510605e-07, + "logits/chosen": -0.3540055751800537, + "logits/rejected": -0.3696235418319702, + "logps/chosen": -220.21658325195312, + "logps/ref_chosen": -53.973121643066406, + "logps/ref_rejected": -89.41795349121094, + "logps/rejected": -374.1274719238281, + "loss": 1.1057, + "margin_dpo/margin_mean": 118.46604919433594, + "margin_dpo/margin_std": 167.74099731445312, + "step": 238 + }, + { + "KL/chosen_KL_mean": -176.48590087890625, + "KL/mean": -241.95834350585938, + "KL/rejected_KL_mean": -307.4307861328125, + "KL/std": 140.23287963867188, + "epoch": 0.3509544787077827, + "fcm_dpo/beta": 0.0032407566905021667, + "fcm_dpo/delta": -0.026493586599826813, + "fcm_dpo/margin": 130.94488525390625, + "fcm_dpo/q_t": 0.40173038840293884, + "grad_norm": 41.689571380615234, + "learning_rate": 4.116778689174514e-07, + "logits/chosen": -0.36024659872055054, + "logits/rejected": -0.3487205505371094, + "logps/chosen": -234.58370971679688, + "logps/ref_chosen": -58.09782409667969, + "logps/ref_rejected": -93.59294128417969, + "logps/rejected": -401.02374267578125, + "loss": 1.0726, + "margin_dpo/margin_mean": 130.94488525390625, + "margin_dpo/margin_std": 158.7174530029297, + "step": 239 + }, + { + "KL/chosen_KL_mean": -184.19223022460938, + "KL/mean": -238.87380981445312, + "KL/rejected_KL_mean": -293.5553894042969, + "KL/std": 150.6685028076172, + "epoch": 0.3524229074889868, + "fcm_dpo/beta": 0.003267391351982951, + "fcm_dpo/delta": 0.044143058359622955, + "fcm_dpo/margin": 109.3631820678711, + "fcm_dpo/q_t": 0.41868656873703003, + "grad_norm": 34.611045837402344, + "learning_rate": 4.106969024216348e-07, + "logits/chosen": -0.38304704427719116, + "logits/rejected": -0.362338662147522, + "logps/chosen": -244.80670166015625, + "logps/ref_chosen": -60.6144905090332, + "logps/ref_rejected": -74.1185302734375, + "logps/rejected": -367.6739196777344, + "loss": 1.1445, + "margin_dpo/margin_mean": 109.36317443847656, + "margin_dpo/margin_std": 180.47584533691406, + "step": 240 + }, + { + "KL/chosen_KL_mean": -152.53372192382812, + "KL/mean": -237.73934936523438, + "KL/rejected_KL_mean": -322.9449462890625, + "KL/std": 166.03857421875, + "epoch": 0.35389133627019087, + "fcm_dpo/beta": 0.0032168994657695293, + "fcm_dpo/delta": -0.15657520294189453, + "fcm_dpo/margin": 170.4112548828125, + "fcm_dpo/q_t": 0.37778547406196594, + "grad_norm": 30.021371841430664, + "learning_rate": 4.097117014129903e-07, + "logits/chosen": -0.4348849952220917, + "logits/rejected": -0.4122951626777649, + "logps/chosen": -218.62478637695312, + "logps/ref_chosen": -66.091064453125, + "logps/ref_rejected": -88.06088256835938, + "logps/rejected": -411.005859375, + "loss": 1.0002, + "margin_dpo/margin_mean": 170.4112548828125, + "margin_dpo/margin_std": 193.95298767089844, + "step": 241 + }, + { + "KL/chosen_KL_mean": -178.14532470703125, + "KL/mean": -239.86007690429688, + "KL/rejected_KL_mean": -301.57489013671875, + "KL/std": 146.30352783203125, + "epoch": 0.355359765051395, + "fcm_dpo/beta": 0.0032022669911384583, + "fcm_dpo/delta": 0.004475157707929611, + "fcm_dpo/margin": 123.42952728271484, + "fcm_dpo/q_t": 0.4114469289779663, + "grad_norm": 35.95293045043945, + "learning_rate": 4.087222918524807e-07, + "logits/chosen": -0.35760676860809326, + "logits/rejected": -0.33343029022216797, + "logps/chosen": -246.00924682617188, + "logps/ref_chosen": -67.86392974853516, + "logps/ref_rejected": -83.36033630371094, + "logps/rejected": -384.9352111816406, + "loss": 1.1033, + "margin_dpo/margin_mean": 123.42953491210938, + "margin_dpo/margin_std": 175.8994140625, + "step": 242 + }, + { + "KL/chosen_KL_mean": -181.12774658203125, + "KL/mean": -257.05047607421875, + "KL/rejected_KL_mean": -332.9732360839844, + "KL/std": 163.24075317382812, + "epoch": 0.3568281938325991, + "fcm_dpo/beta": 0.0031442558392882347, + "fcm_dpo/delta": -0.08152244985103607, + "fcm_dpo/margin": 151.84548950195312, + "fcm_dpo/q_t": 0.3913443386554718, + "grad_norm": 27.06866455078125, + "learning_rate": 4.07728699811968e-07, + "logits/chosen": -0.34606635570526123, + "logits/rejected": -0.3166462182998657, + "logps/chosen": -244.2119903564453, + "logps/ref_chosen": -63.0842399597168, + "logps/ref_rejected": -76.33563232421875, + "logps/rejected": -409.3088684082031, + "loss": 1.0419, + "margin_dpo/margin_mean": 151.84548950195312, + "margin_dpo/margin_std": 185.63485717773438, + "step": 243 + }, + { + "KL/chosen_KL_mean": -161.24728393554688, + "KL/mean": -241.1260986328125, + "KL/rejected_KL_mean": -321.0049133300781, + "KL/std": 159.36138916015625, + "epoch": 0.35829662261380324, + "fcm_dpo/beta": 0.0030848030000925064, + "fcm_dpo/delta": -0.09792040288448334, + "fcm_dpo/margin": 159.75762939453125, + "fcm_dpo/q_t": 0.3867965638637543, + "grad_norm": 32.62556076049805, + "learning_rate": 4.067309514735267e-07, + "logits/chosen": -0.4207112193107605, + "logits/rejected": -0.4130573272705078, + "logps/chosen": -222.38796997070312, + "logps/ref_chosen": -61.140689849853516, + "logps/ref_rejected": -94.89193725585938, + "logps/rejected": -415.8968505859375, + "loss": 1.0159, + "margin_dpo/margin_mean": 159.75762939453125, + "margin_dpo/margin_std": 168.3123779296875, + "step": 244 + }, + { + "KL/chosen_KL_mean": -189.32240295410156, + "KL/mean": -250.90139770507812, + "KL/rejected_KL_mean": -312.48040771484375, + "KL/std": 155.51824951171875, + "epoch": 0.35976505139500736, + "fcm_dpo/beta": 0.003060833550989628, + "fcm_dpo/delta": 0.022731080651283264, + "fcm_dpo/margin": 123.15798950195312, + "fcm_dpo/q_t": 0.4138393700122833, + "grad_norm": 28.120004653930664, + "learning_rate": 4.057290731287531e-07, + "logits/chosen": -0.3828911781311035, + "logits/rejected": -0.35266777873039246, + "logps/chosen": -256.5846862792969, + "logps/ref_chosen": -67.26228332519531, + "logps/ref_rejected": -87.64010620117188, + "logps/rejected": -400.1204833984375, + "loss": 1.1166, + "margin_dpo/margin_mean": 123.15798950195312, + "margin_dpo/margin_std": 173.21768188476562, + "step": 245 + }, + { + "KL/chosen_KL_mean": -184.933349609375, + "KL/mean": -248.30307006835938, + "KL/rejected_KL_mean": -311.67279052734375, + "KL/std": 172.1173858642578, + "epoch": 0.36123348017621143, + "fcm_dpo/beta": 0.0030869655311107635, + "fcm_dpo/delta": 0.009035417810082436, + "fcm_dpo/margin": 126.73939514160156, + "fcm_dpo/q_t": 0.4123349189758301, + "grad_norm": 25.968130111694336, + "learning_rate": 4.047230911780736e-07, + "logits/chosen": -0.43583017587661743, + "logits/rejected": -0.39911651611328125, + "logps/chosen": -251.6303253173828, + "logps/ref_chosen": -66.69696807861328, + "logps/ref_rejected": -84.34634399414062, + "logps/rejected": -396.01910400390625, + "loss": 1.108, + "margin_dpo/margin_mean": 126.73939514160156, + "margin_dpo/margin_std": 186.197265625, + "step": 246 + }, + { + "KL/chosen_KL_mean": -217.3389129638672, + "KL/mean": -306.2090148925781, + "KL/rejected_KL_mean": -395.0791015625, + "KL/std": 180.24539184570312, + "epoch": 0.36270190895741555, + "fcm_dpo/beta": 0.0030250344425439835, + "fcm_dpo/delta": -0.14571964740753174, + "fcm_dpo/margin": 177.74020385742188, + "fcm_dpo/q_t": 0.377947062253952, + "grad_norm": 34.5230712890625, + "learning_rate": 4.0371303213004814e-07, + "logits/chosen": -0.35546165704727173, + "logits/rejected": -0.3532963991165161, + "logps/chosen": -273.94427490234375, + "logps/ref_chosen": -56.6053466796875, + "logps/ref_rejected": -106.29326629638672, + "logps/rejected": -501.37237548828125, + "loss": 1.0034, + "margin_dpo/margin_mean": 177.7401885986328, + "margin_dpo/margin_std": 199.57327270507812, + "step": 247 + }, + { + "KL/chosen_KL_mean": -187.72491455078125, + "KL/mean": -263.7269287109375, + "KL/rejected_KL_mean": -339.7288818359375, + "KL/std": 144.5110321044922, + "epoch": 0.3641703377386197, + "fcm_dpo/beta": 0.00296983914449811, + "fcm_dpo/delta": -0.05443059653043747, + "fcm_dpo/margin": 152.0039825439453, + "fcm_dpo/q_t": 0.3929385244846344, + "grad_norm": 30.407548904418945, + "learning_rate": 4.0269892260067197e-07, + "logits/chosen": -0.3517131209373474, + "logits/rejected": -0.37110453844070435, + "logps/chosen": -231.76812744140625, + "logps/ref_chosen": -44.043216705322266, + "logps/ref_rejected": -91.85687255859375, + "logps/rejected": -431.5857849121094, + "loss": 1.0265, + "margin_dpo/margin_mean": 152.00399780273438, + "margin_dpo/margin_std": 138.8626708984375, + "step": 248 + }, + { + "KL/chosen_KL_mean": -232.51548767089844, + "KL/mean": -272.2945556640625, + "KL/rejected_KL_mean": -312.07366943359375, + "KL/std": 151.4901580810547, + "epoch": 0.3656387665198238, + "fcm_dpo/beta": 0.0030452050268650055, + "fcm_dpo/delta": 0.1617167890071869, + "fcm_dpo/margin": 79.55818176269531, + "fcm_dpo/q_t": 0.4446827173233032, + "grad_norm": 51.20861053466797, + "learning_rate": 4.0168078931267426e-07, + "logits/chosen": -0.3823295533657074, + "logits/rejected": -0.360689252614975, + "logps/chosen": -294.95782470703125, + "logps/ref_chosen": -62.442352294921875, + "logps/ref_rejected": -80.46806335449219, + "logps/rejected": -392.5417175292969, + "loss": 1.2284, + "margin_dpo/margin_mean": 79.55818176269531, + "margin_dpo/margin_std": 176.84884643554688, + "step": 249 + }, + { + "KL/chosen_KL_mean": -208.30474853515625, + "KL/mean": -283.3992919921875, + "KL/rejected_KL_mean": -358.49383544921875, + "KL/std": 153.48965454101562, + "epoch": 0.3671071953010279, + "fcm_dpo/beta": 0.0030563112813979387, + "fcm_dpo/delta": -0.061967238783836365, + "fcm_dpo/margin": 150.18910217285156, + "fcm_dpo/q_t": 0.39273035526275635, + "grad_norm": 63.86355972290039, + "learning_rate": 4.006586590948141e-07, + "logits/chosen": -0.3651628792285919, + "logits/rejected": -0.3122418522834778, + "logps/chosen": -273.9414367675781, + "logps/ref_chosen": -65.63668823242188, + "logps/ref_rejected": -73.87184143066406, + "logps/rejected": -432.36566162109375, + "loss": 1.0317, + "margin_dpo/margin_mean": 150.18910217285156, + "margin_dpo/margin_std": 152.9610137939453, + "step": 250 + }, + { + "KL/chosen_KL_mean": -218.24432373046875, + "KL/mean": -268.9655456542969, + "KL/rejected_KL_mean": -319.686767578125, + "KL/std": 161.1807861328125, + "epoch": 0.368575624082232, + "fcm_dpo/beta": 0.003067499492317438, + "fcm_dpo/delta": 0.09177864342927933, + "fcm_dpo/margin": 101.44242858886719, + "fcm_dpo/q_t": 0.42934930324554443, + "grad_norm": 46.124000549316406, + "learning_rate": 3.9963255888117325e-07, + "logits/chosen": -0.32652994990348816, + "logits/rejected": -0.29296159744262695, + "logps/chosen": -275.42706298828125, + "logps/ref_chosen": -57.182716369628906, + "logps/ref_rejected": -77.66343688964844, + "logps/rejected": -397.3501892089844, + "loss": 1.1706, + "margin_dpo/margin_mean": 101.44244384765625, + "margin_dpo/margin_std": 174.69888305664062, + "step": 251 + }, + { + "KL/chosen_KL_mean": -209.73199462890625, + "KL/mean": -280.8515625, + "KL/rejected_KL_mean": -351.9710693359375, + "KL/std": 143.38165283203125, + "epoch": 0.3700440528634361, + "fcm_dpo/beta": 0.0030634840950369835, + "fcm_dpo/delta": -0.037671059370040894, + "fcm_dpo/margin": 142.23912048339844, + "fcm_dpo/q_t": 0.3959454894065857, + "grad_norm": 26.554500579833984, + "learning_rate": 3.9860251571044666e-07, + "logits/chosen": -0.3959979712963104, + "logits/rejected": -0.3560243248939514, + "logps/chosen": -281.4176330566406, + "logps/ref_chosen": -71.68563842773438, + "logps/ref_rejected": -84.75799560546875, + "logps/rejected": -436.72906494140625, + "loss": 1.0445, + "margin_dpo/margin_mean": 142.23912048339844, + "margin_dpo/margin_std": 143.8651580810547, + "step": 252 + }, + { + "KL/chosen_KL_mean": -180.37916564941406, + "KL/mean": -247.28616333007812, + "KL/rejected_KL_mean": -314.19317626953125, + "KL/std": 149.814208984375, + "epoch": 0.37151248164464024, + "fcm_dpo/beta": 0.003077391069382429, + "fcm_dpo/delta": -0.012822866439819336, + "fcm_dpo/margin": 133.81402587890625, + "fcm_dpo/q_t": 0.40381836891174316, + "grad_norm": 22.02428436279297, + "learning_rate": 3.9756855672522986e-07, + "logits/chosen": -0.4026602804660797, + "logits/rejected": -0.3960729241371155, + "logps/chosen": -249.51309204101562, + "logps/ref_chosen": -69.1339340209961, + "logps/ref_rejected": -98.70252990722656, + "logps/rejected": -412.8957214355469, + "loss": 1.0801, + "margin_dpo/margin_mean": 133.81402587890625, + "margin_dpo/margin_std": 165.3370819091797, + "step": 253 + }, + { + "KL/chosen_KL_mean": -173.2677001953125, + "KL/mean": -232.09576416015625, + "KL/rejected_KL_mean": -290.923828125, + "KL/std": 157.92564392089844, + "epoch": 0.37298091042584436, + "fcm_dpo/beta": 0.0030737267807126045, + "fcm_dpo/delta": 0.03981554135680199, + "fcm_dpo/margin": 117.6561279296875, + "fcm_dpo/q_t": 0.42006951570510864, + "grad_norm": 20.989831924438477, + "learning_rate": 3.965307091713037e-07, + "logits/chosen": -0.37722885608673096, + "logits/rejected": -0.364244282245636, + "logps/chosen": -227.42269897460938, + "logps/ref_chosen": -54.154998779296875, + "logps/ref_rejected": -90.30764770507812, + "logps/rejected": -381.2314758300781, + "loss": 1.1402, + "margin_dpo/margin_mean": 117.6561279296875, + "margin_dpo/margin_std": 197.60504150390625, + "step": 254 + }, + { + "KL/chosen_KL_mean": -170.31912231445312, + "KL/mean": -232.83419799804688, + "KL/rejected_KL_mean": -295.3492736816406, + "KL/std": 138.8697967529297, + "epoch": 0.3744493392070485, + "fcm_dpo/beta": 0.0030783750116825104, + "fcm_dpo/delta": 0.015461381524801254, + "fcm_dpo/margin": 125.03014373779297, + "fcm_dpo/q_t": 0.4109645187854767, + "grad_norm": 21.194082260131836, + "learning_rate": 3.954890003969163e-07, + "logits/chosen": -0.3547831177711487, + "logits/rejected": -0.3463220000267029, + "logps/chosen": -227.46080017089844, + "logps/ref_chosen": -57.14167022705078, + "logps/ref_rejected": -90.2085952758789, + "logps/rejected": -385.557861328125, + "loss": 1.107, + "margin_dpo/margin_mean": 125.03013610839844, + "margin_dpo/margin_std": 173.13796997070312, + "step": 255 + }, + { + "KL/chosen_KL_mean": -147.08546447753906, + "KL/mean": -213.44046020507812, + "KL/rejected_KL_mean": -279.79547119140625, + "KL/std": 147.87371826171875, + "epoch": 0.37591776798825255, + "fcm_dpo/beta": 0.0030894456431269646, + "fcm_dpo/delta": -0.010427280329167843, + "fcm_dpo/margin": 132.71002197265625, + "fcm_dpo/q_t": 0.40535274147987366, + "grad_norm": 39.32383346557617, + "learning_rate": 3.944434578520628e-07, + "logits/chosen": -0.3375306725502014, + "logits/rejected": -0.3469845950603485, + "logps/chosen": -202.24896240234375, + "logps/ref_chosen": -55.163490295410156, + "logps/ref_rejected": -92.56291961669922, + "logps/rejected": -372.3583984375, + "loss": 1.0822, + "margin_dpo/margin_mean": 132.71002197265625, + "margin_dpo/margin_std": 170.27206420898438, + "step": 256 + }, + { + "KL/chosen_KL_mean": -140.64047241210938, + "KL/mean": -211.18006896972656, + "KL/rejected_KL_mean": -281.71966552734375, + "KL/std": 152.0925750732422, + "epoch": 0.37738619676945667, + "fcm_dpo/beta": 0.00305275060236454, + "fcm_dpo/delta": -0.03301442041993141, + "fcm_dpo/margin": 141.07920837402344, + "fcm_dpo/q_t": 0.40170031785964966, + "grad_norm": 23.27699089050293, + "learning_rate": 3.933941090877615e-07, + "logits/chosen": -0.3658456802368164, + "logits/rejected": -0.3539636731147766, + "logps/chosen": -190.0641632080078, + "logps/ref_chosen": -49.42369842529297, + "logps/ref_rejected": -79.53791809082031, + "logps/rejected": -361.257568359375, + "loss": 1.0705, + "margin_dpo/margin_mean": 141.07920837402344, + "margin_dpo/margin_std": 172.331787109375, + "step": 257 + }, + { + "KL/chosen_KL_mean": -186.5501708984375, + "KL/mean": -254.5238037109375, + "KL/rejected_KL_mean": -322.4974365234375, + "KL/std": 160.353515625, + "epoch": 0.3788546255506608, + "fcm_dpo/beta": 0.003063221462070942, + "fcm_dpo/delta": -0.017175834625959396, + "fcm_dpo/margin": 135.947265625, + "fcm_dpo/q_t": 0.403425931930542, + "grad_norm": 33.59754943847656, + "learning_rate": 3.923409817553284e-07, + "logits/chosen": -0.33080989122390747, + "logits/rejected": -0.3309909701347351, + "logps/chosen": -245.93429565429688, + "logps/ref_chosen": -59.384124755859375, + "logps/ref_rejected": -95.99010467529297, + "logps/rejected": -418.487548828125, + "loss": 1.0923, + "margin_dpo/margin_mean": 135.947265625, + "margin_dpo/margin_std": 190.87680053710938, + "step": 258 + }, + { + "KL/chosen_KL_mean": -180.4862060546875, + "KL/mean": -238.32546997070312, + "KL/rejected_KL_mean": -296.16473388671875, + "KL/std": 154.69522094726562, + "epoch": 0.3803230543318649, + "fcm_dpo/beta": 0.003068537451326847, + "fcm_dpo/delta": 0.046667762100696564, + "fcm_dpo/margin": 115.67851257324219, + "fcm_dpo/q_t": 0.4190807044506073, + "grad_norm": 27.576894760131836, + "learning_rate": 3.9128410360564793e-07, + "logits/chosen": -0.3981941342353821, + "logits/rejected": -0.39807045459747314, + "logps/chosen": -233.31454467773438, + "logps/ref_chosen": -52.828346252441406, + "logps/ref_rejected": -89.191650390625, + "logps/rejected": -385.35638427734375, + "loss": 1.1328, + "margin_dpo/margin_mean": 115.67851257324219, + "margin_dpo/margin_std": 177.17535400390625, + "step": 259 + }, + { + "KL/chosen_KL_mean": -187.39376831054688, + "KL/mean": -266.05322265625, + "KL/rejected_KL_mean": -344.71270751953125, + "KL/std": 160.06930541992188, + "epoch": 0.38179148311306904, + "fcm_dpo/beta": 0.003057563677430153, + "fcm_dpo/delta": -0.08508844673633575, + "fcm_dpo/margin": 157.31893920898438, + "fcm_dpo/q_t": 0.39034503698349, + "grad_norm": 31.068809509277344, + "learning_rate": 3.9022350248844246e-07, + "logits/chosen": -0.3578794002532959, + "logits/rejected": -0.37384307384490967, + "logps/chosen": -234.81146240234375, + "logps/ref_chosen": -47.41767501831055, + "logps/ref_rejected": -95.08978271484375, + "logps/rejected": -439.802490234375, + "loss": 1.0238, + "margin_dpo/margin_mean": 157.31893920898438, + "margin_dpo/margin_std": 170.58743286132812, + "step": 260 + }, + { + "KL/chosen_KL_mean": -198.41915893554688, + "KL/mean": -272.1341552734375, + "KL/rejected_KL_mean": -345.84918212890625, + "KL/std": 177.0203857421875, + "epoch": 0.3832599118942731, + "fcm_dpo/beta": 0.0030062044970691204, + "fcm_dpo/delta": -0.045364413410425186, + "fcm_dpo/margin": 147.43002319335938, + "fcm_dpo/q_t": 0.4001613259315491, + "grad_norm": 19.665233612060547, + "learning_rate": 3.891592063515376e-07, + "logits/chosen": -0.3024892210960388, + "logits/rejected": -0.303438663482666, + "logps/chosen": -251.45053100585938, + "logps/ref_chosen": -53.03137969970703, + "logps/ref_rejected": -88.51494598388672, + "logps/rejected": -434.3641357421875, + "loss": 1.0747, + "margin_dpo/margin_mean": 147.43002319335938, + "margin_dpo/margin_std": 202.10751342773438, + "step": 261 + }, + { + "KL/chosen_KL_mean": -231.7572021484375, + "KL/mean": -292.0511474609375, + "KL/rejected_KL_mean": -352.3450927734375, + "KL/std": 160.82723999023438, + "epoch": 0.38472834067547723, + "fcm_dpo/beta": 0.0030295196920633316, + "fcm_dpo/delta": 0.035723648965358734, + "fcm_dpo/margin": 120.58787536621094, + "fcm_dpo/q_t": 0.4150589108467102, + "grad_norm": 23.96550750732422, + "learning_rate": 3.880912432401264e-07, + "logits/chosen": -0.3180779814720154, + "logits/rejected": -0.29295575618743896, + "logps/chosen": -291.3773193359375, + "logps/ref_chosen": -59.620140075683594, + "logps/ref_rejected": -86.41853332519531, + "logps/rejected": -438.76361083984375, + "loss": 1.1084, + "margin_dpo/margin_mean": 120.58787536621094, + "margin_dpo/margin_std": 159.1489715576172, + "step": 262 + }, + { + "KL/chosen_KL_mean": -214.27926635742188, + "KL/mean": -300.5625, + "KL/rejected_KL_mean": -386.8457336425781, + "KL/std": 190.634033203125, + "epoch": 0.38619676945668135, + "fcm_dpo/beta": 0.002960496349260211, + "fcm_dpo/delta": -0.11779750883579254, + "fcm_dpo/margin": 172.56646728515625, + "fcm_dpo/q_t": 0.38409924507141113, + "grad_norm": 20.86168098449707, + "learning_rate": 3.870196412960302e-07, + "logits/chosen": -0.3342781960964203, + "logits/rejected": -0.31365495920181274, + "logps/chosen": -273.7001953125, + "logps/ref_chosen": -59.42094421386719, + "logps/ref_rejected": -96.85720825195312, + "logps/rejected": -483.70294189453125, + "loss": 1.0246, + "margin_dpo/margin_mean": 172.56646728515625, + "margin_dpo/margin_std": 205.58511352539062, + "step": 263 + }, + { + "KL/chosen_KL_mean": -228.12066650390625, + "KL/mean": -300.76123046875, + "KL/rejected_KL_mean": -373.40179443359375, + "KL/std": 175.52401733398438, + "epoch": 0.3876651982378855, + "fcm_dpo/beta": 0.0029227761551737785, + "fcm_dpo/delta": -0.026993874460458755, + "fcm_dpo/margin": 145.28106689453125, + "fcm_dpo/q_t": 0.4040907025337219, + "grad_norm": 22.25633430480957, + "learning_rate": 3.8594442875695665e-07, + "logits/chosen": -0.37826618552207947, + "logits/rejected": -0.3711628019809723, + "logps/chosen": -290.8427734375, + "logps/ref_chosen": -62.722084045410156, + "logps/ref_rejected": -93.85620880126953, + "logps/rejected": -467.25799560546875, + "loss": 1.0907, + "margin_dpo/margin_mean": 145.2810821533203, + "margin_dpo/margin_std": 201.77871704101562, + "step": 264 + }, + { + "KL/chosen_KL_mean": -242.83438110351562, + "KL/mean": -314.224365234375, + "KL/rejected_KL_mean": -385.6143798828125, + "KL/std": 194.88223266601562, + "epoch": 0.3891336270190896, + "fcm_dpo/beta": 0.0029316158033907413, + "fcm_dpo/delta": -0.019387083128094673, + "fcm_dpo/margin": 142.77999877929688, + "fcm_dpo/q_t": 0.40785303711891174, + "grad_norm": 26.028135299682617, + "learning_rate": 3.848656339557562e-07, + "logits/chosen": -0.3183874785900116, + "logits/rejected": -0.304283082485199, + "logps/chosen": -304.80584716796875, + "logps/ref_chosen": -61.971466064453125, + "logps/ref_rejected": -88.02059936523438, + "logps/rejected": -473.6349792480469, + "loss": 1.1159, + "margin_dpo/margin_mean": 142.77999877929688, + "margin_dpo/margin_std": 233.77301025390625, + "step": 265 + }, + { + "KL/chosen_KL_mean": -247.06314086914062, + "KL/mean": -303.61907958984375, + "KL/rejected_KL_mean": -360.17498779296875, + "KL/std": 165.74261474609375, + "epoch": 0.39060205580029367, + "fcm_dpo/beta": 0.0029581869021058083, + "fcm_dpo/delta": 0.06766145676374435, + "fcm_dpo/margin": 113.11186218261719, + "fcm_dpo/q_t": 0.42461222410202026, + "grad_norm": 42.105552673339844, + "learning_rate": 3.8378328531967507e-07, + "logits/chosen": -0.33068183064460754, + "logits/rejected": -0.2887161374092102, + "logps/chosen": -314.1628112792969, + "logps/ref_chosen": -67.09967041015625, + "logps/ref_rejected": -67.97122192382812, + "logps/rejected": -428.146240234375, + "loss": 1.1555, + "margin_dpo/margin_mean": 113.11186218261719, + "margin_dpo/margin_std": 193.41064453125, + "step": 266 + }, + { + "KL/chosen_KL_mean": -218.62939453125, + "KL/mean": -286.082763671875, + "KL/rejected_KL_mean": -353.53619384765625, + "KL/std": 175.55633544921875, + "epoch": 0.3920704845814978, + "fcm_dpo/beta": 0.002957455348223448, + "fcm_dpo/delta": 0.0007606670260429382, + "fcm_dpo/margin": 134.9067840576172, + "fcm_dpo/q_t": 0.4094040095806122, + "grad_norm": 30.943706512451172, + "learning_rate": 3.8269741136960646e-07, + "logits/chosen": -0.4011858105659485, + "logits/rejected": -0.37256526947021484, + "logps/chosen": -287.60015869140625, + "logps/ref_chosen": -68.97075653076172, + "logps/ref_rejected": -90.16844940185547, + "logps/rejected": -443.7046203613281, + "loss": 1.1093, + "margin_dpo/margin_mean": 134.90679931640625, + "margin_dpo/margin_std": 202.06175231933594, + "step": 267 + }, + { + "KL/chosen_KL_mean": -222.305419921875, + "KL/mean": -293.548095703125, + "KL/rejected_KL_mean": -364.79071044921875, + "KL/std": 170.23300170898438, + "epoch": 0.3935389133627019, + "fcm_dpo/beta": 0.0029503919649869204, + "fcm_dpo/delta": -0.02154139243066311, + "fcm_dpo/margin": 142.48526000976562, + "fcm_dpo/q_t": 0.40528371930122375, + "grad_norm": 23.621414184570312, + "learning_rate": 3.8160804071933894e-07, + "logits/chosen": -0.3952227234840393, + "logits/rejected": -0.40232187509536743, + "logps/chosen": -278.20574951171875, + "logps/ref_chosen": -55.90031051635742, + "logps/ref_rejected": -101.64763641357422, + "logps/rejected": -466.4383239746094, + "loss": 1.0936, + "margin_dpo/margin_mean": 142.48526000976562, + "margin_dpo/margin_std": 206.6456298828125, + "step": 268 + }, + { + "KL/chosen_KL_mean": -233.79080200195312, + "KL/mean": -320.32830810546875, + "KL/rejected_KL_mean": -406.86578369140625, + "KL/std": 176.70233154296875, + "epoch": 0.39500734214390604, + "fcm_dpo/beta": 0.002910827985033393, + "fcm_dpo/delta": -0.109227254986763, + "fcm_dpo/margin": 173.074951171875, + "fcm_dpo/q_t": 0.38658440113067627, + "grad_norm": 23.399341583251953, + "learning_rate": 3.8051520207480204e-07, + "logits/chosen": -0.4322871267795563, + "logits/rejected": -0.4134712517261505, + "logps/chosen": -303.83038330078125, + "logps/ref_chosen": -70.03955841064453, + "logps/ref_rejected": -107.34937286376953, + "logps/rejected": -514.2151489257812, + "loss": 1.0422, + "margin_dpo/margin_mean": 173.074951171875, + "margin_dpo/margin_std": 224.85488891601562, + "step": 269 + }, + { + "KL/chosen_KL_mean": -206.2056427001953, + "KL/mean": -268.89251708984375, + "KL/rejected_KL_mean": -331.57940673828125, + "KL/std": 156.54763793945312, + "epoch": 0.3964757709251101, + "fcm_dpo/beta": 0.002915448509156704, + "fcm_dpo/delta": 0.03554879128932953, + "fcm_dpo/margin": 125.37371826171875, + "fcm_dpo/q_t": 0.4160010814666748, + "grad_norm": 25.85466194152832, + "learning_rate": 3.794189242333106e-07, + "logits/chosen": -0.46122607588768005, + "logits/rejected": -0.45263946056365967, + "logps/chosen": -275.7391357421875, + "logps/ref_chosen": -69.53347778320312, + "logps/ref_rejected": -109.92864990234375, + "logps/rejected": -441.5080261230469, + "loss": 1.1288, + "margin_dpo/margin_mean": 125.37371826171875, + "margin_dpo/margin_std": 193.83901977539062, + "step": 270 + }, + { + "KL/chosen_KL_mean": -191.8485870361328, + "KL/mean": -269.27569580078125, + "KL/rejected_KL_mean": -346.70281982421875, + "KL/std": 154.6339111328125, + "epoch": 0.39794419970631423, + "fcm_dpo/beta": 0.0028847784269601107, + "fcm_dpo/delta": -0.049142319709062576, + "fcm_dpo/margin": 154.854248046875, + "fcm_dpo/q_t": 0.3972168564796448, + "grad_norm": 22.624128341674805, + "learning_rate": 3.7831923608280514e-07, + "logits/chosen": -0.4137924313545227, + "logits/rejected": -0.39371395111083984, + "logps/chosen": -248.6131591796875, + "logps/ref_chosen": -56.76456832885742, + "logps/ref_rejected": -92.51383972167969, + "logps/rejected": -439.2166748046875, + "loss": 1.0476, + "margin_dpo/margin_mean": 154.85426330566406, + "margin_dpo/margin_std": 175.0887908935547, + "step": 271 + }, + { + "KL/chosen_KL_mean": -180.83294677734375, + "KL/mean": -275.11517333984375, + "KL/rejected_KL_mean": -369.3974304199219, + "KL/std": 171.1865234375, + "epoch": 0.39941262848751835, + "fcm_dpo/beta": 0.002835802501067519, + "fcm_dpo/delta": -0.1421043574810028, + "fcm_dpo/margin": 188.56448364257812, + "fcm_dpo/q_t": 0.37538450956344604, + "grad_norm": 32.70468521118164, + "learning_rate": 3.772161666010912e-07, + "logits/chosen": -0.3418217897415161, + "logits/rejected": -0.350533664226532, + "logps/chosen": -230.33010864257812, + "logps/ref_chosen": -49.497154235839844, + "logps/ref_rejected": -105.54279327392578, + "logps/rejected": -474.94024658203125, + "loss": 0.9828, + "margin_dpo/margin_mean": 188.56448364257812, + "margin_dpo/margin_std": 178.37030029296875, + "step": 272 + }, + { + "KL/chosen_KL_mean": -211.40390014648438, + "KL/mean": -304.2496643066406, + "KL/rejected_KL_mean": -397.0954284667969, + "KL/std": 175.37908935546875, + "epoch": 0.4008810572687225, + "fcm_dpo/beta": 0.00274536176584661, + "fcm_dpo/delta": -0.11640150099992752, + "fcm_dpo/margin": 185.6915283203125, + "fcm_dpo/q_t": 0.3824120759963989, + "grad_norm": 21.104841232299805, + "learning_rate": 3.761097448550755e-07, + "logits/chosen": -0.3616599440574646, + "logits/rejected": -0.34069812297821045, + "logps/chosen": -274.3792724609375, + "logps/ref_chosen": -62.97539520263672, + "logps/ref_rejected": -92.49858093261719, + "logps/rejected": -489.593994140625, + "loss": 1.0078, + "margin_dpo/margin_mean": 185.6915283203125, + "margin_dpo/margin_std": 196.62474060058594, + "step": 273 + }, + { + "KL/chosen_KL_mean": -254.86184692382812, + "KL/mean": -323.7417907714844, + "KL/rejected_KL_mean": -392.6217346191406, + "KL/std": 157.4027099609375, + "epoch": 0.4023494860499266, + "fcm_dpo/beta": 0.0027546617202460766, + "fcm_dpo/delta": 0.021147366613149643, + "fcm_dpo/margin": 137.75990295410156, + "fcm_dpo/q_t": 0.4122427701950073, + "grad_norm": 26.784929275512695, + "learning_rate": 3.75e-07, + "logits/chosen": -0.2770734429359436, + "logits/rejected": -0.2582925260066986, + "logps/chosen": -310.5295715332031, + "logps/ref_chosen": -55.66770935058594, + "logps/ref_rejected": -77.33308410644531, + "logps/rejected": -469.954833984375, + "loss": 1.1023, + "margin_dpo/margin_mean": 137.7598876953125, + "margin_dpo/margin_std": 182.3780517578125, + "step": 274 + }, + { + "KL/chosen_KL_mean": -200.73196411132812, + "KL/mean": -275.09075927734375, + "KL/rejected_KL_mean": -349.4495849609375, + "KL/std": 161.7529296875, + "epoch": 0.40381791483113066, + "fcm_dpo/beta": 0.0027543343603610992, + "fcm_dpo/delta": -0.010161615908145905, + "fcm_dpo/margin": 148.71763610839844, + "fcm_dpo/q_t": 0.4048367142677307, + "grad_norm": 23.660945892333984, + "learning_rate": 3.738869612786737e-07, + "logits/chosen": -0.3343503177165985, + "logits/rejected": -0.3386019766330719, + "logps/chosen": -249.32666015625, + "logps/ref_chosen": -48.594703674316406, + "logps/ref_rejected": -93.30369567871094, + "logps/rejected": -442.7532958984375, + "loss": 1.0776, + "margin_dpo/margin_mean": 148.71762084960938, + "margin_dpo/margin_std": 181.3911590576172, + "step": 275 + }, + { + "KL/chosen_KL_mean": -220.0225372314453, + "KL/mean": -294.27294921875, + "KL/rejected_KL_mean": -368.52337646484375, + "KL/std": 166.26907348632812, + "epoch": 0.4052863436123348, + "fcm_dpo/beta": 0.002734929323196411, + "fcm_dpo/delta": -0.0065729208290576935, + "fcm_dpo/margin": 148.5008544921875, + "fcm_dpo/q_t": 0.40702730417251587, + "grad_norm": 28.4442195892334, + "learning_rate": 3.7277065802070204e-07, + "logits/chosen": -0.33677220344543457, + "logits/rejected": -0.31244635581970215, + "logps/chosen": -276.5999450683594, + "logps/ref_chosen": -56.57740783691406, + "logps/ref_rejected": -70.36566925048828, + "logps/rejected": -438.8890380859375, + "loss": 1.0889, + "margin_dpo/margin_mean": 148.5008544921875, + "margin_dpo/margin_std": 197.42532348632812, + "step": 276 + }, + { + "KL/chosen_KL_mean": -239.46275329589844, + "KL/mean": -317.6234130859375, + "KL/rejected_KL_mean": -395.7840576171875, + "KL/std": 170.51141357421875, + "epoch": 0.4067547723935389, + "fcm_dpo/beta": 0.0027346829883754253, + "fcm_dpo/delta": -0.028729649260640144, + "fcm_dpo/margin": 156.3212890625, + "fcm_dpo/q_t": 0.40158289670944214, + "grad_norm": 25.2097225189209, + "learning_rate": 3.71651119641714e-07, + "logits/chosen": -0.2805439829826355, + "logits/rejected": -0.2614123523235321, + "logps/chosen": -295.73431396484375, + "logps/ref_chosen": -56.27156066894531, + "logps/ref_rejected": -92.88127136230469, + "logps/rejected": -488.6653137207031, + "loss": 1.0752, + "margin_dpo/margin_mean": 156.3212890625, + "margin_dpo/margin_std": 201.70455932617188, + "step": 277 + }, + { + "KL/chosen_KL_mean": -215.6261749267578, + "KL/mean": -304.9232482910156, + "KL/rejected_KL_mean": -394.2203369140625, + "KL/std": 179.00375366210938, + "epoch": 0.40822320117474303, + "fcm_dpo/beta": 0.0026860979851335287, + "fcm_dpo/delta": -0.08402146399021149, + "fcm_dpo/margin": 178.59414672851562, + "fcm_dpo/q_t": 0.38982489705085754, + "grad_norm": 23.9796199798584, + "learning_rate": 3.705283756425872e-07, + "logits/chosen": -0.31917044520378113, + "logits/rejected": -0.32198548316955566, + "logps/chosen": -268.568115234375, + "logps/ref_chosen": -52.94194030761719, + "logps/ref_rejected": -91.25357818603516, + "logps/rejected": -485.4739074707031, + "loss": 1.0282, + "margin_dpo/margin_mean": 178.59414672851562, + "margin_dpo/margin_std": 198.23104858398438, + "step": 278 + }, + { + "KL/chosen_KL_mean": -253.12692260742188, + "KL/mean": -339.2762145996094, + "KL/rejected_KL_mean": -425.425537109375, + "KL/std": 185.4033966064453, + "epoch": 0.40969162995594716, + "fcm_dpo/beta": 0.0026397216133773327, + "fcm_dpo/delta": -0.058702681213617325, + "fcm_dpo/margin": 172.298583984375, + "fcm_dpo/q_t": 0.3971731960773468, + "grad_norm": 28.65115737915039, + "learning_rate": 3.6940245560867e-07, + "logits/chosen": -0.2409660816192627, + "logits/rejected": -0.23715651035308838, + "logps/chosen": -301.76824951171875, + "logps/ref_chosen": -48.641319274902344, + "logps/ref_rejected": -87.8514404296875, + "logps/rejected": -513.2769775390625, + "loss": 1.0662, + "margin_dpo/margin_mean": 172.298583984375, + "margin_dpo/margin_std": 225.11993408203125, + "step": 279 + }, + { + "KL/chosen_KL_mean": -251.72592163085938, + "KL/mean": -344.47216796875, + "KL/rejected_KL_mean": -437.21832275390625, + "KL/std": 179.44480895996094, + "epoch": 0.4111600587371512, + "fcm_dpo/beta": 0.0026130005717277527, + "fcm_dpo/delta": -0.08905528485774994, + "fcm_dpo/margin": 185.492431640625, + "fcm_dpo/q_t": 0.3867556154727936, + "grad_norm": 23.646604537963867, + "learning_rate": 3.6827338920900253e-07, + "logits/chosen": -0.29928305745124817, + "logits/rejected": -0.29868048429489136, + "logps/chosen": -310.5230407714844, + "logps/ref_chosen": -58.797122955322266, + "logps/ref_rejected": -98.61885070800781, + "logps/rejected": -535.837158203125, + "loss": 1.0195, + "margin_dpo/margin_mean": 185.492431640625, + "margin_dpo/margin_std": 194.14852905273438, + "step": 280 + }, + { + "KL/chosen_KL_mean": -230.0161895751953, + "KL/mean": -313.6643371582031, + "KL/rejected_KL_mean": -397.3125, + "KL/std": 175.47067260742188, + "epoch": 0.41262848751835535, + "fcm_dpo/beta": 0.0025973522569984198, + "fcm_dpo/delta": -0.036213867366313934, + "fcm_dpo/margin": 167.29629516601562, + "fcm_dpo/q_t": 0.3972865343093872, + "grad_norm": 23.116756439208984, + "learning_rate": 3.6714120619553435e-07, + "logits/chosen": -0.32348203659057617, + "logits/rejected": -0.2926068902015686, + "logps/chosen": -285.50469970703125, + "logps/ref_chosen": -55.488521575927734, + "logps/ref_rejected": -80.88258361816406, + "logps/rejected": -478.195068359375, + "loss": 1.0576, + "margin_dpo/margin_mean": 167.29629516601562, + "margin_dpo/margin_std": 190.1474609375, + "step": 281 + }, + { + "KL/chosen_KL_mean": -254.25106811523438, + "KL/mean": -319.09814453125, + "KL/rejected_KL_mean": -383.9451904296875, + "KL/std": 183.95884704589844, + "epoch": 0.41409691629955947, + "fcm_dpo/beta": 0.0026173896621912718, + "fcm_dpo/delta": 0.06215311959385872, + "fcm_dpo/margin": 129.69415283203125, + "fcm_dpo/q_t": 0.42490720748901367, + "grad_norm": 23.07098388671875, + "learning_rate": 3.660059364023408e-07, + "logits/chosen": -0.407487154006958, + "logits/rejected": -0.3855167031288147, + "logps/chosen": -327.32122802734375, + "logps/ref_chosen": -73.07014465332031, + "logps/ref_rejected": -95.35098266601562, + "logps/rejected": -479.29620361328125, + "loss": 1.1456, + "margin_dpo/margin_mean": 129.69415283203125, + "margin_dpo/margin_std": 215.59033203125, + "step": 282 + }, + { + "KL/chosen_KL_mean": -257.63934326171875, + "KL/mean": -357.9620361328125, + "KL/rejected_KL_mean": -458.2847595214844, + "KL/std": 216.70352172851562, + "epoch": 0.4155653450807636, + "fcm_dpo/beta": 0.002578072715550661, + "fcm_dpo/delta": -0.12350119650363922, + "fcm_dpo/margin": 200.64541625976562, + "fcm_dpo/q_t": 0.3798179626464844, + "grad_norm": 27.85664176940918, + "learning_rate": 3.6486760974483685e-07, + "logits/chosen": -0.3043696880340576, + "logits/rejected": -0.3012719452381134, + "logps/chosen": -319.53778076171875, + "logps/ref_chosen": -61.89844512939453, + "logps/ref_rejected": -96.98655700683594, + "logps/rejected": -555.2713012695312, + "loss": 1.0023, + "margin_dpo/margin_mean": 200.64540100097656, + "margin_dpo/margin_std": 206.13702392578125, + "step": 283 + }, + { + "KL/chosen_KL_mean": -241.11981201171875, + "KL/mean": -333.07305908203125, + "KL/rejected_KL_mean": -425.02630615234375, + "KL/std": 198.67276000976562, + "epoch": 0.4170337738619677, + "fcm_dpo/beta": 0.0025225451681762934, + "fcm_dpo/delta": -0.06718793511390686, + "fcm_dpo/margin": 183.906494140625, + "fcm_dpo/q_t": 0.39294561743736267, + "grad_norm": 21.307559967041016, + "learning_rate": 3.6372625621898863e-07, + "logits/chosen": -0.38930195569992065, + "logits/rejected": -0.37415850162506104, + "logps/chosen": -299.55535888671875, + "logps/ref_chosen": -58.4355354309082, + "logps/ref_rejected": -93.46926879882812, + "logps/rejected": -518.49560546875, + "loss": 1.0327, + "margin_dpo/margin_mean": 183.906494140625, + "margin_dpo/margin_std": 199.44464111328125, + "step": 284 + }, + { + "KL/chosen_KL_mean": -272.1553039550781, + "KL/mean": -350.767578125, + "KL/rejected_KL_mean": -429.3798522949219, + "KL/std": 169.2427978515625, + "epoch": 0.4185022026431718, + "fcm_dpo/beta": 0.0025313901714980602, + "fcm_dpo/delta": 0.001558154821395874, + "fcm_dpo/margin": 157.2245330810547, + "fcm_dpo/q_t": 0.4059276878833771, + "grad_norm": 25.553455352783203, + "learning_rate": 3.625819059005228e-07, + "logits/chosen": -0.3947150707244873, + "logits/rejected": -0.37686532735824585, + "logps/chosen": -338.38751220703125, + "logps/ref_chosen": -66.23219299316406, + "logps/ref_rejected": -99.1268310546875, + "logps/rejected": -528.5067138671875, + "loss": 1.0824, + "margin_dpo/margin_mean": 157.2245330810547, + "margin_dpo/margin_std": 185.91542053222656, + "step": 285 + }, + { + "KL/chosen_KL_mean": -268.74615478515625, + "KL/mean": -357.44683837890625, + "KL/rejected_KL_mean": -446.1475524902344, + "KL/std": 201.09698486328125, + "epoch": 0.4199706314243759, + "fcm_dpo/beta": 0.002504766918718815, + "fcm_dpo/delta": -0.04638573154807091, + "fcm_dpo/margin": 177.4013671875, + "fcm_dpo/q_t": 0.3975168466567993, + "grad_norm": 24.010190963745117, + "learning_rate": 3.614345889441346e-07, + "logits/chosen": -0.3724295198917389, + "logits/rejected": -0.3507525324821472, + "logps/chosen": -341.6971740722656, + "logps/ref_chosen": -72.95100402832031, + "logps/ref_rejected": -88.58845520019531, + "logps/rejected": -534.7359619140625, + "loss": 1.0627, + "margin_dpo/margin_mean": 177.40138244628906, + "margin_dpo/margin_std": 221.60133361816406, + "step": 286 + }, + { + "KL/chosen_KL_mean": -245.1240234375, + "KL/mean": -319.36212158203125, + "KL/rejected_KL_mean": -393.60015869140625, + "KL/std": 172.98507690429688, + "epoch": 0.42143906020558003, + "fcm_dpo/beta": 0.002508362988010049, + "fcm_dpo/delta": 0.028594862669706345, + "fcm_dpo/margin": 148.4761199951172, + "fcm_dpo/q_t": 0.41409826278686523, + "grad_norm": 21.714580535888672, + "learning_rate": 3.6028433558269275e-07, + "logits/chosen": -0.36127710342407227, + "logits/rejected": -0.33609965443611145, + "logps/chosen": -306.6651916503906, + "logps/ref_chosen": -61.54115295410156, + "logps/ref_rejected": -77.69607543945312, + "logps/rejected": -471.2962341308594, + "loss": 1.1047, + "margin_dpo/margin_mean": 148.47613525390625, + "margin_dpo/margin_std": 196.79652404785156, + "step": 287 + }, + { + "KL/chosen_KL_mean": -241.18698120117188, + "KL/mean": -333.81597900390625, + "KL/rejected_KL_mean": -426.4449462890625, + "KL/std": 176.66453552246094, + "epoch": 0.42290748898678415, + "fcm_dpo/beta": 0.0024691871367394924, + "fcm_dpo/delta": -0.06168384104967117, + "fcm_dpo/margin": 185.25796508789062, + "fcm_dpo/q_t": 0.39265191555023193, + "grad_norm": 21.64046287536621, + "learning_rate": 3.5913117612644327e-07, + "logits/chosen": -0.3853694200515747, + "logits/rejected": -0.36701393127441406, + "logps/chosen": -297.84820556640625, + "logps/ref_chosen": -56.661224365234375, + "logps/ref_rejected": -87.33570098876953, + "logps/rejected": -513.7806396484375, + "loss": 1.0344, + "margin_dpo/margin_mean": 185.25796508789062, + "margin_dpo/margin_std": 186.76431274414062, + "step": 288 + }, + { + "KL/chosen_KL_mean": -234.46319580078125, + "KL/mean": -339.17218017578125, + "KL/rejected_KL_mean": -443.88116455078125, + "KL/std": 201.45144653320312, + "epoch": 0.4243759177679883, + "fcm_dpo/beta": 0.0024433922953903675, + "fcm_dpo/delta": -0.11754532158374786, + "fcm_dpo/margin": 209.41796875, + "fcm_dpo/q_t": 0.38267892599105835, + "grad_norm": 24.331398010253906, + "learning_rate": 3.5797514096221024e-07, + "logits/chosen": -0.3093263506889343, + "logits/rejected": -0.30716824531555176, + "logps/chosen": -279.693603515625, + "logps/ref_chosen": -45.23039245605469, + "logps/ref_rejected": -87.64266967773438, + "logps/rejected": -531.5238037109375, + "loss": 1.0067, + "margin_dpo/margin_mean": 209.41796875, + "margin_dpo/margin_std": 220.97320556640625, + "step": 289 + }, + { + "KL/chosen_KL_mean": -259.2691650390625, + "KL/mean": -364.51544189453125, + "KL/rejected_KL_mean": -469.76171875, + "KL/std": 214.3240509033203, + "epoch": 0.42584434654919234, + "fcm_dpo/beta": 0.002378998324275017, + "fcm_dpo/delta": -0.10634815692901611, + "fcm_dpo/margin": 210.4925537109375, + "fcm_dpo/q_t": 0.3870581388473511, + "grad_norm": 21.887035369873047, + "learning_rate": 3.568162605525952e-07, + "logits/chosen": -0.31333810091018677, + "logits/rejected": -0.33406785130500793, + "logps/chosen": -314.74066162109375, + "logps/ref_chosen": -55.47149658203125, + "logps/ref_rejected": -116.70857238769531, + "logps/rejected": -586.4702758789062, + "loss": 1.0288, + "margin_dpo/margin_mean": 210.49253845214844, + "margin_dpo/margin_std": 254.7548828125, + "step": 290 + }, + { + "KL/chosen_KL_mean": -229.11093139648438, + "KL/mean": -324.2933654785156, + "KL/rejected_KL_mean": -419.47576904296875, + "KL/std": 183.89297485351562, + "epoch": 0.42731277533039647, + "fcm_dpo/beta": 0.002363018225878477, + "fcm_dpo/delta": -0.05237455666065216, + "fcm_dpo/margin": 190.36480712890625, + "fcm_dpo/q_t": 0.39525771141052246, + "grad_norm": 22.43724250793457, + "learning_rate": 3.5565456543517485e-07, + "logits/chosen": -0.32546311616897583, + "logits/rejected": -0.3105616271495819, + "logps/chosen": -292.3713073730469, + "logps/ref_chosen": -63.26036834716797, + "logps/ref_rejected": -89.29708862304688, + "logps/rejected": -508.7728576660156, + "loss": 1.0516, + "margin_dpo/margin_mean": 190.36480712890625, + "margin_dpo/margin_std": 219.41854858398438, + "step": 291 + }, + { + "KL/chosen_KL_mean": -263.8599853515625, + "KL/mean": -365.8248291015625, + "KL/rejected_KL_mean": -467.7896728515625, + "KL/std": 221.3306884765625, + "epoch": 0.4287812041116006, + "fcm_dpo/beta": 0.0023269178345799446, + "fcm_dpo/delta": -0.07821857929229736, + "fcm_dpo/margin": 203.92965698242188, + "fcm_dpo/q_t": 0.39132484793663025, + "grad_norm": 26.2148380279541, + "learning_rate": 3.5449008622169583e-07, + "logits/chosen": -0.3232491612434387, + "logits/rejected": -0.3077685534954071, + "logps/chosen": -317.7785339355469, + "logps/ref_chosen": -53.91852951049805, + "logps/ref_rejected": -89.96138000488281, + "logps/rejected": -557.7510375976562, + "loss": 1.0518, + "margin_dpo/margin_mean": 203.92965698242188, + "margin_dpo/margin_std": 259.8817443847656, + "step": 292 + }, + { + "KL/chosen_KL_mean": -269.84210205078125, + "KL/mean": -353.70697021484375, + "KL/rejected_KL_mean": -437.5718078613281, + "KL/std": 224.77517700195312, + "epoch": 0.4302496328928047, + "fcm_dpo/beta": 0.002317019272595644, + "fcm_dpo/delta": 0.011760619468986988, + "fcm_dpo/margin": 167.7296905517578, + "fcm_dpo/q_t": 0.41295474767684937, + "grad_norm": 38.31715774536133, + "learning_rate": 3.5332285359726846e-07, + "logits/chosen": -0.31585967540740967, + "logits/rejected": -0.3076015114784241, + "logps/chosen": -330.2181396484375, + "logps/ref_chosen": -60.376033782958984, + "logps/ref_rejected": -77.85244750976562, + "logps/rejected": -515.4242553710938, + "loss": 1.1161, + "margin_dpo/margin_mean": 167.72970581054688, + "margin_dpo/margin_std": 257.7914733886719, + "step": 293 + }, + { + "KL/chosen_KL_mean": -257.2511291503906, + "KL/mean": -347.72064208984375, + "KL/rejected_KL_mean": -438.19012451171875, + "KL/std": 204.02166748046875, + "epoch": 0.43171806167400884, + "fcm_dpo/beta": 0.002303325105458498, + "fcm_dpo/delta": -0.01764640584588051, + "fcm_dpo/margin": 180.93899536132812, + "fcm_dpo/q_t": 0.4063786268234253, + "grad_norm": 20.47630500793457, + "learning_rate": 3.5215289831955786e-07, + "logits/chosen": -0.3238428235054016, + "logits/rejected": -0.33083072304725647, + "logps/chosen": -305.3386535644531, + "logps/ref_chosen": -48.0875358581543, + "logps/ref_rejected": -81.89698791503906, + "logps/rejected": -520.0870971679688, + "loss": 1.0891, + "margin_dpo/margin_mean": 180.93899536132812, + "margin_dpo/margin_std": 251.64306640625, + "step": 294 + }, + { + "KL/chosen_KL_mean": -306.6954345703125, + "KL/mean": -408.22650146484375, + "KL/rejected_KL_mean": -509.75762939453125, + "KL/std": 231.20632934570312, + "epoch": 0.4331864904552129, + "fcm_dpo/beta": 0.002286091446876526, + "fcm_dpo/delta": -0.06729740649461746, + "fcm_dpo/margin": 203.06219482421875, + "fcm_dpo/q_t": 0.3953544497489929, + "grad_norm": 25.35572052001953, + "learning_rate": 3.509802512179737e-07, + "logits/chosen": -0.3378216624259949, + "logits/rejected": -0.34275323152542114, + "logps/chosen": -356.6201171875, + "logps/ref_chosen": -49.92467498779297, + "logps/ref_rejected": -87.45632934570312, + "logps/rejected": -597.2139282226562, + "loss": 1.0634, + "margin_dpo/margin_mean": 203.06219482421875, + "margin_dpo/margin_std": 268.56146240234375, + "step": 295 + }, + { + "KL/chosen_KL_mean": -361.69189453125, + "KL/mean": -433.52117919921875, + "KL/rejected_KL_mean": -505.3504943847656, + "KL/std": 217.83050537109375, + "epoch": 0.434654919236417, + "fcm_dpo/beta": 0.002259893110021949, + "fcm_dpo/delta": -0.03520293906331062, + "fcm_dpo/margin": 143.65863037109375, + "fcm_dpo/q_t": 0.4249199628829956, + "grad_norm": 37.518798828125, + "learning_rate": 3.498049431928577e-07, + "logits/chosen": -0.39878833293914795, + "logits/rejected": -0.3886667490005493, + "logps/chosen": -427.18310546875, + "logps/ref_chosen": -65.49124145507812, + "logps/ref_rejected": -93.08908081054688, + "logps/rejected": -598.4395751953125, + "loss": 1.177, + "margin_dpo/margin_mean": 143.65863037109375, + "margin_dpo/margin_std": 266.5452575683594, + "step": 296 + }, + { + "KL/chosen_KL_mean": -317.15142822265625, + "KL/mean": -396.7143859863281, + "KL/rejected_KL_mean": -476.27734375, + "KL/std": 205.87338256835938, + "epoch": 0.43612334801762115, + "fcm_dpo/beta": 0.002268793759867549, + "fcm_dpo/delta": 0.04043077677488327, + "fcm_dpo/margin": 159.12591552734375, + "fcm_dpo/q_t": 0.4162394404411316, + "grad_norm": 30.537891387939453, + "learning_rate": 3.486270052146694e-07, + "logits/chosen": -0.38363754749298096, + "logits/rejected": -0.38842642307281494, + "logps/chosen": -373.6283874511719, + "logps/ref_chosen": -56.476951599121094, + "logps/ref_rejected": -95.1385498046875, + "logps/rejected": -571.4158935546875, + "loss": 1.1114, + "margin_dpo/margin_mean": 159.12591552734375, + "margin_dpo/margin_std": 209.26858520507812, + "step": 297 + }, + { + "KL/chosen_KL_mean": -326.7189025878906, + "KL/mean": -432.0556640625, + "KL/rejected_KL_mean": -537.3925170898438, + "KL/std": 260.4841003417969, + "epoch": 0.43759177679882527, + "fcm_dpo/beta": 0.002258981578052044, + "fcm_dpo/delta": -0.07966307550668716, + "fcm_dpo/margin": 210.67361450195312, + "fcm_dpo/q_t": 0.39616093039512634, + "grad_norm": 25.527212142944336, + "learning_rate": 3.474464683231698e-07, + "logits/chosen": -0.3951565623283386, + "logits/rejected": -0.4159289598464966, + "logps/chosen": -394.0440673828125, + "logps/ref_chosen": -67.32516479492188, + "logps/ref_rejected": -116.66217041015625, + "logps/rejected": -654.0546875, + "loss": 1.0681, + "margin_dpo/margin_mean": 210.67361450195312, + "margin_dpo/margin_std": 302.4098815917969, + "step": 298 + }, + { + "KL/chosen_KL_mean": -267.58660888671875, + "KL/mean": -359.8266296386719, + "KL/rejected_KL_mean": -452.066650390625, + "KL/std": 204.48165893554688, + "epoch": 0.4390602055800294, + "fcm_dpo/beta": 0.0022493680007755756, + "fcm_dpo/delta": -0.016153991222381592, + "fcm_dpo/margin": 184.4800567626953, + "fcm_dpo/q_t": 0.40638279914855957, + "grad_norm": 37.44233322143555, + "learning_rate": 3.462633636266041e-07, + "logits/chosen": -0.37919020652770996, + "logits/rejected": -0.38419249653816223, + "logps/chosen": -316.5487060546875, + "logps/ref_chosen": -48.96209716796875, + "logps/ref_rejected": -84.32823944091797, + "logps/rejected": -536.3948974609375, + "loss": 1.0928, + "margin_dpo/margin_mean": 184.4800567626953, + "margin_dpo/margin_std": 256.15374755859375, + "step": 299 + }, + { + "KL/chosen_KL_mean": -322.3915710449219, + "KL/mean": -433.9006652832031, + "KL/rejected_KL_mean": -545.4097900390625, + "KL/std": 238.47645568847656, + "epoch": 0.44052863436123346, + "fcm_dpo/beta": 0.0022087290417402983, + "fcm_dpo/delta": -0.0972696915268898, + "fcm_dpo/margin": 223.01821899414062, + "fcm_dpo/q_t": 0.38775455951690674, + "grad_norm": 33.15673828125, + "learning_rate": 3.4507772230088147e-07, + "logits/chosen": -0.39440402388572693, + "logits/rejected": -0.4011026620864868, + "logps/chosen": -381.46527099609375, + "logps/ref_chosen": -59.07371139526367, + "logps/ref_rejected": -95.9664535522461, + "logps/rejected": -641.376220703125, + "loss": 1.0439, + "margin_dpo/margin_mean": 223.01821899414062, + "margin_dpo/margin_std": 281.2703857421875, + "step": 300 + }, + { + "KL/chosen_KL_mean": -275.7957763671875, + "KL/mean": -380.38037109375, + "KL/rejected_KL_mean": -484.9649963378906, + "KL/std": 218.0830078125, + "epoch": 0.4419970631424376, + "fcm_dpo/beta": 0.002167191356420517, + "fcm_dpo/delta": -0.05619215965270996, + "fcm_dpo/margin": 209.16920471191406, + "fcm_dpo/q_t": 0.39748483896255493, + "grad_norm": 23.70587921142578, + "learning_rate": 3.4388957558875316e-07, + "logits/chosen": -0.40770500898361206, + "logits/rejected": -0.40985846519470215, + "logps/chosen": -333.045166015625, + "logps/ref_chosen": -57.249366760253906, + "logps/ref_rejected": -92.35354614257812, + "logps/rejected": -577.3185424804688, + "loss": 1.0547, + "margin_dpo/margin_mean": 209.16921997070312, + "margin_dpo/margin_std": 258.6616516113281, + "step": 301 + }, + { + "KL/chosen_KL_mean": -233.06362915039062, + "KL/mean": -322.35797119140625, + "KL/rejected_KL_mean": -411.6523132324219, + "KL/std": 194.712158203125, + "epoch": 0.4434654919236417, + "fcm_dpo/beta": 0.002163384575396776, + "fcm_dpo/delta": 0.013927444815635681, + "fcm_dpo/margin": 178.58871459960938, + "fcm_dpo/q_t": 0.4111108183860779, + "grad_norm": 27.502851486206055, + "learning_rate": 3.426989547989902e-07, + "logits/chosen": -0.3880677819252014, + "logits/rejected": -0.3943794369697571, + "logps/chosen": -284.2615966796875, + "logps/ref_chosen": -51.197994232177734, + "logps/ref_rejected": -97.22636413574219, + "logps/rejected": -508.878662109375, + "loss": 1.0956, + "margin_dpo/margin_mean": 178.58871459960938, + "margin_dpo/margin_std": 229.92686462402344, + "step": 302 + }, + { + "KL/chosen_KL_mean": -241.3304443359375, + "KL/mean": -321.529541015625, + "KL/rejected_KL_mean": -401.7286376953125, + "KL/std": 198.59133911132812, + "epoch": 0.44493392070484583, + "fcm_dpo/beta": 0.002184551674872637, + "fcm_dpo/delta": 0.05139891058206558, + "fcm_dpo/margin": 160.39822387695312, + "fcm_dpo/q_t": 0.4194805920124054, + "grad_norm": 26.236801147460938, + "learning_rate": 3.4150589130555773e-07, + "logits/chosen": -0.41135263442993164, + "logits/rejected": -0.3971450924873352, + "logps/chosen": -308.04437255859375, + "logps/ref_chosen": -66.71394348144531, + "logps/ref_rejected": -86.94542694091797, + "logps/rejected": -488.674072265625, + "loss": 1.1326, + "margin_dpo/margin_mean": 160.39822387695312, + "margin_dpo/margin_std": 240.5888671875, + "step": 303 + }, + { + "KL/chosen_KL_mean": -217.92318725585938, + "KL/mean": -308.0065002441406, + "KL/rejected_KL_mean": -398.0898132324219, + "KL/std": 184.00167846679688, + "epoch": 0.44640234948604995, + "fcm_dpo/beta": 0.002205474767833948, + "fcm_dpo/delta": 0.0024266578257083893, + "fcm_dpo/margin": 180.16664123535156, + "fcm_dpo/q_t": 0.40509599447250366, + "grad_norm": 33.5152473449707, + "learning_rate": 3.403104165467883e-07, + "logits/chosen": -0.42003294825553894, + "logits/rejected": -0.4105939567089081, + "logps/chosen": -289.8739013671875, + "logps/ref_chosen": -71.95069885253906, + "logps/ref_rejected": -90.47203063964844, + "logps/rejected": -488.56182861328125, + "loss": 1.0614, + "margin_dpo/margin_mean": 180.16664123535156, + "margin_dpo/margin_std": 164.29513549804688, + "step": 304 + }, + { + "KL/chosen_KL_mean": -242.13421630859375, + "KL/mean": -321.9437561035156, + "KL/rejected_KL_mean": -401.7532653808594, + "KL/std": 213.17129516601562, + "epoch": 0.447870778267254, + "fcm_dpo/beta": 0.0022017783485352993, + "fcm_dpo/delta": 0.05001223459839821, + "fcm_dpo/margin": 159.6190643310547, + "fcm_dpo/q_t": 0.42033082246780396, + "grad_norm": 20.60869026184082, + "learning_rate": 3.391125620245535e-07, + "logits/chosen": -0.4197441339492798, + "logits/rejected": -0.4035117030143738, + "logps/chosen": -308.929443359375, + "logps/ref_chosen": -66.79523468017578, + "logps/ref_rejected": -92.75459289550781, + "logps/rejected": -494.50787353515625, + "loss": 1.1316, + "margin_dpo/margin_mean": 159.6190643310547, + "margin_dpo/margin_std": 237.54547119140625, + "step": 305 + }, + { + "KL/chosen_KL_mean": -238.73196411132812, + "KL/mean": -318.37652587890625, + "KL/rejected_KL_mean": -398.0210876464844, + "KL/std": 185.3416748046875, + "epoch": 0.44933920704845814, + "fcm_dpo/beta": 0.0022371697705239058, + "fcm_dpo/delta": 0.045154958963394165, + "fcm_dpo/margin": 159.28915405273438, + "fcm_dpo/q_t": 0.41680416464805603, + "grad_norm": 25.355138778686523, + "learning_rate": 3.3791235930343417e-07, + "logits/chosen": -0.3622322082519531, + "logits/rejected": -0.33640217781066895, + "logps/chosen": -308.4158630371094, + "logps/ref_chosen": -69.68389892578125, + "logps/ref_rejected": -85.15919494628906, + "logps/rejected": -483.1802978515625, + "loss": 1.1079, + "margin_dpo/margin_mean": 159.2891387939453, + "margin_dpo/margin_std": 199.42770385742188, + "step": 306 + }, + { + "KL/chosen_KL_mean": -218.85501098632812, + "KL/mean": -301.641357421875, + "KL/rejected_KL_mean": -384.42767333984375, + "KL/std": 174.05111694335938, + "epoch": 0.45080763582966227, + "fcm_dpo/beta": 0.00224516075104475, + "fcm_dpo/delta": 0.029343584552407265, + "fcm_dpo/margin": 165.57266235351562, + "fcm_dpo/q_t": 0.41345101594924927, + "grad_norm": 22.831409454345703, + "learning_rate": 3.367098400098881e-07, + "logits/chosen": -0.3699556589126587, + "logits/rejected": -0.35030895471572876, + "logps/chosen": -289.02044677734375, + "logps/ref_chosen": -70.16542053222656, + "logps/ref_rejected": -86.97230529785156, + "logps/rejected": -471.39996337890625, + "loss": 1.1025, + "margin_dpo/margin_mean": 165.57266235351562, + "margin_dpo/margin_std": 212.66111755371094, + "step": 307 + }, + { + "KL/chosen_KL_mean": -228.22244262695312, + "KL/mean": -321.84100341796875, + "KL/rejected_KL_mean": -415.4595947265625, + "KL/std": 197.64083862304688, + "epoch": 0.4522760646108664, + "fcm_dpo/beta": 0.002246259246021509, + "fcm_dpo/delta": -0.021481268107891083, + "fcm_dpo/margin": 187.23712158203125, + "fcm_dpo/q_t": 0.40048325061798096, + "grad_norm": 28.529888153076172, + "learning_rate": 3.355050358314172e-07, + "logits/chosen": -0.3800508975982666, + "logits/rejected": -0.36786890029907227, + "logps/chosen": -283.4674377441406, + "logps/ref_chosen": -55.2449951171875, + "logps/ref_rejected": -79.37226104736328, + "logps/rejected": -494.83184814453125, + "loss": 1.0533, + "margin_dpo/margin_mean": 187.2371368408203, + "margin_dpo/margin_std": 190.32083129882812, + "step": 308 + }, + { + "KL/chosen_KL_mean": -236.49908447265625, + "KL/mean": -329.51934814453125, + "KL/rejected_KL_mean": -422.53961181640625, + "KL/std": 211.5294189453125, + "epoch": 0.45374449339207046, + "fcm_dpo/beta": 0.0022473763674497604, + "fcm_dpo/delta": -0.019291866570711136, + "fcm_dpo/margin": 186.04052734375, + "fcm_dpo/q_t": 0.40186938643455505, + "grad_norm": 31.124483108520508, + "learning_rate": 3.3429797851573183e-07, + "logits/chosen": -0.33029186725616455, + "logits/rejected": -0.32252657413482666, + "logps/chosen": -285.45819091796875, + "logps/ref_chosen": -48.959083557128906, + "logps/ref_rejected": -82.34072875976562, + "logps/rejected": -504.88031005859375, + "loss": 1.0741, + "margin_dpo/margin_mean": 186.04052734375, + "margin_dpo/margin_std": 224.29818725585938, + "step": 309 + }, + { + "KL/chosen_KL_mean": -281.655029296875, + "KL/mean": -370.4913330078125, + "KL/rejected_KL_mean": -459.32763671875, + "KL/std": 190.68673706054688, + "epoch": 0.4552129221732746, + "fcm_dpo/beta": 0.002242402173578739, + "fcm_dpo/delta": 0.001399170607328415, + "fcm_dpo/margin": 177.672607421875, + "fcm_dpo/q_t": 0.40604251623153687, + "grad_norm": 29.155431747436523, + "learning_rate": 3.3308869986991487e-07, + "logits/chosen": -0.3663063645362854, + "logits/rejected": -0.35591787099838257, + "logps/chosen": -344.3968200683594, + "logps/ref_chosen": -62.74177932739258, + "logps/ref_rejected": -79.9302978515625, + "logps/rejected": -539.2579345703125, + "loss": 1.0711, + "margin_dpo/margin_mean": 177.672607421875, + "margin_dpo/margin_std": 188.24441528320312, + "step": 310 + }, + { + "KL/chosen_KL_mean": -303.1356201171875, + "KL/mean": -403.75494384765625, + "KL/rejected_KL_mean": -504.37420654296875, + "KL/std": 248.3344268798828, + "epoch": 0.4566813509544787, + "fcm_dpo/beta": 0.0022182685788720846, + "fcm_dpo/delta": -0.04857812821865082, + "fcm_dpo/margin": 201.23855590820312, + "fcm_dpo/q_t": 0.3989013433456421, + "grad_norm": 23.382389068603516, + "learning_rate": 3.3187723175958346e-07, + "logits/chosen": -0.3620428442955017, + "logits/rejected": -0.3392923176288605, + "logps/chosen": -356.16363525390625, + "logps/ref_chosen": -53.02798080444336, + "logps/ref_rejected": -77.43820190429688, + "logps/rejected": -581.8123779296875, + "loss": 1.0653, + "margin_dpo/margin_mean": 201.23855590820312, + "margin_dpo/margin_std": 260.4375915527344, + "step": 311 + }, + { + "KL/chosen_KL_mean": -292.04132080078125, + "KL/mean": -381.0889892578125, + "KL/rejected_KL_mean": -470.13665771484375, + "KL/std": 213.00611877441406, + "epoch": 0.4581497797356828, + "fcm_dpo/beta": 0.002222396433353424, + "fcm_dpo/delta": 0.004101406782865524, + "fcm_dpo/margin": 178.0953826904297, + "fcm_dpo/q_t": 0.41095060110092163, + "grad_norm": 34.67451095581055, + "learning_rate": 3.306636061080487e-07, + "logits/chosen": -0.3244793117046356, + "logits/rejected": -0.3159826993942261, + "logps/chosen": -341.43353271484375, + "logps/ref_chosen": -49.39221954345703, + "logps/ref_rejected": -75.79280853271484, + "logps/rejected": -545.929443359375, + "loss": 1.102, + "margin_dpo/margin_mean": 178.09536743164062, + "margin_dpo/margin_std": 254.87359619140625, + "step": 312 + }, + { + "KL/chosen_KL_mean": -271.20880126953125, + "KL/mean": -367.263671875, + "KL/rejected_KL_mean": -463.318603515625, + "KL/std": 229.5218048095703, + "epoch": 0.45961820851688695, + "fcm_dpo/beta": 0.002221351722255349, + "fcm_dpo/delta": -0.02877044305205345, + "fcm_dpo/margin": 192.10980224609375, + "fcm_dpo/q_t": 0.40212100744247437, + "grad_norm": 26.70942497253418, + "learning_rate": 3.2944785489547537e-07, + "logits/chosen": -0.3600277602672577, + "logits/rejected": -0.3582299053668976, + "logps/chosen": -321.3615417480469, + "logps/ref_chosen": -50.152740478515625, + "logps/ref_rejected": -86.40620422363281, + "logps/rejected": -549.7247924804688, + "loss": 1.0837, + "margin_dpo/margin_mean": 192.10980224609375, + "margin_dpo/margin_std": 255.63043212890625, + "step": 313 + }, + { + "KL/chosen_KL_mean": -286.284423828125, + "KL/mean": -373.939453125, + "KL/rejected_KL_mean": -461.594482421875, + "KL/std": 214.23907470703125, + "epoch": 0.461086637298091, + "fcm_dpo/beta": 0.002198255155235529, + "fcm_dpo/delta": 0.014935776591300964, + "fcm_dpo/margin": 175.31007385253906, + "fcm_dpo/q_t": 0.4136376976966858, + "grad_norm": 24.576322555541992, + "learning_rate": 3.2823001015803857e-07, + "logits/chosen": -0.4146174192428589, + "logits/rejected": -0.41618144512176514, + "logps/chosen": -343.5220031738281, + "logps/ref_chosen": -57.237579345703125, + "logps/ref_rejected": -97.5965347290039, + "logps/rejected": -559.1910400390625, + "loss": 1.1215, + "margin_dpo/margin_mean": 175.31007385253906, + "margin_dpo/margin_std": 272.0423889160156, + "step": 314 + }, + { + "KL/chosen_KL_mean": -258.12774658203125, + "KL/mean": -340.7080078125, + "KL/rejected_KL_mean": -423.2882385253906, + "KL/std": 191.50604248046875, + "epoch": 0.46255506607929514, + "fcm_dpo/beta": 0.0022171130403876305, + "fcm_dpo/delta": 0.0351128987967968, + "fcm_dpo/margin": 165.16049194335938, + "fcm_dpo/q_t": 0.415382444858551, + "grad_norm": 20.407081604003906, + "learning_rate": 3.270101039870797e-07, + "logits/chosen": -0.31862980127334595, + "logits/rejected": -0.32194432616233826, + "logps/chosen": -307.19732666015625, + "logps/ref_chosen": -49.06958770751953, + "logps/ref_rejected": -85.68087768554688, + "logps/rejected": -508.9691162109375, + "loss": 1.1062, + "margin_dpo/margin_mean": 165.16049194335938, + "margin_dpo/margin_std": 214.27072143554688, + "step": 315 + }, + { + "KL/chosen_KL_mean": -253.05474853515625, + "KL/mean": -367.26123046875, + "KL/rejected_KL_mean": -481.4677429199219, + "KL/std": 226.0899658203125, + "epoch": 0.46402349486049926, + "fcm_dpo/beta": 0.0021798848174512386, + "fcm_dpo/delta": -0.10400072485208511, + "fcm_dpo/margin": 228.41297912597656, + "fcm_dpo/q_t": 0.3863717317581177, + "grad_norm": 26.12361717224121, + "learning_rate": 3.2578816852826086e-07, + "logits/chosen": -0.3711768090724945, + "logits/rejected": -0.37630313634872437, + "logps/chosen": -307.31549072265625, + "logps/ref_chosen": -54.26074981689453, + "logps/ref_rejected": -101.2814712524414, + "logps/rejected": -582.7492065429688, + "loss": 1.0163, + "margin_dpo/margin_mean": 228.41297912597656, + "margin_dpo/margin_std": 247.7483367919922, + "step": 316 + }, + { + "KL/chosen_KL_mean": -257.2327880859375, + "KL/mean": -380.733642578125, + "KL/rejected_KL_mean": -504.2344970703125, + "KL/std": 204.96792602539062, + "epoch": 0.4654919236417034, + "fcm_dpo/beta": 0.0021434309892356396, + "fcm_dpo/delta": -0.13644810020923615, + "fcm_dpo/margin": 247.001708984375, + "fcm_dpo/q_t": 0.3767782747745514, + "grad_norm": 26.812149047851562, + "learning_rate": 3.2456423598071783e-07, + "logits/chosen": -0.40798693895339966, + "logits/rejected": -0.4007112681865692, + "logps/chosen": -313.3270263671875, + "logps/ref_chosen": -56.094207763671875, + "logps/ref_rejected": -100.69905090332031, + "logps/rejected": -604.93359375, + "loss": 0.9842, + "margin_dpo/margin_mean": 247.001708984375, + "margin_dpo/margin_std": 228.92449951171875, + "step": 317 + }, + { + "KL/chosen_KL_mean": -277.570068359375, + "KL/mean": -369.9479675292969, + "KL/rejected_KL_mean": -462.3258361816406, + "KL/std": 211.07015991210938, + "epoch": 0.4669603524229075, + "fcm_dpo/beta": 0.0021276050247251987, + "fcm_dpo/delta": 0.0070451050996780396, + "fcm_dpo/margin": 184.7557373046875, + "fcm_dpo/q_t": 0.40913063287734985, + "grad_norm": 27.008316040039062, + "learning_rate": 3.233383385962115e-07, + "logits/chosen": -0.4423850178718567, + "logits/rejected": -0.414249062538147, + "logps/chosen": -342.21575927734375, + "logps/ref_chosen": -64.64569854736328, + "logps/ref_rejected": -82.76425170898438, + "logps/rejected": -545.090087890625, + "loss": 1.0887, + "margin_dpo/margin_mean": 184.75575256347656, + "margin_dpo/margin_std": 232.82424926757812, + "step": 318 + }, + { + "KL/chosen_KL_mean": -247.65008544921875, + "KL/mean": -358.6512451171875, + "KL/rejected_KL_mean": -469.65240478515625, + "KL/std": 232.62017822265625, + "epoch": 0.4684287812041116, + "fcm_dpo/beta": 0.0021020234562456608, + "fcm_dpo/delta": -0.06988762319087982, + "fcm_dpo/margin": 222.0023193359375, + "fcm_dpo/q_t": 0.39178723096847534, + "grad_norm": 22.64339828491211, + "learning_rate": 3.2211050867827805e-07, + "logits/chosen": -0.41219907999038696, + "logits/rejected": -0.4256317913532257, + "logps/chosen": -297.0338134765625, + "logps/ref_chosen": -49.383758544921875, + "logps/ref_rejected": -113.90650939941406, + "logps/rejected": -583.5589599609375, + "loss": 1.0377, + "margin_dpo/margin_mean": 222.00233459472656, + "margin_dpo/margin_std": 251.9779052734375, + "step": 319 + }, + { + "KL/chosen_KL_mean": -261.9112548828125, + "KL/mean": -383.52362060546875, + "KL/rejected_KL_mean": -505.13592529296875, + "KL/std": 241.70034790039062, + "epoch": 0.4698972099853157, + "fcm_dpo/beta": 0.0020547928288578987, + "fcm_dpo/delta": -0.10551808774471283, + "fcm_dpo/margin": 243.2246856689453, + "fcm_dpo/q_t": 0.3849526047706604, + "grad_norm": 28.402790069580078, + "learning_rate": 3.208807785813777e-07, + "logits/chosen": -0.398881196975708, + "logits/rejected": -0.4026961922645569, + "logps/chosen": -321.4161376953125, + "logps/ref_chosen": -59.50489044189453, + "logps/ref_rejected": -97.66717529296875, + "logps/rejected": -602.8031005859375, + "loss": 1.0129, + "margin_dpo/margin_mean": 243.22470092773438, + "margin_dpo/margin_std": 259.66607666015625, + "step": 320 + }, + { + "KL/chosen_KL_mean": -320.12939453125, + "KL/mean": -424.35235595703125, + "KL/rejected_KL_mean": -528.5753173828125, + "KL/std": 247.34034729003906, + "epoch": 0.4713656387665198, + "fcm_dpo/beta": 0.0020298874005675316, + "fcm_dpo/delta": -0.025036636739969254, + "fcm_dpo/margin": 208.4459228515625, + "fcm_dpo/q_t": 0.4030313193798065, + "grad_norm": 25.453643798828125, + "learning_rate": 3.1964918071004217e-07, + "logits/chosen": -0.36571186780929565, + "logits/rejected": -0.3529800772666931, + "logps/chosen": -381.6781005859375, + "logps/ref_chosen": -61.548683166503906, + "logps/ref_rejected": -91.64103698730469, + "logps/rejected": -620.2163696289062, + "loss": 1.0767, + "margin_dpo/margin_mean": 208.4459228515625, + "margin_dpo/margin_std": 262.54095458984375, + "step": 321 + }, + { + "KL/chosen_KL_mean": -278.1830139160156, + "KL/mean": -390.0034484863281, + "KL/rejected_KL_mean": -501.8238525390625, + "KL/std": 217.16629028320312, + "epoch": 0.47283406754772395, + "fcm_dpo/beta": 0.0020174758974462748, + "fcm_dpo/delta": -0.05392756685614586, + "fcm_dpo/margin": 223.640869140625, + "fcm_dpo/q_t": 0.39428332448005676, + "grad_norm": 33.35853958129883, + "learning_rate": 3.184157475180207e-07, + "logits/chosen": -0.3929744362831116, + "logits/rejected": -0.39414313435554504, + "logps/chosen": -335.4730529785156, + "logps/ref_chosen": -57.29003143310547, + "logps/ref_rejected": -95.74992370605469, + "logps/rejected": -597.5737915039062, + "loss": 1.0374, + "margin_dpo/margin_mean": 223.640869140625, + "margin_dpo/margin_std": 232.15565490722656, + "step": 322 + }, + { + "KL/chosen_KL_mean": -303.02447509765625, + "KL/mean": -401.8762512207031, + "KL/rejected_KL_mean": -500.72802734375, + "KL/std": 222.78414916992188, + "epoch": 0.47430249632892807, + "fcm_dpo/beta": 0.0020154546946287155, + "fcm_dpo/delta": 0.0015676822513341904, + "fcm_dpo/margin": 197.70355224609375, + "fcm_dpo/q_t": 0.4071164131164551, + "grad_norm": 40.872833251953125, + "learning_rate": 3.171805115074251e-07, + "logits/chosen": -0.43304672837257385, + "logits/rejected": -0.4297791123390198, + "logps/chosen": -354.2584228515625, + "logps/ref_chosen": -51.23395919799805, + "logps/ref_rejected": -75.06192016601562, + "logps/rejected": -575.7899169921875, + "loss": 1.086, + "margin_dpo/margin_mean": 197.70355224609375, + "margin_dpo/margin_std": 243.023681640625, + "step": 323 + }, + { + "KL/chosen_KL_mean": -347.34503173828125, + "KL/mean": -441.3756103515625, + "KL/rejected_KL_mean": -535.4061889648438, + "KL/std": 241.51780700683594, + "epoch": 0.47577092511013214, + "fcm_dpo/beta": 0.0020376183092594147, + "fcm_dpo/delta": 0.016333594918251038, + "fcm_dpo/margin": 188.06114196777344, + "fcm_dpo/q_t": 0.4135010838508606, + "grad_norm": 41.4670295715332, + "learning_rate": 3.1594350522787295e-07, + "logits/chosen": -0.42671218514442444, + "logits/rejected": -0.41737309098243713, + "logps/chosen": -412.4801940917969, + "logps/ref_chosen": -65.13516998291016, + "logps/ref_rejected": -86.47750854492188, + "logps/rejected": -621.8836669921875, + "loss": 1.1178, + "margin_dpo/margin_mean": 188.0611572265625, + "margin_dpo/margin_std": 271.6750183105469, + "step": 324 + }, + { + "KL/chosen_KL_mean": -271.2542724609375, + "KL/mean": -350.12774658203125, + "KL/rejected_KL_mean": -429.001220703125, + "KL/std": 204.61688232421875, + "epoch": 0.47723935389133626, + "fcm_dpo/beta": 0.002051199320703745, + "fcm_dpo/delta": 0.07886850088834763, + "fcm_dpo/margin": 157.7469482421875, + "fcm_dpo/q_t": 0.4243575930595398, + "grad_norm": 27.271472930908203, + "learning_rate": 3.147047612756302e-07, + "logits/chosen": -0.4851588308811188, + "logits/rejected": -0.4723260998725891, + "logps/chosen": -327.4698486328125, + "logps/ref_chosen": -56.215599060058594, + "logps/ref_rejected": -70.08592987060547, + "logps/rejected": -499.0871276855469, + "loss": 1.137, + "margin_dpo/margin_mean": 157.7469482421875, + "margin_dpo/margin_std": 213.0748291015625, + "step": 325 + }, + { + "KL/chosen_KL_mean": -291.67022705078125, + "KL/mean": -364.40618896484375, + "KL/rejected_KL_mean": -437.1421813964844, + "KL/std": 193.47927856445312, + "epoch": 0.4787077826725404, + "fcm_dpo/beta": 0.0020837995689362288, + "fcm_dpo/delta": 0.10001323372125626, + "fcm_dpo/margin": 145.47195434570312, + "fcm_dpo/q_t": 0.4282350540161133, + "grad_norm": 77.27185821533203, + "learning_rate": 3.134643122927519e-07, + "logits/chosen": -0.4964238405227661, + "logits/rejected": -0.4864235520362854, + "logps/chosen": -364.39520263671875, + "logps/ref_chosen": -72.72496032714844, + "logps/ref_rejected": -79.8467788696289, + "logps/rejected": -516.9889526367188, + "loss": 1.1457, + "margin_dpo/margin_mean": 145.47195434570312, + "margin_dpo/margin_std": 189.67251586914062, + "step": 326 + }, + { + "KL/chosen_KL_mean": -262.9017333984375, + "KL/mean": -374.2644348144531, + "KL/rejected_KL_mean": -485.62713623046875, + "KL/std": 206.111572265625, + "epoch": 0.4801762114537445, + "fcm_dpo/beta": 0.0020731650292873383, + "fcm_dpo/delta": -0.06503181904554367, + "fcm_dpo/margin": 222.725341796875, + "fcm_dpo/q_t": 0.39099863171577454, + "grad_norm": 71.85537719726562, + "learning_rate": 3.1222219096622264e-07, + "logits/chosen": -0.43772637844085693, + "logits/rejected": -0.4322543740272522, + "logps/chosen": -332.0361633300781, + "logps/ref_chosen": -69.13441467285156, + "logps/ref_rejected": -111.93377685546875, + "logps/rejected": -597.5609130859375, + "loss": 1.0265, + "margin_dpo/margin_mean": 222.725341796875, + "margin_dpo/margin_std": 220.58840942382812, + "step": 327 + }, + { + "KL/chosen_KL_mean": -290.6591796875, + "KL/mean": -395.22021484375, + "KL/rejected_KL_mean": -499.78118896484375, + "KL/std": 233.2576904296875, + "epoch": 0.48164464023494863, + "fcm_dpo/beta": 0.0020635989494621754, + "fcm_dpo/delta": -0.03295481950044632, + "fcm_dpo/margin": 209.12203979492188, + "fcm_dpo/q_t": 0.40090832114219666, + "grad_norm": 34.49656295776367, + "learning_rate": 3.1097843002709427e-07, + "logits/chosen": -0.4391549825668335, + "logits/rejected": -0.44703197479248047, + "logps/chosen": -350.34637451171875, + "logps/ref_chosen": -59.68719482421875, + "logps/ref_rejected": -90.85499572753906, + "logps/rejected": -590.6361694335938, + "loss": 1.0657, + "margin_dpo/margin_mean": 209.12203979492188, + "margin_dpo/margin_std": 258.40020751953125, + "step": 328 + }, + { + "KL/chosen_KL_mean": -349.1519470214844, + "KL/mean": -458.60186767578125, + "KL/rejected_KL_mean": -568.0517578125, + "KL/std": 267.2418212890625, + "epoch": 0.4831130690161527, + "fcm_dpo/beta": 0.0020334022119641304, + "fcm_dpo/delta": -0.047926321625709534, + "fcm_dpo/margin": 218.89981079101562, + "fcm_dpo/q_t": 0.39808762073516846, + "grad_norm": 32.74344253540039, + "learning_rate": 3.0973306224962437e-07, + "logits/chosen": -0.4355248808860779, + "logits/rejected": -0.4343896210193634, + "logps/chosen": -414.39813232421875, + "logps/ref_chosen": -65.2461929321289, + "logps/ref_rejected": -100.69770812988281, + "logps/rejected": -668.74951171875, + "loss": 1.0751, + "margin_dpo/margin_mean": 218.89981079101562, + "margin_dpo/margin_std": 289.7802734375, + "step": 329 + }, + { + "KL/chosen_KL_mean": -293.26153564453125, + "KL/mean": -410.9012756347656, + "KL/rejected_KL_mean": -528.5409545898438, + "KL/std": 247.63658142089844, + "epoch": 0.4845814977973568, + "fcm_dpo/beta": 0.002014409750699997, + "fcm_dpo/delta": -0.0776476040482521, + "fcm_dpo/margin": 235.2794189453125, + "fcm_dpo/q_t": 0.39114266633987427, + "grad_norm": 23.221515655517578, + "learning_rate": 3.084861204504122e-07, + "logits/chosen": -0.36399000883102417, + "logits/rejected": -0.3640822768211365, + "logps/chosen": -340.2598876953125, + "logps/ref_chosen": -46.998348236083984, + "logps/ref_rejected": -86.87684631347656, + "logps/rejected": -615.4178466796875, + "loss": 1.035, + "margin_dpo/margin_mean": 235.27944946289062, + "margin_dpo/margin_std": 269.7514343261719, + "step": 330 + }, + { + "KL/chosen_KL_mean": -312.65850830078125, + "KL/mean": -426.49957275390625, + "KL/rejected_KL_mean": -540.340576171875, + "KL/std": 204.22564697265625, + "epoch": 0.48604992657856094, + "fcm_dpo/beta": 0.0019897697493433952, + "fcm_dpo/delta": -0.05556933581829071, + "fcm_dpo/margin": 227.68206787109375, + "fcm_dpo/q_t": 0.39270973205566406, + "grad_norm": 25.662797927856445, + "learning_rate": 3.072376374875335e-07, + "logits/chosen": -0.4090471863746643, + "logits/rejected": -0.40625983476638794, + "logps/chosen": -363.1827392578125, + "logps/ref_chosen": -50.52424621582031, + "logps/ref_rejected": -89.01544189453125, + "logps/rejected": -629.3560180664062, + "loss": 1.0209, + "margin_dpo/margin_mean": 227.68206787109375, + "margin_dpo/margin_std": 201.46847534179688, + "step": 331 + }, + { + "KL/chosen_KL_mean": -308.2393798828125, + "KL/mean": -389.4369812011719, + "KL/rejected_KL_mean": -470.63458251953125, + "KL/std": 208.03164672851562, + "epoch": 0.48751835535976507, + "fcm_dpo/beta": 0.002010452328249812, + "fcm_dpo/delta": 0.07582204043865204, + "fcm_dpo/margin": 162.3951873779297, + "fcm_dpo/q_t": 0.42497020959854126, + "grad_norm": 21.15984535217285, + "learning_rate": 3.059876462596758e-07, + "logits/chosen": -0.3901352882385254, + "logits/rejected": -0.3734605014324188, + "logps/chosen": -357.419677734375, + "logps/ref_chosen": -49.18028259277344, + "logps/ref_rejected": -76.48515319824219, + "logps/rejected": -547.1197509765625, + "loss": 1.137, + "margin_dpo/margin_mean": 162.3951873779297, + "margin_dpo/margin_std": 232.2877655029297, + "step": 332 + }, + { + "KL/chosen_KL_mean": -318.0777893066406, + "KL/mean": -425.67242431640625, + "KL/rejected_KL_mean": -533.26708984375, + "KL/std": 241.38174438476562, + "epoch": 0.4889867841409692, + "fcm_dpo/beta": 0.0019922310020774603, + "fcm_dpo/delta": -0.031133878976106644, + "fcm_dpo/margin": 215.18930053710938, + "fcm_dpo/q_t": 0.4023503363132477, + "grad_norm": 23.545013427734375, + "learning_rate": 3.0473617970527015e-07, + "logits/chosen": -0.4057619571685791, + "logits/rejected": -0.3980754017829895, + "logps/chosen": -381.8335266113281, + "logps/ref_chosen": -63.75574493408203, + "logps/ref_rejected": -95.04411315917969, + "logps/rejected": -628.3111572265625, + "loss": 1.0854, + "margin_dpo/margin_mean": 215.18930053710938, + "margin_dpo/margin_std": 291.11785888671875, + "step": 333 + }, + { + "KL/chosen_KL_mean": -299.594970703125, + "KL/mean": -393.54693603515625, + "KL/rejected_KL_mean": -487.4989318847656, + "KL/std": 260.1100769042969, + "epoch": 0.49045521292217326, + "fcm_dpo/beta": 0.0020080246031284332, + "fcm_dpo/delta": 0.02357984334230423, + "fcm_dpo/margin": 187.9039306640625, + "fcm_dpo/q_t": 0.414248526096344, + "grad_norm": 22.068880081176758, + "learning_rate": 3.034832708016243e-07, + "logits/chosen": -0.41038450598716736, + "logits/rejected": -0.40884631872177124, + "logps/chosen": -366.5747375488281, + "logps/ref_chosen": -66.97975158691406, + "logps/ref_rejected": -95.31692504882812, + "logps/rejected": -582.8158569335938, + "loss": 1.1256, + "margin_dpo/margin_mean": 187.9039306640625, + "margin_dpo/margin_std": 291.5281982421875, + "step": 334 + }, + { + "KL/chosen_KL_mean": -315.5887756347656, + "KL/mean": -390.5389404296875, + "KL/rejected_KL_mean": -465.4891052246094, + "KL/std": 233.7744140625, + "epoch": 0.4919236417033774, + "fcm_dpo/beta": 0.0020120900589972734, + "fcm_dpo/delta": 0.0005856315256096423, + "fcm_dpo/margin": 149.90032958984375, + "fcm_dpo/q_t": 0.4307052493095398, + "grad_norm": 24.0537052154541, + "learning_rate": 3.022289525640531e-07, + "logits/chosen": -0.41110914945602417, + "logits/rejected": -0.38828492164611816, + "logps/chosen": -378.1312561035156, + "logps/ref_chosen": -62.54248046875, + "logps/ref_rejected": -87.61770629882812, + "logps/rejected": -553.1068115234375, + "loss": 1.1711, + "margin_dpo/margin_mean": 149.90032958984375, + "margin_dpo/margin_std": 253.32125854492188, + "step": 335 + }, + { + "KL/chosen_KL_mean": -303.5247802734375, + "KL/mean": -422.5467529296875, + "KL/rejected_KL_mean": -541.5687255859375, + "KL/std": 268.7447509765625, + "epoch": 0.4933920704845815, + "fcm_dpo/beta": 0.0019951139111071825, + "fcm_dpo/delta": -0.07861563563346863, + "fcm_dpo/margin": 238.04388427734375, + "fcm_dpo/q_t": 0.39314505457878113, + "grad_norm": 26.437314987182617, + "learning_rate": 3.009732580450086e-07, + "logits/chosen": -0.384580135345459, + "logits/rejected": -0.38513875007629395, + "logps/chosen": -358.05596923828125, + "logps/ref_chosen": -54.53115463256836, + "logps/ref_rejected": -104.40424346923828, + "logps/rejected": -645.972900390625, + "loss": 1.0597, + "margin_dpo/margin_mean": 238.04388427734375, + "margin_dpo/margin_std": 317.6496276855469, + "step": 336 + }, + { + "KL/chosen_KL_mean": -271.9599609375, + "KL/mean": -389.19110107421875, + "KL/rejected_KL_mean": -506.4222106933594, + "KL/std": 216.30596923828125, + "epoch": 0.4948604992657856, + "fcm_dpo/beta": 0.001958012580871582, + "fcm_dpo/delta": -0.06217820942401886, + "fcm_dpo/margin": 234.46224975585938, + "fcm_dpo/q_t": 0.39352160692214966, + "grad_norm": 28.5858154296875, + "learning_rate": 2.9971622033320914e-07, + "logits/chosen": -0.43051382899284363, + "logits/rejected": -0.42107608914375305, + "logps/chosen": -337.088623046875, + "logps/ref_chosen": -65.12869262695312, + "logps/ref_rejected": -101.72701263427734, + "logps/rejected": -608.1492309570312, + "loss": 1.0362, + "margin_dpo/margin_mean": 234.46224975585938, + "margin_dpo/margin_std": 254.32135009765625, + "step": 337 + }, + { + "KL/chosen_KL_mean": -241.61541748046875, + "KL/mean": -351.14056396484375, + "KL/rejected_KL_mean": -460.665771484375, + "KL/std": 205.6318359375, + "epoch": 0.49632892804698975, + "fcm_dpo/beta": 0.0019469063263386488, + "fcm_dpo/delta": -0.02767963334918022, + "fcm_dpo/margin": 219.05032348632812, + "fcm_dpo/q_t": 0.3992460072040558, + "grad_norm": 28.797653198242188, + "learning_rate": 2.984578725527675e-07, + "logits/chosen": -0.4226057231426239, + "logits/rejected": -0.413729190826416, + "logps/chosen": -300.0381164550781, + "logps/ref_chosen": -58.422706604003906, + "logps/ref_rejected": -89.06854248046875, + "logps/rejected": -549.7342529296875, + "loss": 1.0451, + "margin_dpo/margin_mean": 219.0503387451172, + "margin_dpo/margin_std": 213.1126251220703, + "step": 338 + }, + { + "KL/chosen_KL_mean": -263.037353515625, + "KL/mean": -371.4858703613281, + "KL/rejected_KL_mean": -479.93438720703125, + "KL/std": 212.88307189941406, + "epoch": 0.4977973568281938, + "fcm_dpo/beta": 0.0019529033452272415, + "fcm_dpo/delta": -0.025594212114810944, + "fcm_dpo/margin": 216.89700317382812, + "fcm_dpo/q_t": 0.40049952268600464, + "grad_norm": 28.772863388061523, + "learning_rate": 2.9719824786231796e-07, + "logits/chosen": -0.4214293956756592, + "logits/rejected": -0.4066346287727356, + "logps/chosen": -323.0326843261719, + "logps/ref_chosen": -59.99531555175781, + "logps/ref_rejected": -103.9109115600586, + "logps/rejected": -583.8452758789062, + "loss": 1.0555, + "margin_dpo/margin_mean": 216.89700317382812, + "margin_dpo/margin_std": 217.88153076171875, + "step": 339 + }, + { + "KL/chosen_KL_mean": -285.5733337402344, + "KL/mean": -379.6592712402344, + "KL/rejected_KL_mean": -473.7452087402344, + "KL/std": 208.79730224609375, + "epoch": 0.49926578560939794, + "fcm_dpo/beta": 0.0019388075452297926, + "fcm_dpo/delta": 0.0363488644361496, + "fcm_dpo/margin": 188.17184448242188, + "fcm_dpo/q_t": 0.4153197407722473, + "grad_norm": 23.684680938720703, + "learning_rate": 2.959373794541426e-07, + "logits/chosen": -0.3585469722747803, + "logits/rejected": -0.3364448547363281, + "logps/chosen": -338.403564453125, + "logps/ref_chosen": -52.83022689819336, + "logps/ref_rejected": -73.10723114013672, + "logps/rejected": -546.8524169921875, + "loss": 1.1127, + "margin_dpo/margin_mean": 188.17184448242188, + "margin_dpo/margin_std": 253.51083374023438, + "step": 340 + }, + { + "KL/chosen_KL_mean": -286.9049072265625, + "KL/mean": -402.452392578125, + "KL/rejected_KL_mean": -517.9999389648438, + "KL/std": 244.80047607421875, + "epoch": 0.5007342143906021, + "fcm_dpo/beta": 0.0019295980455353856, + "fcm_dpo/delta": -0.04842275381088257, + "fcm_dpo/margin": 231.09503173828125, + "fcm_dpo/q_t": 0.39680999517440796, + "grad_norm": 23.347299575805664, + "learning_rate": 2.946753005532965e-07, + "logits/chosen": -0.38709864020347595, + "logits/rejected": -0.38579249382019043, + "logps/chosen": -334.8046875, + "logps/ref_chosen": -47.899803161621094, + "logps/ref_rejected": -101.80987548828125, + "logps/rejected": -619.809814453125, + "loss": 1.0474, + "margin_dpo/margin_mean": 231.0950164794922, + "margin_dpo/margin_std": 254.49545288085938, + "step": 341 + }, + { + "KL/chosen_KL_mean": -302.0462341308594, + "KL/mean": -403.65533447265625, + "KL/rejected_KL_mean": -505.2643737792969, + "KL/std": 232.10491943359375, + "epoch": 0.5022026431718062, + "fcm_dpo/beta": 0.0019408478401601315, + "fcm_dpo/delta": 0.005411949008703232, + "fcm_dpo/margin": 203.21817016601562, + "fcm_dpo/q_t": 0.4091772437095642, + "grad_norm": 22.63515853881836, + "learning_rate": 2.934120444167326e-07, + "logits/chosen": -0.41977885365486145, + "logits/rejected": -0.39589670300483704, + "logps/chosen": -374.0428771972656, + "logps/ref_chosen": -71.99664306640625, + "logps/ref_rejected": -92.58959197998047, + "logps/rejected": -597.85400390625, + "loss": 1.0955, + "margin_dpo/margin_mean": 203.21817016601562, + "margin_dpo/margin_std": 270.35748291015625, + "step": 342 + }, + { + "KL/chosen_KL_mean": -301.8097839355469, + "KL/mean": -423.54559326171875, + "KL/rejected_KL_mean": -545.2814331054688, + "KL/std": 240.10980224609375, + "epoch": 0.5036710719530103, + "fcm_dpo/beta": 0.001911632250994444, + "fcm_dpo/delta": -0.06870149821043015, + "fcm_dpo/margin": 243.47164916992188, + "fcm_dpo/q_t": 0.389914333820343, + "grad_norm": 26.55186653137207, + "learning_rate": 2.9214764433242476e-07, + "logits/chosen": -0.40133050084114075, + "logits/rejected": -0.4047289490699768, + "logps/chosen": -356.21539306640625, + "logps/ref_chosen": -54.405616760253906, + "logps/ref_rejected": -111.04142761230469, + "logps/rejected": -656.3228759765625, + "loss": 1.0164, + "margin_dpo/margin_mean": 243.47164916992188, + "margin_dpo/margin_std": 218.26107788085938, + "step": 343 + }, + { + "KL/chosen_KL_mean": -306.146728515625, + "KL/mean": -411.81256103515625, + "KL/rejected_KL_mean": -517.4783935546875, + "KL/std": 263.5313720703125, + "epoch": 0.5051395007342144, + "fcm_dpo/beta": 0.0019231976475566626, + "fcm_dpo/delta": -0.008610613644123077, + "fcm_dpo/margin": 211.33169555664062, + "fcm_dpo/q_t": 0.40852880477905273, + "grad_norm": 30.049617767333984, + "learning_rate": 2.9088213361849126e-07, + "logits/chosen": -0.436582088470459, + "logits/rejected": -0.4415278732776642, + "logps/chosen": -360.11138916015625, + "logps/ref_chosen": -53.96466827392578, + "logps/ref_rejected": -90.62336730957031, + "logps/rejected": -608.101806640625, + "loss": 1.09, + "margin_dpo/margin_mean": 211.33169555664062, + "margin_dpo/margin_std": 268.55743408203125, + "step": 344 + }, + { + "KL/chosen_KL_mean": -365.295166015625, + "KL/mean": -484.23297119140625, + "KL/rejected_KL_mean": -603.1707153320312, + "KL/std": 253.80105590820312, + "epoch": 0.5066079295154186, + "fcm_dpo/beta": 0.001892891013994813, + "fcm_dpo/delta": -0.052630215883255005, + "fcm_dpo/margin": 237.8755645751953, + "fcm_dpo/q_t": 0.39596062898635864, + "grad_norm": 21.18876075744629, + "learning_rate": 2.896155456223163e-07, + "logits/chosen": -0.46160662174224854, + "logits/rejected": -0.46070683002471924, + "logps/chosen": -426.9808654785156, + "logps/ref_chosen": -61.685699462890625, + "logps/ref_rejected": -99.49041748046875, + "logps/rejected": -702.6611328125, + "loss": 1.0516, + "margin_dpo/margin_mean": 237.8755645751953, + "margin_dpo/margin_std": 280.4576721191406, + "step": 345 + }, + { + "KL/chosen_KL_mean": -366.03582763671875, + "KL/mean": -478.0716552734375, + "KL/rejected_KL_mean": -590.107421875, + "KL/std": 247.56460571289062, + "epoch": 0.5080763582966226, + "fcm_dpo/beta": 0.0018771484028548002, + "fcm_dpo/delta": -0.02152422070503235, + "fcm_dpo/margin": 224.07159423828125, + "fcm_dpo/q_t": 0.40137046575546265, + "grad_norm": 23.943134307861328, + "learning_rate": 2.883479137196714e-07, + "logits/chosen": -0.38838616013526917, + "logits/rejected": -0.3767489790916443, + "logps/chosen": -421.2921142578125, + "logps/ref_chosen": -55.256263732910156, + "logps/ref_rejected": -77.41532135009766, + "logps/rejected": -667.5227661132812, + "loss": 1.0685, + "margin_dpo/margin_mean": 224.07159423828125, + "margin_dpo/margin_std": 267.4580993652344, + "step": 346 + }, + { + "KL/chosen_KL_mean": -364.4321594238281, + "KL/mean": -474.5341796875, + "KL/rejected_KL_mean": -584.63623046875, + "KL/std": 261.7486877441406, + "epoch": 0.5095447870778267, + "fcm_dpo/beta": 0.001872351742349565, + "fcm_dpo/delta": -0.012826315127313137, + "fcm_dpo/margin": 220.20404052734375, + "fcm_dpo/q_t": 0.40555307269096375, + "grad_norm": 25.194826126098633, + "learning_rate": 2.8707927131383614e-07, + "logits/chosen": -0.4138352870941162, + "logits/rejected": -0.40841221809387207, + "logps/chosen": -421.9984130859375, + "logps/ref_chosen": -57.56623840332031, + "logps/ref_rejected": -92.35509490966797, + "logps/rejected": -676.9913330078125, + "loss": 1.0869, + "margin_dpo/margin_mean": 220.20404052734375, + "margin_dpo/margin_std": 294.784423828125, + "step": 347 + }, + { + "KL/chosen_KL_mean": -316.824951171875, + "KL/mean": -411.66033935546875, + "KL/rejected_KL_mean": -506.4957580566406, + "KL/std": 220.3616180419922, + "epoch": 0.5110132158590308, + "fcm_dpo/beta": 0.0018715888727456331, + "fcm_dpo/delta": 0.04614096134901047, + "fcm_dpo/margin": 189.6708221435547, + "fcm_dpo/q_t": 0.41821640729904175, + "grad_norm": 21.97551727294922, + "learning_rate": 2.858096518347179e-07, + "logits/chosen": -0.44485563039779663, + "logits/rejected": -0.446816623210907, + "logps/chosen": -373.14263916015625, + "logps/ref_chosen": -56.31770324707031, + "logps/ref_rejected": -89.13836669921875, + "logps/rejected": -595.6341552734375, + "loss": 1.1197, + "margin_dpo/margin_mean": 189.6708221435547, + "margin_dpo/margin_std": 254.99215698242188, + "step": 348 + }, + { + "KL/chosen_KL_mean": -301.78118896484375, + "KL/mean": -409.97479248046875, + "KL/rejected_KL_mean": -518.1683959960938, + "KL/std": 247.78256225585938, + "epoch": 0.5124816446402349, + "fcm_dpo/beta": 0.0018918917048722506, + "fcm_dpo/delta": -0.010010870173573494, + "fcm_dpo/margin": 216.3872528076172, + "fcm_dpo/q_t": 0.40753403306007385, + "grad_norm": 20.3768367767334, + "learning_rate": 2.845390887379706e-07, + "logits/chosen": -0.4286951422691345, + "logits/rejected": -0.42969733476638794, + "logps/chosen": -359.80670166015625, + "logps/ref_chosen": -58.025516510009766, + "logps/ref_rejected": -97.50515747070312, + "logps/rejected": -615.673583984375, + "loss": 1.0971, + "margin_dpo/margin_mean": 216.3872528076172, + "margin_dpo/margin_std": 307.3798828125, + "step": 349 + }, + { + "KL/chosen_KL_mean": -318.9201354980469, + "KL/mean": -423.44635009765625, + "KL/rejected_KL_mean": -527.9725341796875, + "KL/std": 231.78794860839844, + "epoch": 0.5139500734214391, + "fcm_dpo/beta": 0.0018784052226692438, + "fcm_dpo/delta": 0.007296178489923477, + "fcm_dpo/margin": 209.05239868164062, + "fcm_dpo/q_t": 0.40847963094711304, + "grad_norm": 30.007631301879883, + "learning_rate": 2.8326761550411346e-07, + "logits/chosen": -0.4662485122680664, + "logits/rejected": -0.46992364525794983, + "logps/chosen": -383.2506103515625, + "logps/ref_chosen": -64.33049011230469, + "logps/ref_rejected": -89.87164306640625, + "logps/rejected": -617.84423828125, + "loss": 1.1014, + "margin_dpo/margin_mean": 209.0524139404297, + "margin_dpo/margin_std": 286.0767822265625, + "step": 350 + }, + { + "KL/chosen_KL_mean": -291.11431884765625, + "KL/mean": -417.4867858886719, + "KL/rejected_KL_mean": -543.8592529296875, + "KL/std": 271.51190185546875, + "epoch": 0.5154185022026432, + "fcm_dpo/beta": 0.0018615357112139463, + "fcm_dpo/delta": -0.07428093254566193, + "fcm_dpo/margin": 252.7449951171875, + "fcm_dpo/q_t": 0.39380645751953125, + "grad_norm": 30.30561637878418, + "learning_rate": 2.819952656376487e-07, + "logits/chosen": -0.45949965715408325, + "logits/rejected": -0.4594254493713379, + "logps/chosen": -351.78643798828125, + "logps/ref_chosen": -60.6721305847168, + "logps/ref_rejected": -101.5654296875, + "logps/rejected": -645.4246826171875, + "loss": 1.049, + "margin_dpo/margin_mean": 252.7449951171875, + "margin_dpo/margin_std": 314.3124084472656, + "step": 351 + }, + { + "KL/chosen_KL_mean": -349.67596435546875, + "KL/mean": -426.81182861328125, + "KL/rejected_KL_mean": -503.9476623535156, + "KL/std": 249.33474731445312, + "epoch": 0.5168869309838473, + "fcm_dpo/beta": 0.0018662881338968873, + "fcm_dpo/delta": 0.01947195641696453, + "fcm_dpo/margin": 154.27169799804688, + "fcm_dpo/q_t": 0.4333202540874481, + "grad_norm": 31.560047149658203, + "learning_rate": 2.8072207266617854e-07, + "logits/chosen": -0.427381694316864, + "logits/rejected": -0.39572709798812866, + "logps/chosen": -420.6194152832031, + "logps/ref_chosen": -70.9434585571289, + "logps/ref_rejected": -76.6419677734375, + "logps/rejected": -580.589599609375, + "loss": 1.1823, + "margin_dpo/margin_mean": 154.27169799804688, + "margin_dpo/margin_std": 272.5928039550781, + "step": 352 + }, + { + "KL/chosen_KL_mean": -324.58709716796875, + "KL/mean": -425.61248779296875, + "KL/rejected_KL_mean": -526.6378173828125, + "KL/std": 249.03515625, + "epoch": 0.5183553597650514, + "fcm_dpo/beta": 0.001863989164121449, + "fcm_dpo/delta": 0.023936476558446884, + "fcm_dpo/margin": 202.05075073242188, + "fcm_dpo/q_t": 0.4135817885398865, + "grad_norm": 25.82649040222168, + "learning_rate": 2.794480701395219e-07, + "logits/chosen": -0.47480159997940063, + "logits/rejected": -0.46382421255111694, + "logps/chosen": -382.982421875, + "logps/ref_chosen": -58.39533996582031, + "logps/ref_rejected": -80.33553314208984, + "logps/rejected": -606.973388671875, + "loss": 1.1089, + "margin_dpo/margin_mean": 202.0507354736328, + "margin_dpo/margin_std": 275.25592041015625, + "step": 353 + }, + { + "KL/chosen_KL_mean": -270.94378662109375, + "KL/mean": -384.894287109375, + "KL/rejected_KL_mean": -498.8448181152344, + "KL/std": 226.17709350585938, + "epoch": 0.5198237885462555, + "fcm_dpo/beta": 0.0018658683402463794, + "fcm_dpo/delta": -0.026406319811940193, + "fcm_dpo/margin": 227.90101623535156, + "fcm_dpo/q_t": 0.399784117937088, + "grad_norm": 27.079309463500977, + "learning_rate": 2.781732916288303e-07, + "logits/chosen": -0.43044692277908325, + "logits/rejected": -0.42145881056785583, + "logps/chosen": -330.74676513671875, + "logps/ref_chosen": -59.80299377441406, + "logps/ref_rejected": -88.75750732421875, + "logps/rejected": -587.602294921875, + "loss": 1.0478, + "margin_dpo/margin_mean": 227.90103149414062, + "margin_dpo/margin_std": 226.9383544921875, + "step": 354 + }, + { + "KL/chosen_KL_mean": -270.277099609375, + "KL/mean": -383.14495849609375, + "KL/rejected_KL_mean": -496.0128479003906, + "KL/std": 231.57516479492188, + "epoch": 0.5212922173274597, + "fcm_dpo/beta": 0.0018558851443231106, + "fcm_dpo/delta": -0.019903086125850677, + "fcm_dpo/margin": 225.7357177734375, + "fcm_dpo/q_t": 0.40093863010406494, + "grad_norm": 35.046512603759766, + "learning_rate": 2.7689777072570284e-07, + "logits/chosen": -0.538011908531189, + "logits/rejected": -0.5325556993484497, + "logps/chosen": -324.4056091308594, + "logps/ref_chosen": -54.12849807739258, + "logps/ref_rejected": -82.40606689453125, + "logps/rejected": -578.4189453125, + "loss": 1.0533, + "margin_dpo/margin_mean": 225.7357177734375, + "margin_dpo/margin_std": 227.40325927734375, + "step": 355 + }, + { + "KL/chosen_KL_mean": -336.8787841796875, + "KL/mean": -398.74810791015625, + "KL/rejected_KL_mean": -460.6174621582031, + "KL/std": 237.61134338378906, + "epoch": 0.5227606461086637, + "fcm_dpo/beta": 0.0018649199046194553, + "fcm_dpo/delta": 0.037342458963394165, + "fcm_dpo/margin": 123.73867797851562, + "fcm_dpo/q_t": 0.44756919145584106, + "grad_norm": 32.40851974487305, + "learning_rate": 2.7562154104130176e-07, + "logits/chosen": -0.500129222869873, + "logits/rejected": -0.48293763399124146, + "logps/chosen": -401.5526123046875, + "logps/ref_chosen": -64.6738052368164, + "logps/ref_rejected": -75.89926147460938, + "logps/rejected": -536.5167236328125, + "loss": 1.2411, + "margin_dpo/margin_mean": 123.73868560791016, + "margin_dpo/margin_std": 291.8314208984375, + "step": 356 + }, + { + "KL/chosen_KL_mean": -302.33843994140625, + "KL/mean": -403.6683349609375, + "KL/rejected_KL_mean": -504.9982604980469, + "KL/std": 235.599365234375, + "epoch": 0.5242290748898678, + "fcm_dpo/beta": 0.001869656378403306, + "fcm_dpo/delta": 0.021622397005558014, + "fcm_dpo/margin": 202.65980529785156, + "fcm_dpo/q_t": 0.4108501672744751, + "grad_norm": 24.0694522857666, + "learning_rate": 2.7434463620546594e-07, + "logits/chosen": -0.47579270601272583, + "logits/rejected": -0.4654581546783447, + "logps/chosen": -355.0642395019531, + "logps/ref_chosen": -52.725799560546875, + "logps/ref_rejected": -86.84115600585938, + "logps/rejected": -591.8394165039062, + "loss": 1.0927, + "margin_dpo/margin_mean": 202.65982055664062, + "margin_dpo/margin_std": 240.59239196777344, + "step": 357 + }, + { + "KL/chosen_KL_mean": -271.8714599609375, + "KL/mean": -368.218994140625, + "KL/rejected_KL_mean": -464.56658935546875, + "KL/std": 229.80101013183594, + "epoch": 0.5256975036710719, + "fcm_dpo/beta": 0.0018902610754594207, + "fcm_dpo/delta": 0.03711070120334625, + "fcm_dpo/margin": 192.69509887695312, + "fcm_dpo/q_t": 0.4157891273498535, + "grad_norm": 23.27127456665039, + "learning_rate": 2.730670898658255e-07, + "logits/chosen": -0.5084176063537598, + "logits/rejected": -0.4954051971435547, + "logps/chosen": -335.076904296875, + "logps/ref_chosen": -63.20543670654297, + "logps/ref_rejected": -88.373291015625, + "logps/rejected": -552.9398803710938, + "loss": 1.1069, + "margin_dpo/margin_mean": 192.69509887695312, + "margin_dpo/margin_std": 249.3919219970703, + "step": 358 + }, + { + "KL/chosen_KL_mean": -314.4269104003906, + "KL/mean": -426.55120849609375, + "KL/rejected_KL_mean": -538.675537109375, + "KL/std": 234.15951538085938, + "epoch": 0.527165932452276, + "fcm_dpo/beta": 0.0018845023587346077, + "fcm_dpo/delta": -0.023675762116909027, + "fcm_dpo/margin": 224.2485809326172, + "fcm_dpo/q_t": 0.4034174680709839, + "grad_norm": 25.26753044128418, + "learning_rate": 2.717889356869146e-07, + "logits/chosen": -0.4363176226615906, + "logits/rejected": -0.4269408583641052, + "logps/chosen": -370.797119140625, + "logps/ref_chosen": -56.370216369628906, + "logps/ref_rejected": -82.17375183105469, + "logps/rejected": -620.8492431640625, + "loss": 1.0759, + "margin_dpo/margin_mean": 224.2485809326172, + "margin_dpo/margin_std": 283.8497009277344, + "step": 359 + }, + { + "KL/chosen_KL_mean": -313.34381103515625, + "KL/mean": -399.41571044921875, + "KL/rejected_KL_mean": -485.4875793457031, + "KL/std": 200.4339599609375, + "epoch": 0.5286343612334802, + "fcm_dpo/beta": 0.0019088031258434057, + "fcm_dpo/delta": 0.07367773354053497, + "fcm_dpo/margin": 172.14376831054688, + "fcm_dpo/q_t": 0.42239513993263245, + "grad_norm": 39.744529724121094, + "learning_rate": 2.7051020734928443e-07, + "logits/chosen": -0.44749805331230164, + "logits/rejected": -0.4344269633293152, + "logps/chosen": -364.80419921875, + "logps/ref_chosen": -51.460384368896484, + "logps/ref_rejected": -69.83892059326172, + "logps/rejected": -555.3265380859375, + "loss": 1.1216, + "margin_dpo/margin_mean": 172.1437530517578, + "margin_dpo/margin_std": 204.19671630859375, + "step": 360 + }, + { + "KL/chosen_KL_mean": -329.3931884765625, + "KL/mean": -417.86151123046875, + "KL/rejected_KL_mean": -506.329833984375, + "KL/std": 236.83558654785156, + "epoch": 0.5301027900146843, + "fcm_dpo/beta": 0.0019349538488313556, + "fcm_dpo/delta": 0.05939781665802002, + "fcm_dpo/margin": 176.93667602539062, + "fcm_dpo/q_t": 0.4206019639968872, + "grad_norm": 28.265932083129883, + "learning_rate": 2.6923093854861593e-07, + "logits/chosen": -0.42435145378112793, + "logits/rejected": -0.4203334152698517, + "logps/chosen": -383.2627258300781, + "logps/ref_chosen": -53.86951446533203, + "logps/ref_rejected": -90.7692642211914, + "logps/rejected": -597.09912109375, + "loss": 1.1322, + "margin_dpo/margin_mean": 176.93667602539062, + "margin_dpo/margin_std": 257.4376220703125, + "step": 361 + }, + { + "KL/chosen_KL_mean": -289.91583251953125, + "KL/mean": -428.1727600097656, + "KL/rejected_KL_mean": -566.4297485351562, + "KL/std": 248.63421630859375, + "epoch": 0.5315712187958884, + "fcm_dpo/beta": 0.0018921452574431896, + "fcm_dpo/delta": -0.13069821894168854, + "fcm_dpo/margin": 276.513916015625, + "fcm_dpo/q_t": 0.3790006637573242, + "grad_norm": 24.199586868286133, + "learning_rate": 2.679511629948319e-07, + "logits/chosen": -0.45070475339889526, + "logits/rejected": -0.46132344007492065, + "logps/chosen": -348.55487060546875, + "logps/ref_chosen": -58.639060974121094, + "logps/ref_rejected": -105.58195495605469, + "logps/rejected": -672.01171875, + "loss": 0.9867, + "margin_dpo/margin_mean": 276.5138854980469, + "margin_dpo/margin_std": 264.4261474609375, + "step": 362 + }, + { + "KL/chosen_KL_mean": -264.89666748046875, + "KL/mean": -404.021728515625, + "KL/rejected_KL_mean": -543.1467895507812, + "KL/std": 245.37258911132812, + "epoch": 0.5330396475770925, + "fcm_dpo/beta": 0.001857282593846321, + "fcm_dpo/delta": -0.12296949326992035, + "fcm_dpo/margin": 278.2501220703125, + "fcm_dpo/q_t": 0.38030678033828735, + "grad_norm": 23.536636352539062, + "learning_rate": 2.6667091441120816e-07, + "logits/chosen": -0.4141322076320648, + "logits/rejected": -0.4084208607673645, + "logps/chosen": -309.455078125, + "logps/ref_chosen": -44.558380126953125, + "logps/ref_rejected": -74.69496154785156, + "logps/rejected": -617.841796875, + "loss": 0.9918, + "margin_dpo/margin_mean": 278.2501220703125, + "margin_dpo/margin_std": 267.5613098144531, + "step": 363 + }, + { + "KL/chosen_KL_mean": -294.61907958984375, + "KL/mean": -398.00604248046875, + "KL/rejected_KL_mean": -501.39300537109375, + "KL/std": 241.02774047851562, + "epoch": 0.5345080763582967, + "fcm_dpo/beta": 0.001852140761911869, + "fcm_dpo/delta": 0.017322657629847527, + "fcm_dpo/margin": 206.77392578125, + "fcm_dpo/q_t": 0.41120392084121704, + "grad_norm": 24.284122467041016, + "learning_rate": 2.6539022653348575e-07, + "logits/chosen": -0.46633392572402954, + "logits/rejected": -0.4776257276535034, + "logps/chosen": -343.513671875, + "logps/ref_chosen": -48.894622802734375, + "logps/ref_rejected": -91.395751953125, + "logps/rejected": -592.7887573242188, + "loss": 1.1034, + "margin_dpo/margin_mean": 206.77394104003906, + "margin_dpo/margin_std": 278.9082946777344, + "step": 364 + }, + { + "KL/chosen_KL_mean": -282.74884033203125, + "KL/mean": -393.6543884277344, + "KL/rejected_KL_mean": -504.5599060058594, + "KL/std": 251.020751953125, + "epoch": 0.5359765051395007, + "fcm_dpo/beta": 0.0018444794695824385, + "fcm_dpo/delta": -0.009514345787465572, + "fcm_dpo/margin": 221.81109619140625, + "fcm_dpo/q_t": 0.40571504831314087, + "grad_norm": 23.80178451538086, + "learning_rate": 2.641091331089811e-07, + "logits/chosen": -0.42873144149780273, + "logits/rejected": -0.440301775932312, + "logps/chosen": -334.2415771484375, + "logps/ref_chosen": -51.49274444580078, + "logps/ref_rejected": -92.70166778564453, + "logps/rejected": -597.2615966796875, + "loss": 1.0708, + "margin_dpo/margin_mean": 221.81109619140625, + "margin_dpo/margin_std": 257.0599060058594, + "step": 365 + }, + { + "KL/chosen_KL_mean": -263.0450134277344, + "KL/mean": -374.4595031738281, + "KL/rejected_KL_mean": -485.8740234375, + "KL/std": 239.58175659179688, + "epoch": 0.5374449339207048, + "fcm_dpo/beta": 0.001833090209402144, + "fcm_dpo/delta": -0.009195588529109955, + "fcm_dpo/margin": 222.82901000976562, + "fcm_dpo/q_t": 0.40598738193511963, + "grad_norm": 21.787899017333984, + "learning_rate": 2.6282766789569736e-07, + "logits/chosen": -0.41914117336273193, + "logits/rejected": -0.43506374955177307, + "logps/chosen": -307.76556396484375, + "logps/ref_chosen": -44.7205696105957, + "logps/ref_rejected": -83.31040954589844, + "logps/rejected": -569.1844482421875, + "loss": 1.0851, + "margin_dpo/margin_mean": 222.82901000976562, + "margin_dpo/margin_std": 287.36468505859375, + "step": 366 + }, + { + "KL/chosen_KL_mean": -271.44921875, + "KL/mean": -364.90087890625, + "KL/rejected_KL_mean": -458.3525085449219, + "KL/std": 220.12733459472656, + "epoch": 0.5389133627019089, + "fcm_dpo/beta": 0.0018576278816908598, + "fcm_dpo/delta": 0.05456267669796944, + "fcm_dpo/margin": 186.90333557128906, + "fcm_dpo/q_t": 0.41822659969329834, + "grad_norm": 20.662567138671875, + "learning_rate": 2.615458646614349e-07, + "logits/chosen": -0.44316402077674866, + "logits/rejected": -0.4273492395877838, + "logps/chosen": -329.8546142578125, + "logps/ref_chosen": -58.405418395996094, + "logps/ref_rejected": -76.75132751464844, + "logps/rejected": -535.1038208007812, + "loss": 1.1158, + "margin_dpo/margin_mean": 186.90333557128906, + "margin_dpo/margin_std": 237.801025390625, + "step": 367 + }, + { + "KL/chosen_KL_mean": -256.9306640625, + "KL/mean": -400.14288330078125, + "KL/rejected_KL_mean": -543.3551025390625, + "KL/std": 246.394287109375, + "epoch": 0.540381791483113, + "fcm_dpo/beta": 0.0018250863067805767, + "fcm_dpo/delta": -0.12947417795658112, + "fcm_dpo/margin": 286.42449951171875, + "fcm_dpo/q_t": 0.37577325105667114, + "grad_norm": 35.373626708984375, + "learning_rate": 2.6026375718290083e-07, + "logits/chosen": -0.46660637855529785, + "logits/rejected": -0.4749259352684021, + "logps/chosen": -301.3831787109375, + "logps/ref_chosen": -44.452518463134766, + "logps/ref_rejected": -98.55526733398438, + "logps/rejected": -641.910400390625, + "loss": 0.9699, + "margin_dpo/margin_mean": 286.42449951171875, + "margin_dpo/margin_std": 225.23828125, + "step": 368 + }, + { + "KL/chosen_KL_mean": -343.1910095214844, + "KL/mean": -415.24658203125, + "KL/rejected_KL_mean": -487.3021240234375, + "KL/std": 250.22268676757812, + "epoch": 0.5418502202643172, + "fcm_dpo/beta": 0.001818750286474824, + "fcm_dpo/delta": 0.0356462262570858, + "fcm_dpo/margin": 144.11106872558594, + "fcm_dpo/q_t": 0.43869489431381226, + "grad_norm": 29.09714126586914, + "learning_rate": 2.589813792448196e-07, + "logits/chosen": -0.47453856468200684, + "logits/rejected": -0.4585055708885193, + "logps/chosen": -414.572509765625, + "logps/ref_chosen": -71.38150024414062, + "logps/ref_rejected": -91.29582214355469, + "logps/rejected": -578.597900390625, + "loss": 1.205, + "margin_dpo/margin_mean": 144.11106872558594, + "margin_dpo/margin_std": 282.6583251953125, + "step": 369 + }, + { + "KL/chosen_KL_mean": -344.9140625, + "KL/mean": -421.39202880859375, + "KL/rejected_KL_mean": -497.8699951171875, + "KL/std": 256.6206359863281, + "epoch": 0.5433186490455213, + "fcm_dpo/beta": 0.0018614260479807854, + "fcm_dpo/delta": 0.11835242807865143, + "fcm_dpo/margin": 152.9559783935547, + "fcm_dpo/q_t": 0.4348163604736328, + "grad_norm": 29.563688278198242, + "learning_rate": 2.5769876463904263e-07, + "logits/chosen": -0.46674686670303345, + "logits/rejected": -0.45883116126060486, + "logps/chosen": -416.52154541015625, + "logps/ref_chosen": -71.60749816894531, + "logps/ref_rejected": -97.25978088378906, + "logps/rejected": -595.1297607421875, + "loss": 1.1911, + "margin_dpo/margin_mean": 152.95596313476562, + "margin_dpo/margin_std": 288.8004150390625, + "step": 370 + }, + { + "KL/chosen_KL_mean": -335.6900939941406, + "KL/mean": -441.45654296875, + "KL/rejected_KL_mean": -547.222900390625, + "KL/std": 263.390625, + "epoch": 0.5447870778267254, + "fcm_dpo/beta": 0.0018742081010714173, + "fcm_dpo/delta": 0.003576137125492096, + "fcm_dpo/margin": 211.5328369140625, + "fcm_dpo/q_t": 0.40980789065361023, + "grad_norm": 27.87566566467285, + "learning_rate": 2.5641594716365744e-07, + "logits/chosen": -0.5134047269821167, + "logits/rejected": -0.49832814931869507, + "logps/chosen": -405.1045837402344, + "logps/ref_chosen": -69.41448974609375, + "logps/ref_rejected": -99.17217254638672, + "logps/rejected": -646.3951416015625, + "loss": 1.1043, + "margin_dpo/margin_mean": 211.5328369140625, + "margin_dpo/margin_std": 302.67376708984375, + "step": 371 + }, + { + "KL/chosen_KL_mean": -320.33050537109375, + "KL/mean": -443.8200378417969, + "KL/rejected_KL_mean": -567.3095703125, + "KL/std": 299.37408447265625, + "epoch": 0.5462555066079295, + "fcm_dpo/beta": 0.0018502443563193083, + "fcm_dpo/delta": -0.05988244712352753, + "fcm_dpo/margin": 246.97903442382812, + "fcm_dpo/q_t": 0.398201584815979, + "grad_norm": 25.24636459350586, + "learning_rate": 2.551329606220976e-07, + "logits/chosen": -0.5078925490379333, + "logits/rejected": -0.49255937337875366, + "logps/chosen": -382.14849853515625, + "logps/ref_chosen": -61.8179931640625, + "logps/ref_rejected": -78.53948974609375, + "logps/rejected": -645.8489990234375, + "loss": 1.0673, + "margin_dpo/margin_mean": 246.97903442382812, + "margin_dpo/margin_std": 337.23712158203125, + "step": 372 + }, + { + "KL/chosen_KL_mean": -346.5724792480469, + "KL/mean": -466.34234619140625, + "KL/rejected_KL_mean": -586.1122436523438, + "KL/std": 286.9017639160156, + "epoch": 0.5477239353891337, + "fcm_dpo/beta": 0.0018451586365699768, + "fcm_dpo/delta": -0.044221822172403336, + "fcm_dpo/margin": 239.539794921875, + "fcm_dpo/q_t": 0.3970082998275757, + "grad_norm": 32.10969161987305, + "learning_rate": 2.538498388222517e-07, + "logits/chosen": -0.4709147810935974, + "logits/rejected": -0.4543595016002655, + "logps/chosen": -410.78961181640625, + "logps/ref_chosen": -64.21713256835938, + "logps/ref_rejected": -85.95960998535156, + "logps/rejected": -672.0718994140625, + "loss": 1.0556, + "margin_dpo/margin_mean": 239.539794921875, + "margin_dpo/margin_std": 271.01666259765625, + "step": 373 + }, + { + "KL/chosen_KL_mean": -305.8607177734375, + "KL/mean": -413.46075439453125, + "KL/rejected_KL_mean": -521.060791015625, + "KL/std": 309.17022705078125, + "epoch": 0.5491923641703378, + "fcm_dpo/beta": 0.0018260091310366988, + "fcm_dpo/delta": 0.007001262158155441, + "fcm_dpo/margin": 215.20005798339844, + "fcm_dpo/q_t": 0.413374662399292, + "grad_norm": 24.107498168945312, + "learning_rate": 2.525666155755725e-07, + "logits/chosen": -0.5528968572616577, + "logits/rejected": -0.5381832122802734, + "logps/chosen": -376.5108947753906, + "logps/ref_chosen": -70.65018463134766, + "logps/ref_rejected": -93.64016723632812, + "logps/rejected": -614.700927734375, + "loss": 1.1254, + "margin_dpo/margin_mean": 215.2000732421875, + "margin_dpo/margin_std": 354.08026123046875, + "step": 374 + }, + { + "KL/chosen_KL_mean": -316.97076416015625, + "KL/mean": -415.6497802734375, + "KL/rejected_KL_mean": -514.3287353515625, + "KL/std": 248.89346313476562, + "epoch": 0.5506607929515418, + "fcm_dpo/beta": 0.0018346281722187996, + "fcm_dpo/delta": 0.03891323506832123, + "fcm_dpo/margin": 197.35801696777344, + "fcm_dpo/q_t": 0.416460245847702, + "grad_norm": 28.248476028442383, + "learning_rate": 2.512833246961859e-07, + "logits/chosen": -0.48038458824157715, + "logits/rejected": -0.4829840064048767, + "logps/chosen": -377.0509948730469, + "logps/ref_chosen": -60.080223083496094, + "logps/ref_rejected": -88.93830871582031, + "logps/rejected": -603.26708984375, + "loss": 1.1373, + "margin_dpo/margin_mean": 197.35800170898438, + "margin_dpo/margin_std": 312.4132995605469, + "step": 375 + }, + { + "KL/chosen_KL_mean": -300.00848388671875, + "KL/mean": -428.203857421875, + "KL/rejected_KL_mean": -556.3992919921875, + "KL/std": 273.4169616699219, + "epoch": 0.5521292217327459, + "fcm_dpo/beta": 0.0018291289452463388, + "fcm_dpo/delta": -0.07235552370548248, + "fcm_dpo/margin": 256.3907470703125, + "fcm_dpo/q_t": 0.39261579513549805, + "grad_norm": 25.545181274414062, + "learning_rate": 2.5e-07, + "logits/chosen": -0.44510942697525024, + "logits/rejected": -0.43642458319664, + "logps/chosen": -362.66876220703125, + "logps/ref_chosen": -62.660308837890625, + "logps/ref_rejected": -105.52660369873047, + "logps/rejected": -661.9258422851562, + "loss": 1.0513, + "margin_dpo/margin_mean": 256.3907470703125, + "margin_dpo/margin_std": 320.7281188964844, + "step": 376 + }, + { + "KL/chosen_KL_mean": -301.1566467285156, + "KL/mean": -427.1124267578125, + "KL/rejected_KL_mean": -553.0682373046875, + "KL/std": 280.93780517578125, + "epoch": 0.55359765051395, + "fcm_dpo/beta": 0.0018094563856720924, + "fcm_dpo/delta": -0.05844918638467789, + "fcm_dpo/margin": 251.91156005859375, + "fcm_dpo/q_t": 0.39543959498405457, + "grad_norm": 25.23697853088379, + "learning_rate": 2.487166753038141e-07, + "logits/chosen": -0.397521436214447, + "logits/rejected": -0.39853352308273315, + "logps/chosen": -355.6353759765625, + "logps/ref_chosen": -54.478736877441406, + "logps/ref_rejected": -98.70335388183594, + "logps/rejected": -651.7716064453125, + "loss": 1.0517, + "margin_dpo/margin_mean": 251.91156005859375, + "margin_dpo/margin_std": 303.8875732421875, + "step": 377 + }, + { + "KL/chosen_KL_mean": -276.0648498535156, + "KL/mean": -409.00689697265625, + "KL/rejected_KL_mean": -541.948974609375, + "KL/std": 263.941162109375, + "epoch": 0.5550660792951542, + "fcm_dpo/beta": 0.001778826816007495, + "fcm_dpo/delta": -0.07667370140552521, + "fcm_dpo/margin": 265.88409423828125, + "fcm_dpo/q_t": 0.3893394470214844, + "grad_norm": 30.339950561523438, + "learning_rate": 2.4743338442442754e-07, + "logits/chosen": -0.4182929992675781, + "logits/rejected": -0.4355580806732178, + "logps/chosen": -321.08538818359375, + "logps/ref_chosen": -45.02053451538086, + "logps/ref_rejected": -88.0469741821289, + "logps/rejected": -629.9959716796875, + "loss": 1.0286, + "margin_dpo/margin_mean": 265.88409423828125, + "margin_dpo/margin_std": 285.3680419921875, + "step": 378 + }, + { + "KL/chosen_KL_mean": -324.24822998046875, + "KL/mean": -456.31768798828125, + "KL/rejected_KL_mean": -588.3870849609375, + "KL/std": 270.865478515625, + "epoch": 0.5565345080763583, + "fcm_dpo/beta": 0.0017488367157056928, + "fcm_dpo/delta": -0.06558392941951752, + "fcm_dpo/margin": 264.138916015625, + "fcm_dpo/q_t": 0.39461731910705566, + "grad_norm": 23.810928344726562, + "learning_rate": 2.461501611777483e-07, + "logits/chosen": -0.3999977111816406, + "logits/rejected": -0.4254748225212097, + "logps/chosen": -377.43035888671875, + "logps/ref_chosen": -53.182098388671875, + "logps/ref_rejected": -114.3001708984375, + "logps/rejected": -702.687255859375, + "loss": 1.0496, + "margin_dpo/margin_mean": 264.1388854980469, + "margin_dpo/margin_std": 319.9960021972656, + "step": 379 + }, + { + "KL/chosen_KL_mean": -327.43389892578125, + "KL/mean": -471.5666198730469, + "KL/rejected_KL_mean": -615.6993408203125, + "KL/std": 298.45489501953125, + "epoch": 0.5580029368575624, + "fcm_dpo/beta": 0.0017266274662688375, + "fcm_dpo/delta": -0.10273480415344238, + "fcm_dpo/margin": 288.2655029296875, + "fcm_dpo/q_t": 0.3840462565422058, + "grad_norm": 27.789323806762695, + "learning_rate": 2.4486703937790243e-07, + "logits/chosen": -0.4401572346687317, + "logits/rejected": -0.46762269735336304, + "logps/chosen": -378.78692626953125, + "logps/ref_chosen": -51.3530387878418, + "logps/ref_rejected": -104.19169616699219, + "logps/rejected": -719.8910522460938, + "loss": 1.0222, + "margin_dpo/margin_mean": 288.2655029296875, + "margin_dpo/margin_std": 325.3714599609375, + "step": 380 + }, + { + "KL/chosen_KL_mean": -348.72802734375, + "KL/mean": -450.8270263671875, + "KL/rejected_KL_mean": -552.926025390625, + "KL/std": 260.6754455566406, + "epoch": 0.5594713656387665, + "fcm_dpo/beta": 0.0017242280300706625, + "fcm_dpo/delta": 0.04968461021780968, + "fcm_dpo/margin": 204.19798278808594, + "fcm_dpo/q_t": 0.42024338245391846, + "grad_norm": 25.380268096923828, + "learning_rate": 2.435840528363426e-07, + "logits/chosen": -0.43789827823638916, + "logits/rejected": -0.4210563898086548, + "logps/chosen": -406.5311279296875, + "logps/ref_chosen": -57.80306625366211, + "logps/ref_rejected": -79.21940612792969, + "logps/rejected": -632.1454467773438, + "loss": 1.1515, + "margin_dpo/margin_mean": 204.197998046875, + "margin_dpo/margin_std": 352.91259765625, + "step": 381 + }, + { + "KL/chosen_KL_mean": -352.124267578125, + "KL/mean": -475.0651550292969, + "KL/rejected_KL_mean": -598.0060424804688, + "KL/std": 246.15951538085938, + "epoch": 0.5609397944199707, + "fcm_dpo/beta": 0.0017251023091375828, + "fcm_dpo/delta": -0.02524741366505623, + "fcm_dpo/margin": 245.88180541992188, + "fcm_dpo/q_t": 0.4009873569011688, + "grad_norm": 30.70073890686035, + "learning_rate": 2.4230123536095745e-07, + "logits/chosen": -0.46005573868751526, + "logits/rejected": -0.4692569375038147, + "logps/chosen": -418.14453125, + "logps/ref_chosen": -66.02030181884766, + "logps/ref_rejected": -110.71016693115234, + "logps/rejected": -708.7161865234375, + "loss": 1.0578, + "margin_dpo/margin_mean": 245.88180541992188, + "margin_dpo/margin_std": 270.6402587890625, + "step": 382 + }, + { + "KL/chosen_KL_mean": -358.22650146484375, + "KL/mean": -477.87152099609375, + "KL/rejected_KL_mean": -597.5165405273438, + "KL/std": 270.5577392578125, + "epoch": 0.5624082232011748, + "fcm_dpo/beta": 0.0017189650097861886, + "fcm_dpo/delta": -0.011835414916276932, + "fcm_dpo/margin": 239.28997802734375, + "fcm_dpo/q_t": 0.40572264790534973, + "grad_norm": 29.060136795043945, + "learning_rate": 2.4101862075518037e-07, + "logits/chosen": -0.4785361886024475, + "logits/rejected": -0.4892638325691223, + "logps/chosen": -408.6180114746094, + "logps/ref_chosen": -50.39148712158203, + "logps/ref_rejected": -93.71589660644531, + "logps/rejected": -691.232421875, + "loss": 1.1034, + "margin_dpo/margin_mean": 239.28997802734375, + "margin_dpo/margin_std": 355.65509033203125, + "step": 383 + }, + { + "KL/chosen_KL_mean": -376.49822998046875, + "KL/mean": -473.9187316894531, + "KL/rejected_KL_mean": -571.3392333984375, + "KL/std": 258.77777099609375, + "epoch": 0.5638766519823789, + "fcm_dpo/beta": 0.0017420074436813593, + "fcm_dpo/delta": 0.062150660902261734, + "fcm_dpo/margin": 194.84095764160156, + "fcm_dpo/q_t": 0.4199580252170563, + "grad_norm": 28.971044540405273, + "learning_rate": 2.397362428170992e-07, + "logits/chosen": -0.49737972021102905, + "logits/rejected": -0.4919343590736389, + "logps/chosen": -428.5443420410156, + "logps/ref_chosen": -52.046104431152344, + "logps/ref_rejected": -85.76089477539062, + "logps/rejected": -657.10009765625, + "loss": 1.1205, + "margin_dpo/margin_mean": 194.84097290039062, + "margin_dpo/margin_std": 245.9820556640625, + "step": 384 + }, + { + "KL/chosen_KL_mean": -361.83416748046875, + "KL/mean": -478.52764892578125, + "KL/rejected_KL_mean": -595.2210693359375, + "KL/std": 224.6497802734375, + "epoch": 0.5653450807635829, + "fcm_dpo/beta": 0.0017391443252563477, + "fcm_dpo/delta": -0.006142602767795324, + "fcm_dpo/margin": 233.38693237304688, + "fcm_dpo/q_t": 0.4041079580783844, + "grad_norm": 34.824005126953125, + "learning_rate": 2.3845413533856514e-07, + "logits/chosen": -0.5053662061691284, + "logits/rejected": -0.48552972078323364, + "logps/chosen": -427.3863220214844, + "logps/ref_chosen": -65.55215454101562, + "logps/ref_rejected": -77.82792663574219, + "logps/rejected": -673.049072265625, + "loss": 1.0656, + "margin_dpo/margin_mean": 233.38693237304688, + "margin_dpo/margin_std": 248.42532348632812, + "step": 385 + }, + { + "KL/chosen_KL_mean": -378.93572998046875, + "KL/mean": -507.42462158203125, + "KL/rejected_KL_mean": -635.9134521484375, + "KL/std": 285.33013916015625, + "epoch": 0.566813509544787, + "fcm_dpo/beta": 0.0017280435422435403, + "fcm_dpo/delta": -0.046092525124549866, + "fcm_dpo/margin": 256.977783203125, + "fcm_dpo/q_t": 0.39897212386131287, + "grad_norm": 34.94272994995117, + "learning_rate": 2.3717233210430254e-07, + "logits/chosen": -0.4857565760612488, + "logits/rejected": -0.4821171760559082, + "logps/chosen": -437.1575927734375, + "logps/ref_chosen": -58.22185516357422, + "logps/ref_rejected": -92.32742309570312, + "logps/rejected": -728.2409057617188, + "loss": 1.066, + "margin_dpo/margin_mean": 256.977783203125, + "margin_dpo/margin_std": 332.15789794921875, + "step": 386 + }, + { + "KL/chosen_KL_mean": -393.4022216796875, + "KL/mean": -504.98333740234375, + "KL/rejected_KL_mean": -616.564453125, + "KL/std": 264.5252685546875, + "epoch": 0.5682819383259912, + "fcm_dpo/beta": 0.001718209940008819, + "fcm_dpo/delta": 0.01678801327943802, + "fcm_dpo/margin": 223.16220092773438, + "fcm_dpo/q_t": 0.41014280915260315, + "grad_norm": 37.69805908203125, + "learning_rate": 2.3589086689101889e-07, + "logits/chosen": -0.5313735008239746, + "logits/rejected": -0.5198484063148499, + "logps/chosen": -459.8216552734375, + "logps/ref_chosen": -66.41944885253906, + "logps/ref_rejected": -92.16915893554688, + "logps/rejected": -708.733642578125, + "loss": 1.093, + "margin_dpo/margin_mean": 223.16221618652344, + "margin_dpo/margin_std": 270.58795166015625, + "step": 387 + }, + { + "KL/chosen_KL_mean": -380.30328369140625, + "KL/mean": -524.3448486328125, + "KL/rejected_KL_mean": -668.386474609375, + "KL/std": 307.99615478515625, + "epoch": 0.5697503671071953, + "fcm_dpo/beta": 0.0016994503093883395, + "fcm_dpo/delta": -0.09461631625890732, + "fcm_dpo/margin": 288.0831604003906, + "fcm_dpo/q_t": 0.39002934098243713, + "grad_norm": 34.622745513916016, + "learning_rate": 2.3460977346651428e-07, + "logits/chosen": -0.49403852224349976, + "logits/rejected": -0.5061089396476746, + "logps/chosen": -430.4327392578125, + "logps/ref_chosen": -50.129459381103516, + "logps/ref_rejected": -104.43305969238281, + "logps/rejected": -772.8195190429688, + "loss": 1.0371, + "margin_dpo/margin_mean": 288.0831604003906, + "margin_dpo/margin_std": 356.5264892578125, + "step": 388 + }, + { + "KL/chosen_KL_mean": -403.150390625, + "KL/mean": -520.1739501953125, + "KL/rejected_KL_mean": -637.1974487304688, + "KL/std": 292.00225830078125, + "epoch": 0.5712187958883994, + "fcm_dpo/beta": 0.001694181701168418, + "fcm_dpo/delta": 0.003580855205655098, + "fcm_dpo/margin": 234.0470733642578, + "fcm_dpo/q_t": 0.40893417596817017, + "grad_norm": 31.105833053588867, + "learning_rate": 2.3332908558879177e-07, + "logits/chosen": -0.5170685648918152, + "logits/rejected": -0.5164707899093628, + "logps/chosen": -461.0569763183594, + "logps/ref_chosen": -57.906593322753906, + "logps/ref_rejected": -77.91454315185547, + "logps/rejected": -715.1119995117188, + "loss": 1.0973, + "margin_dpo/margin_mean": 234.04705810546875, + "margin_dpo/margin_std": 320.1253967285156, + "step": 389 + }, + { + "KL/chosen_KL_mean": -388.3230285644531, + "KL/mean": -506.873291015625, + "KL/rejected_KL_mean": -625.4235229492188, + "KL/std": 285.4632568359375, + "epoch": 0.5726872246696035, + "fcm_dpo/beta": 0.0016903409268707037, + "fcm_dpo/delta": -0.00112185999751091, + "fcm_dpo/margin": 237.10052490234375, + "fcm_dpo/q_t": 0.41132819652557373, + "grad_norm": 31.483285903930664, + "learning_rate": 2.320488370051681e-07, + "logits/chosen": -0.449199914932251, + "logits/rejected": -0.4437105655670166, + "logps/chosen": -437.5489501953125, + "logps/ref_chosen": -49.22591781616211, + "logps/ref_rejected": -85.5281982421875, + "logps/rejected": -710.9517211914062, + "loss": 1.1107, + "margin_dpo/margin_mean": 237.10049438476562, + "margin_dpo/margin_std": 363.8388366699219, + "step": 390 + }, + { + "KL/chosen_KL_mean": -366.3382263183594, + "KL/mean": -437.36956787109375, + "KL/rejected_KL_mean": -508.40093994140625, + "KL/std": 255.61053466796875, + "epoch": 0.5741556534508077, + "fcm_dpo/beta": 0.001737719401717186, + "fcm_dpo/delta": 0.1569492220878601, + "fcm_dpo/margin": 142.06271362304688, + "fcm_dpo/q_t": 0.44300517439842224, + "grad_norm": 39.402198791503906, + "learning_rate": 2.3076906145138405e-07, + "logits/chosen": -0.4606980085372925, + "logits/rejected": -0.4532572627067566, + "logps/chosen": -430.6678771972656, + "logps/ref_chosen": -64.32965087890625, + "logps/ref_rejected": -86.73820495605469, + "logps/rejected": -595.13916015625, + "loss": 1.2092, + "margin_dpo/margin_mean": 142.0626983642578, + "margin_dpo/margin_std": 269.4259033203125, + "step": 391 + }, + { + "KL/chosen_KL_mean": -303.680908203125, + "KL/mean": -440.0765380859375, + "KL/rejected_KL_mean": -576.47216796875, + "KL/std": 265.79547119140625, + "epoch": 0.5756240822320118, + "fcm_dpo/beta": 0.0017368567641824484, + "fcm_dpo/delta": -0.07744710892438889, + "fcm_dpo/margin": 272.79132080078125, + "fcm_dpo/q_t": 0.38911527395248413, + "grad_norm": 29.656238555908203, + "learning_rate": 2.294897926507156e-07, + "logits/chosen": -0.4456912875175476, + "logits/rejected": -0.4384923577308655, + "logps/chosen": -357.18487548828125, + "logps/ref_chosen": -53.50397872924805, + "logps/ref_rejected": -102.34584045410156, + "logps/rejected": -678.8179931640625, + "loss": 1.0146, + "margin_dpo/margin_mean": 272.7912902832031, + "margin_dpo/margin_std": 255.12586975097656, + "step": 392 + }, + { + "KL/chosen_KL_mean": -293.63629150390625, + "KL/mean": -398.53155517578125, + "KL/rejected_KL_mean": -503.4267578125, + "KL/std": 257.4723205566406, + "epoch": 0.5770925110132159, + "fcm_dpo/beta": 0.0017312290146946907, + "fcm_dpo/delta": 0.03820331022143364, + "fcm_dpo/margin": 209.79046630859375, + "fcm_dpo/q_t": 0.41823697090148926, + "grad_norm": 25.39501190185547, + "learning_rate": 2.2821106431308543e-07, + "logits/chosen": -0.4462127685546875, + "logits/rejected": -0.445779412984848, + "logps/chosen": -340.1102294921875, + "logps/ref_chosen": -46.473915100097656, + "logps/ref_rejected": -71.96885681152344, + "logps/rejected": -575.3956298828125, + "loss": 1.124, + "margin_dpo/margin_mean": 209.79046630859375, + "margin_dpo/margin_std": 317.695068359375, + "step": 393 + }, + { + "KL/chosen_KL_mean": -321.10101318359375, + "KL/mean": -431.11602783203125, + "KL/rejected_KL_mean": -541.131103515625, + "KL/std": 263.2855529785156, + "epoch": 0.57856093979442, + "fcm_dpo/beta": 0.001739653293043375, + "fcm_dpo/delta": 0.017899950966238976, + "fcm_dpo/margin": 220.0300750732422, + "fcm_dpo/q_t": 0.4115217924118042, + "grad_norm": 22.869054794311523, + "learning_rate": 2.2693291013417452e-07, + "logits/chosen": -0.46011653542518616, + "logits/rejected": -0.4607650935649872, + "logps/chosen": -374.0125732421875, + "logps/ref_chosen": -52.91154861450195, + "logps/ref_rejected": -90.8226318359375, + "logps/rejected": -631.9537353515625, + "loss": 1.0947, + "margin_dpo/margin_mean": 220.0300750732422, + "margin_dpo/margin_std": 278.54339599609375, + "step": 394 + }, + { + "KL/chosen_KL_mean": -319.739990234375, + "KL/mean": -439.35211181640625, + "KL/rejected_KL_mean": -558.9642333984375, + "KL/std": 267.854248046875, + "epoch": 0.580029368575624, + "fcm_dpo/beta": 0.0017342737410217524, + "fcm_dpo/delta": -0.015743978321552277, + "fcm_dpo/margin": 239.2242431640625, + "fcm_dpo/q_t": 0.40489462018013, + "grad_norm": 21.308685302734375, + "learning_rate": 2.2565536379453404e-07, + "logits/chosen": -0.4947971701622009, + "logits/rejected": -0.49258559942245483, + "logps/chosen": -382.2861022949219, + "logps/ref_chosen": -62.546112060546875, + "logps/ref_rejected": -83.78262329101562, + "logps/rejected": -642.7468872070312, + "loss": 1.0802, + "margin_dpo/margin_mean": 239.2242431640625, + "margin_dpo/margin_std": 307.40301513671875, + "step": 395 + }, + { + "KL/chosen_KL_mean": -327.5321044921875, + "KL/mean": -437.19598388671875, + "KL/rejected_KL_mean": -546.8599853515625, + "KL/std": 254.036865234375, + "epoch": 0.5814977973568282, + "fcm_dpo/beta": 0.0017403149977326393, + "fcm_dpo/delta": 0.019010702148079872, + "fcm_dpo/margin": 219.32781982421875, + "fcm_dpo/q_t": 0.41025522351264954, + "grad_norm": 23.98872947692871, + "learning_rate": 2.2437845895869825e-07, + "logits/chosen": -0.4915603995323181, + "logits/rejected": -0.475990355014801, + "logps/chosen": -396.52801513671875, + "logps/ref_chosen": -68.99594116210938, + "logps/ref_rejected": -88.64665985107422, + "logps/rejected": -635.506591796875, + "loss": 1.0806, + "margin_dpo/margin_mean": 219.32781982421875, + "margin_dpo/margin_std": 237.82656860351562, + "step": 396 + }, + { + "KL/chosen_KL_mean": -305.2785339355469, + "KL/mean": -444.10589599609375, + "KL/rejected_KL_mean": -582.9332275390625, + "KL/std": 257.37060546875, + "epoch": 0.5829662261380323, + "fcm_dpo/beta": 0.0017182010924443603, + "fcm_dpo/delta": -0.08137989044189453, + "fcm_dpo/margin": 277.6547546386719, + "fcm_dpo/q_t": 0.38798123598098755, + "grad_norm": 34.52851867675781, + "learning_rate": 2.2310222927429716e-07, + "logits/chosen": -0.434369832277298, + "logits/rejected": -0.43857717514038086, + "logps/chosen": -366.5556945800781, + "logps/ref_chosen": -61.27716827392578, + "logps/ref_rejected": -103.11612701416016, + "logps/rejected": -686.0493774414062, + "loss": 1.0139, + "margin_dpo/margin_mean": 277.65472412109375, + "margin_dpo/margin_std": 265.4613037109375, + "step": 397 + }, + { + "KL/chosen_KL_mean": -339.3876953125, + "KL/mean": -462.7353515625, + "KL/rejected_KL_mean": -586.0830078125, + "KL/std": 268.5569152832031, + "epoch": 0.5844346549192364, + "fcm_dpo/beta": 0.0017101437551900744, + "fcm_dpo/delta": -0.022855112329125404, + "fcm_dpo/margin": 246.69528198242188, + "fcm_dpo/q_t": 0.40368321537971497, + "grad_norm": 21.167795181274414, + "learning_rate": 2.2182670837116972e-07, + "logits/chosen": -0.504738986492157, + "logits/rejected": -0.5006571412086487, + "logps/chosen": -407.53924560546875, + "logps/ref_chosen": -68.15155029296875, + "logps/ref_rejected": -108.52360534667969, + "logps/rejected": -694.6066284179688, + "loss": 1.0739, + "margin_dpo/margin_mean": 246.69528198242188, + "margin_dpo/margin_std": 314.19610595703125, + "step": 398 + }, + { + "KL/chosen_KL_mean": -292.50311279296875, + "KL/mean": -409.574462890625, + "KL/rejected_KL_mean": -526.645751953125, + "KL/std": 240.19448852539062, + "epoch": 0.5859030837004405, + "fcm_dpo/beta": 0.0017040125094354153, + "fcm_dpo/delta": 0.000902075320482254, + "fcm_dpo/margin": 234.1426544189453, + "fcm_dpo/q_t": 0.40744373202323914, + "grad_norm": 26.88203239440918, + "learning_rate": 2.2055192986047804e-07, + "logits/chosen": -0.4666723310947418, + "logits/rejected": -0.42695629596710205, + "logps/chosen": -353.3929138183594, + "logps/ref_chosen": -60.889801025390625, + "logps/ref_rejected": -77.965576171875, + "logps/rejected": -604.611328125, + "loss": 1.0882, + "margin_dpo/margin_mean": 234.1426544189453, + "margin_dpo/margin_std": 298.5992431640625, + "step": 399 + }, + { + "KL/chosen_KL_mean": -281.47064208984375, + "KL/mean": -446.57061767578125, + "KL/rejected_KL_mean": -611.6705932617188, + "KL/std": 261.82757568359375, + "epoch": 0.5873715124816447, + "fcm_dpo/beta": 0.0016647314187139273, + "fcm_dpo/delta": -0.158945232629776, + "fcm_dpo/margin": 330.199951171875, + "fcm_dpo/q_t": 0.3706052005290985, + "grad_norm": 28.340599060058594, + "learning_rate": 2.192779273338215e-07, + "logits/chosen": -0.4434245228767395, + "logits/rejected": -0.43936118483543396, + "logps/chosen": -345.1142272949219, + "logps/ref_chosen": -63.64359664916992, + "logps/ref_rejected": -105.252685546875, + "logps/rejected": -716.9232788085938, + "loss": 0.9628, + "margin_dpo/margin_mean": 330.199951171875, + "margin_dpo/margin_std": 286.12255859375, + "step": 400 + }, + { + "KL/chosen_KL_mean": -341.30584716796875, + "KL/mean": -434.10931396484375, + "KL/rejected_KL_mean": -526.9127197265625, + "KL/std": 270.2348937988281, + "epoch": 0.5888399412628488, + "fcm_dpo/beta": 0.0016737841069698334, + "fcm_dpo/delta": 0.09229836612939835, + "fcm_dpo/margin": 185.60690307617188, + "fcm_dpo/q_t": 0.4304611086845398, + "grad_norm": 23.37901496887207, + "learning_rate": 2.1800473436235136e-07, + "logits/chosen": -0.4471530318260193, + "logits/rejected": -0.4397915005683899, + "logps/chosen": -398.4688720703125, + "logps/ref_chosen": -57.16303253173828, + "logps/ref_rejected": -83.79249572753906, + "logps/rejected": -610.7052001953125, + "loss": 1.1851, + "margin_dpo/margin_mean": 185.60690307617188, + "margin_dpo/margin_std": 357.864013671875, + "step": 401 + }, + { + "KL/chosen_KL_mean": -254.9739532470703, + "KL/mean": -427.3084716796875, + "KL/rejected_KL_mean": -599.6429443359375, + "KL/std": 300.2375793457031, + "epoch": 0.5903083700440529, + "fcm_dpo/beta": 0.0016427625669166446, + "fcm_dpo/delta": -0.17618390917778015, + "fcm_dpo/margin": 344.6689758300781, + "fcm_dpo/q_t": 0.3684191107749939, + "grad_norm": 21.227405548095703, + "learning_rate": 2.1673238449588665e-07, + "logits/chosen": -0.490563303232193, + "logits/rejected": -0.4836328625679016, + "logps/chosen": -305.7143249511719, + "logps/ref_chosen": -50.74037170410156, + "logps/ref_rejected": -81.0460433959961, + "logps/rejected": -680.68896484375, + "loss": 0.9545, + "margin_dpo/margin_mean": 344.6689758300781, + "margin_dpo/margin_std": 306.67181396484375, + "step": 402 + }, + { + "KL/chosen_KL_mean": -286.7187805175781, + "KL/mean": -420.0758056640625, + "KL/rejected_KL_mean": -553.4327392578125, + "KL/std": 288.28253173828125, + "epoch": 0.591776798825257, + "fcm_dpo/beta": 0.0016190607566386461, + "fcm_dpo/delta": -0.03328249230980873, + "fcm_dpo/margin": 266.7139892578125, + "fcm_dpo/q_t": 0.3999168574810028, + "grad_norm": 23.255849838256836, + "learning_rate": 2.154609112620295e-07, + "logits/chosen": -0.4604523479938507, + "logits/rejected": -0.4616071879863739, + "logps/chosen": -333.8660888671875, + "logps/ref_chosen": -47.14731216430664, + "logps/ref_rejected": -77.2666015625, + "logps/rejected": -630.6993408203125, + "loss": 1.056, + "margin_dpo/margin_mean": 266.7139892578125, + "margin_dpo/margin_std": 298.5339050292969, + "step": 403 + }, + { + "KL/chosen_KL_mean": -327.2615966796875, + "KL/mean": -458.3185119628906, + "KL/rejected_KL_mean": -589.3754272460938, + "KL/std": 274.367919921875, + "epoch": 0.593245227606461, + "fcm_dpo/beta": 0.0016095450846478343, + "fcm_dpo/delta": -0.022908374667167664, + "fcm_dpo/margin": 262.11383056640625, + "fcm_dpo/q_t": 0.40320682525634766, + "grad_norm": 30.493053436279297, + "learning_rate": 2.1419034816528218e-07, + "logits/chosen": -0.4503590166568756, + "logits/rejected": -0.4424477815628052, + "logps/chosen": -375.13690185546875, + "logps/ref_chosen": -47.875274658203125, + "logps/ref_rejected": -77.15499877929688, + "logps/rejected": -666.5303955078125, + "loss": 1.0819, + "margin_dpo/margin_mean": 262.11383056640625, + "margin_dpo/margin_std": 347.977783203125, + "step": 404 + }, + { + "KL/chosen_KL_mean": -380.44219970703125, + "KL/mean": -484.3817443847656, + "KL/rejected_KL_mean": -588.3212890625, + "KL/std": 300.7879943847656, + "epoch": 0.5947136563876652, + "fcm_dpo/beta": 0.001602754695340991, + "fcm_dpo/delta": -0.043147142976522446, + "fcm_dpo/margin": 207.8790283203125, + "fcm_dpo/q_t": 0.4246622323989868, + "grad_norm": 32.75885772705078, + "learning_rate": 2.129207286861638e-07, + "logits/chosen": -0.44959819316864014, + "logits/rejected": -0.4414255619049072, + "logps/chosen": -445.6051025390625, + "logps/ref_chosen": -65.16290283203125, + "logps/ref_rejected": -87.18678283691406, + "logps/rejected": -675.508056640625, + "loss": 1.1666, + "margin_dpo/margin_mean": 207.8790283203125, + "margin_dpo/margin_std": 365.72845458984375, + "step": 405 + }, + { + "KL/chosen_KL_mean": -342.9149475097656, + "KL/mean": -486.18707275390625, + "KL/rejected_KL_mean": -629.459228515625, + "KL/std": 308.88067626953125, + "epoch": 0.5961820851688693, + "fcm_dpo/beta": 0.0015887843910604715, + "fcm_dpo/delta": -0.05800767242908478, + "fcm_dpo/margin": 286.5443115234375, + "fcm_dpo/q_t": 0.39615678787231445, + "grad_norm": 23.16806983947754, + "learning_rate": 2.1165208628032861e-07, + "logits/chosen": -0.48730766773223877, + "logits/rejected": -0.5009229183197021, + "logps/chosen": -392.65576171875, + "logps/ref_chosen": -49.740814208984375, + "logps/ref_rejected": -92.07862854003906, + "logps/rejected": -721.5379028320312, + "loss": 1.0517, + "margin_dpo/margin_mean": 286.5443115234375, + "margin_dpo/margin_std": 344.0789794921875, + "step": 406 + }, + { + "KL/chosen_KL_mean": -372.48846435546875, + "KL/mean": -458.53515625, + "KL/rejected_KL_mean": -544.5818481445312, + "KL/std": 246.12185668945312, + "epoch": 0.5976505139500734, + "fcm_dpo/beta": 0.0015788807068020105, + "fcm_dpo/delta": 0.009174516424536705, + "fcm_dpo/margin": 172.09339904785156, + "fcm_dpo/q_t": 0.43600770831108093, + "grad_norm": 27.538284301757812, + "learning_rate": 2.1038445437768375e-07, + "logits/chosen": -0.4871164560317993, + "logits/rejected": -0.45956844091415405, + "logps/chosen": -428.81915283203125, + "logps/ref_chosen": -56.33069610595703, + "logps/ref_rejected": -77.51209259033203, + "logps/rejected": -622.093994140625, + "loss": 1.1991, + "margin_dpo/margin_mean": 172.09341430664062, + "margin_dpo/margin_std": 325.55999755859375, + "step": 407 + }, + { + "KL/chosen_KL_mean": -372.11248779296875, + "KL/mean": -475.17840576171875, + "KL/rejected_KL_mean": -578.244384765625, + "KL/std": 243.3355255126953, + "epoch": 0.5991189427312775, + "fcm_dpo/beta": 0.0015977565199136734, + "fcm_dpo/delta": 0.07305292040109634, + "fcm_dpo/margin": 206.1318359375, + "fcm_dpo/q_t": 0.42254602909088135, + "grad_norm": 33.735374450683594, + "learning_rate": 2.0911786638150872e-07, + "logits/chosen": -0.5174983143806458, + "logits/rejected": -0.4989047050476074, + "logps/chosen": -441.90179443359375, + "logps/ref_chosen": -69.789306640625, + "logps/ref_rejected": -90.09693908691406, + "logps/rejected": -668.34130859375, + "loss": 1.1292, + "margin_dpo/margin_mean": 206.1318359375, + "margin_dpo/margin_std": 271.8330993652344, + "step": 408 + }, + { + "KL/chosen_KL_mean": -352.27313232421875, + "KL/mean": -452.4332275390625, + "KL/rejected_KL_mean": -552.5933837890625, + "KL/std": 252.9176025390625, + "epoch": 0.6005873715124816, + "fcm_dpo/beta": 0.0016257348470389843, + "fcm_dpo/delta": 0.07655191421508789, + "fcm_dpo/margin": 200.32015991210938, + "fcm_dpo/q_t": 0.42412787675857544, + "grad_norm": 37.519432067871094, + "learning_rate": 2.0785235566757517e-07, + "logits/chosen": -0.4930969476699829, + "logits/rejected": -0.48252415657043457, + "logps/chosen": -419.590576171875, + "logps/ref_chosen": -67.31744384765625, + "logps/ref_rejected": -84.904296875, + "logps/rejected": -637.4976196289062, + "loss": 1.1368, + "margin_dpo/margin_mean": 200.32015991210938, + "margin_dpo/margin_std": 280.4720153808594, + "step": 409 + }, + { + "KL/chosen_KL_mean": -331.1689453125, + "KL/mean": -449.50732421875, + "KL/rejected_KL_mean": -567.8456420898438, + "KL/std": 254.5586395263672, + "epoch": 0.6020558002936858, + "fcm_dpo/beta": 0.0016305126482620835, + "fcm_dpo/delta": 0.014659320935606956, + "fcm_dpo/margin": 236.67666625976562, + "fcm_dpo/q_t": 0.40897810459136963, + "grad_norm": 31.696603775024414, + "learning_rate": 2.065879555832674e-07, + "logits/chosen": -0.4725341796875, + "logits/rejected": -0.47504281997680664, + "logps/chosen": -382.63433837890625, + "logps/ref_chosen": -51.465354919433594, + "logps/ref_rejected": -83.198974609375, + "logps/rejected": -651.0446166992188, + "loss": 1.0884, + "margin_dpo/margin_mean": 236.67666625976562, + "margin_dpo/margin_std": 284.35211181640625, + "step": 410 + }, + { + "KL/chosen_KL_mean": -353.330810546875, + "KL/mean": -468.7662658691406, + "KL/rejected_KL_mean": -584.20166015625, + "KL/std": 282.9371337890625, + "epoch": 0.6035242290748899, + "fcm_dpo/beta": 0.001626357901841402, + "fcm_dpo/delta": 0.02442072331905365, + "fcm_dpo/margin": 230.87094116210938, + "fcm_dpo/q_t": 0.41455578804016113, + "grad_norm": 44.872047424316406, + "learning_rate": 2.0532469944670343e-07, + "logits/chosen": -0.48675569891929626, + "logits/rejected": -0.5024890303611755, + "logps/chosen": -405.6380615234375, + "logps/ref_chosen": -52.30727005004883, + "logps/ref_rejected": -80.69495391845703, + "logps/rejected": -664.8966674804688, + "loss": 1.1138, + "margin_dpo/margin_mean": 230.87094116210938, + "margin_dpo/margin_std": 319.9390869140625, + "step": 411 + }, + { + "KL/chosen_KL_mean": -358.72283935546875, + "KL/mean": -475.9858093261719, + "KL/rejected_KL_mean": -593.2487182617188, + "KL/std": 272.87725830078125, + "epoch": 0.604992657856094, + "fcm_dpo/beta": 0.0016456831945106387, + "fcm_dpo/delta": 0.014591998420655727, + "fcm_dpo/margin": 234.52587890625, + "fcm_dpo/q_t": 0.4107934236526489, + "grad_norm": 37.48828887939453, + "learning_rate": 2.0406262054585738e-07, + "logits/chosen": -0.5510473251342773, + "logits/rejected": -0.5846823453903198, + "logps/chosen": -411.86700439453125, + "logps/ref_chosen": -53.144126892089844, + "logps/ref_rejected": -100.0608139038086, + "logps/rejected": -693.3095703125, + "loss": 1.1019, + "margin_dpo/margin_mean": 234.52587890625, + "margin_dpo/margin_std": 319.07745361328125, + "step": 412 + }, + { + "KL/chosen_KL_mean": -391.5623779296875, + "KL/mean": -510.7938537597656, + "KL/rejected_KL_mean": -630.025390625, + "KL/std": 288.8511962890625, + "epoch": 0.6064610866372981, + "fcm_dpo/beta": 0.0016510069835931063, + "fcm_dpo/delta": 0.0064473580569028854, + "fcm_dpo/margin": 238.46292114257812, + "fcm_dpo/q_t": 0.40730637311935425, + "grad_norm": 33.53278732299805, + "learning_rate": 2.0280175213768205e-07, + "logits/chosen": -0.5170902013778687, + "logits/rejected": -0.5278250575065613, + "logps/chosen": -453.14434814453125, + "logps/ref_chosen": -61.58196258544922, + "logps/ref_rejected": -99.47340393066406, + "logps/rejected": -729.498779296875, + "loss": 1.0936, + "margin_dpo/margin_mean": 238.4629364013672, + "margin_dpo/margin_std": 311.4296875, + "step": 413 + }, + { + "KL/chosen_KL_mean": -364.56231689453125, + "KL/mean": -492.68621826171875, + "KL/rejected_KL_mean": -620.8101806640625, + "KL/std": 266.0479736328125, + "epoch": 0.6079295154185022, + "fcm_dpo/beta": 0.001652669394388795, + "fcm_dpo/delta": -0.025241520255804062, + "fcm_dpo/margin": 256.247802734375, + "fcm_dpo/q_t": 0.4015337824821472, + "grad_norm": 35.41373062133789, + "learning_rate": 2.0154212744723247e-07, + "logits/chosen": -0.5074818134307861, + "logits/rejected": -0.5076801776885986, + "logps/chosen": -411.1938171386719, + "logps/ref_chosen": -46.63148498535156, + "logps/ref_rejected": -87.64653015136719, + "logps/rejected": -708.4566650390625, + "loss": 1.0711, + "margin_dpo/margin_mean": 256.2478332519531, + "margin_dpo/margin_std": 307.007080078125, + "step": 414 + }, + { + "KL/chosen_KL_mean": -394.33807373046875, + "KL/mean": -489.5907897949219, + "KL/rejected_KL_mean": -584.843505859375, + "KL/std": 265.6529541015625, + "epoch": 0.6093979441997063, + "fcm_dpo/beta": 0.0016560875810682774, + "fcm_dpo/delta": 0.08733348548412323, + "fcm_dpo/margin": 190.50540161132812, + "fcm_dpo/q_t": 0.42633694410324097, + "grad_norm": 29.98908233642578, + "learning_rate": 2.002837796667909e-07, + "logits/chosen": -0.5528023838996887, + "logits/rejected": -0.5534902811050415, + "logps/chosen": -472.95635986328125, + "logps/ref_chosen": -78.6182861328125, + "logps/ref_rejected": -100.47752380371094, + "logps/rejected": -685.321044921875, + "loss": 1.1517, + "margin_dpo/margin_mean": 190.50540161132812, + "margin_dpo/margin_std": 290.72491455078125, + "step": 415 + }, + { + "KL/chosen_KL_mean": -382.89056396484375, + "KL/mean": -540.4343872070312, + "KL/rejected_KL_mean": -697.9782104492188, + "KL/std": 309.90460205078125, + "epoch": 0.6108663729809104, + "fcm_dpo/beta": 0.0016415867721661925, + "fcm_dpo/delta": -0.12347446382045746, + "fcm_dpo/margin": 315.0876770019531, + "fcm_dpo/q_t": 0.38012266159057617, + "grad_norm": 37.14603042602539, + "learning_rate": 1.990267419549914e-07, + "logits/chosen": -0.5974301099777222, + "logits/rejected": -0.6131728887557983, + "logps/chosen": -441.1697082519531, + "logps/ref_chosen": -58.27912521362305, + "logps/ref_rejected": -90.56871795654297, + "logps/rejected": -788.5469360351562, + "loss": 0.9956, + "margin_dpo/margin_mean": 315.0876770019531, + "margin_dpo/margin_std": 312.2025146484375, + "step": 416 + }, + { + "KL/chosen_KL_mean": -382.858154296875, + "KL/mean": -517.802978515625, + "KL/rejected_KL_mean": -652.7478637695312, + "KL/std": 286.8901672363281, + "epoch": 0.6123348017621145, + "fcm_dpo/beta": 0.0016207349253818393, + "fcm_dpo/delta": -0.039116691797971725, + "fcm_dpo/margin": 269.8897399902344, + "fcm_dpo/q_t": 0.3975698947906494, + "grad_norm": 29.581459045410156, + "learning_rate": 1.9777104743594686e-07, + "logits/chosen": -0.5895746946334839, + "logits/rejected": -0.583941638469696, + "logps/chosen": -433.0568542480469, + "logps/ref_chosen": -50.1987190246582, + "logps/ref_rejected": -68.15184020996094, + "logps/rejected": -720.8997192382812, + "loss": 1.0467, + "margin_dpo/margin_mean": 269.8897705078125, + "margin_dpo/margin_std": 286.52203369140625, + "step": 417 + }, + { + "KL/chosen_KL_mean": -423.42840576171875, + "KL/mean": -557.5980224609375, + "KL/rejected_KL_mean": -691.767578125, + "KL/std": 328.6751708984375, + "epoch": 0.6138032305433186, + "fcm_dpo/beta": 0.0016204738058149815, + "fcm_dpo/delta": -0.03766999393701553, + "fcm_dpo/margin": 268.33917236328125, + "fcm_dpo/q_t": 0.403804749250412, + "grad_norm": 33.6888427734375, + "learning_rate": 1.965167291983757e-07, + "logits/chosen": -0.6470938920974731, + "logits/rejected": -0.6322601437568665, + "logps/chosen": -505.4068603515625, + "logps/ref_chosen": -81.97846984863281, + "logps/ref_rejected": -104.69148254394531, + "logps/rejected": -796.4591064453125, + "loss": 1.0912, + "margin_dpo/margin_mean": 268.33917236328125, + "margin_dpo/margin_std": 380.9486999511719, + "step": 418 + }, + { + "KL/chosen_KL_mean": -383.6288146972656, + "KL/mean": -537.1128540039062, + "KL/rejected_KL_mean": -690.5968017578125, + "KL/std": 298.24053955078125, + "epoch": 0.6152716593245228, + "fcm_dpo/beta": 0.0015830930788069963, + "fcm_dpo/delta": -0.09028756618499756, + "fcm_dpo/margin": 306.96807861328125, + "fcm_dpo/q_t": 0.38735997676849365, + "grad_norm": 34.53618240356445, + "learning_rate": 1.9526382029472988e-07, + "logits/chosen": -0.6050629019737244, + "logits/rejected": -0.6123736500740051, + "logps/chosen": -436.57745361328125, + "logps/ref_chosen": -52.948646545410156, + "logps/ref_rejected": -91.58309936523438, + "logps/rejected": -782.179931640625, + "loss": 1.0281, + "margin_dpo/margin_mean": 306.9680480957031, + "margin_dpo/margin_std": 345.80462646484375, + "step": 419 + }, + { + "KL/chosen_KL_mean": -483.5123291015625, + "KL/mean": -582.5636596679688, + "KL/rejected_KL_mean": -681.614990234375, + "KL/std": 315.52850341796875, + "epoch": 0.6167400881057269, + "fcm_dpo/beta": 0.0015975853893905878, + "fcm_dpo/delta": 0.08602797240018845, + "fcm_dpo/margin": 198.1026153564453, + "fcm_dpo/q_t": 0.43039628863334656, + "grad_norm": 62.545352935791016, + "learning_rate": 1.9401235374032425e-07, + "logits/chosen": -0.652934730052948, + "logits/rejected": -0.626418948173523, + "logps/chosen": -561.2822265625, + "logps/ref_chosen": -77.7699203491211, + "logps/ref_rejected": -69.31985473632812, + "logps/rejected": -750.934814453125, + "loss": 1.195, + "margin_dpo/margin_mean": 198.1026153564453, + "margin_dpo/margin_std": 413.45147705078125, + "step": 420 + }, + { + "KL/chosen_KL_mean": -400.49127197265625, + "KL/mean": -509.7319030761719, + "KL/rejected_KL_mean": -618.9725341796875, + "KL/std": 300.7071533203125, + "epoch": 0.618208516886931, + "fcm_dpo/beta": 0.001619070884771645, + "fcm_dpo/delta": 0.04750995337963104, + "fcm_dpo/margin": 218.4813232421875, + "fcm_dpo/q_t": 0.4164373278617859, + "grad_norm": 31.18450927734375, + "learning_rate": 1.9276236251246653e-07, + "logits/chosen": -0.6628319025039673, + "logits/rejected": -0.6538623571395874, + "logps/chosen": -454.25714111328125, + "logps/ref_chosen": -53.765865325927734, + "logps/ref_rejected": -89.28144836425781, + "logps/rejected": -708.2540283203125, + "loss": 1.1261, + "margin_dpo/margin_mean": 218.4813232421875, + "margin_dpo/margin_std": 307.9769287109375, + "step": 421 + }, + { + "KL/chosen_KL_mean": -447.5567626953125, + "KL/mean": -573.166015625, + "KL/rejected_KL_mean": -698.7752685546875, + "KL/std": 324.9825134277344, + "epoch": 0.6196769456681351, + "fcm_dpo/beta": 0.001614267472177744, + "fcm_dpo/delta": -0.005787511821836233, + "fcm_dpo/margin": 251.21852111816406, + "fcm_dpo/q_t": 0.4061550498008728, + "grad_norm": 39.39680480957031, + "learning_rate": 1.9151387954958792e-07, + "logits/chosen": -0.6519845724105835, + "logits/rejected": -0.6586755514144897, + "logps/chosen": -516.1905517578125, + "logps/ref_chosen": -68.6337661743164, + "logps/ref_rejected": -87.86351013183594, + "logps/rejected": -786.6387939453125, + "loss": 1.1048, + "margin_dpo/margin_mean": 251.21853637695312, + "margin_dpo/margin_std": 371.8038330078125, + "step": 422 + }, + { + "KL/chosen_KL_mean": -412.92333984375, + "KL/mean": -550.4478149414062, + "KL/rejected_KL_mean": -687.9722900390625, + "KL/std": 291.155029296875, + "epoch": 0.6211453744493393, + "fcm_dpo/beta": 0.0016041090711951256, + "fcm_dpo/delta": -0.04313413053750992, + "fcm_dpo/margin": 275.04901123046875, + "fcm_dpo/q_t": 0.39793986082077026, + "grad_norm": 34.45840835571289, + "learning_rate": 1.902669377503756e-07, + "logits/chosen": -0.6469055414199829, + "logits/rejected": -0.6586620807647705, + "logps/chosen": -467.91363525390625, + "logps/ref_chosen": -54.99030303955078, + "logps/ref_rejected": -86.30654907226562, + "logps/rejected": -774.2788696289062, + "loss": 1.0554, + "margin_dpo/margin_mean": 275.04901123046875, + "margin_dpo/margin_std": 323.095947265625, + "step": 423 + }, + { + "KL/chosen_KL_mean": -366.37255859375, + "KL/mean": -494.26788330078125, + "KL/rejected_KL_mean": -622.1632080078125, + "KL/std": 289.16485595703125, + "epoch": 0.6226138032305433, + "fcm_dpo/beta": 0.0015932890819385648, + "fcm_dpo/delta": -0.008194293826818466, + "fcm_dpo/margin": 255.79061889648438, + "fcm_dpo/q_t": 0.4079640209674835, + "grad_norm": 39.47249221801758, + "learning_rate": 1.890215699729057e-07, + "logits/chosen": -0.6213667392730713, + "logits/rejected": -0.6005524396896362, + "logps/chosen": -422.38446044921875, + "logps/ref_chosen": -56.01192092895508, + "logps/ref_rejected": -66.47896575927734, + "logps/rejected": -688.6422119140625, + "loss": 1.0894, + "margin_dpo/margin_mean": 255.79061889648438, + "margin_dpo/margin_std": 346.87823486328125, + "step": 424 + }, + { + "KL/chosen_KL_mean": -408.7755126953125, + "KL/mean": -500.9407958984375, + "KL/rejected_KL_mean": -593.10595703125, + "KL/std": 269.2953186035156, + "epoch": 0.6240822320117474, + "fcm_dpo/beta": 0.0016285117017105222, + "fcm_dpo/delta": 0.10236521810293198, + "fcm_dpo/margin": 184.3304901123047, + "fcm_dpo/q_t": 0.42912036180496216, + "grad_norm": 33.893001556396484, + "learning_rate": 1.8777780903377732e-07, + "logits/chosen": -0.6225741505622864, + "logits/rejected": -0.6225865483283997, + "logps/chosen": -455.64453125, + "logps/ref_chosen": -46.86899948120117, + "logps/ref_rejected": -95.92545318603516, + "logps/rejected": -689.031494140625, + "loss": 1.1745, + "margin_dpo/margin_mean": 184.3304901123047, + "margin_dpo/margin_std": 319.94305419921875, + "step": 425 + }, + { + "KL/chosen_KL_mean": -371.005615234375, + "KL/mean": -493.8805236816406, + "KL/rejected_KL_mean": -616.7554321289062, + "KL/std": 271.8358154296875, + "epoch": 0.6255506607929515, + "fcm_dpo/beta": 0.0016375456470996141, + "fcm_dpo/delta": -0.0028386712074279785, + "fcm_dpo/margin": 245.7498321533203, + "fcm_dpo/q_t": 0.40605005621910095, + "grad_norm": 24.651735305786133, + "learning_rate": 1.8653568770724803e-07, + "logits/chosen": -0.603665292263031, + "logits/rejected": -0.5721160173416138, + "logps/chosen": -447.58917236328125, + "logps/ref_chosen": -76.58354187011719, + "logps/ref_rejected": -81.26658630371094, + "logps/rejected": -698.0220336914062, + "loss": 1.0796, + "margin_dpo/margin_mean": 245.7498321533203, + "margin_dpo/margin_std": 290.891357421875, + "step": 426 + }, + { + "KL/chosen_KL_mean": -333.1865539550781, + "KL/mean": -426.1375427246094, + "KL/rejected_KL_mean": -519.0885620117188, + "KL/std": 232.90151977539062, + "epoch": 0.6270190895741556, + "fcm_dpo/beta": 0.0016515168827027082, + "fcm_dpo/delta": 0.09603013098239899, + "fcm_dpo/margin": 185.9020233154297, + "fcm_dpo/q_t": 0.4287715554237366, + "grad_norm": 25.280298233032227, + "learning_rate": 1.8529523872436977e-07, + "logits/chosen": -0.6069827079772949, + "logits/rejected": -0.5897522568702698, + "logps/chosen": -398.0404357910156, + "logps/ref_chosen": -64.8538818359375, + "logps/ref_rejected": -78.5660171508789, + "logps/rejected": -597.654541015625, + "loss": 1.1536, + "margin_dpo/margin_mean": 185.9020233154297, + "margin_dpo/margin_std": 277.96832275390625, + "step": 427 + }, + { + "KL/chosen_KL_mean": -421.74224853515625, + "KL/mean": -548.2906494140625, + "KL/rejected_KL_mean": -674.8390502929688, + "KL/std": 312.8956298828125, + "epoch": 0.6284875183553598, + "fcm_dpo/beta": 0.001651531783863902, + "fcm_dpo/delta": -0.019138701260089874, + "fcm_dpo/margin": 253.09681701660156, + "fcm_dpo/q_t": 0.4038928151130676, + "grad_norm": 31.78797721862793, + "learning_rate": 1.8405649477212697e-07, + "logits/chosen": -0.5810732245445251, + "logits/rejected": -0.5857928395271301, + "logps/chosen": -484.37890625, + "logps/ref_chosen": -62.63666534423828, + "logps/ref_rejected": -103.28181457519531, + "logps/rejected": -778.120849609375, + "loss": 1.1004, + "margin_dpo/margin_mean": 253.09683227539062, + "margin_dpo/margin_std": 371.6815185546875, + "step": 428 + }, + { + "KL/chosen_KL_mean": -435.04058837890625, + "KL/mean": -534.104248046875, + "KL/rejected_KL_mean": -633.1678466796875, + "KL/std": 291.46722412109375, + "epoch": 0.6299559471365639, + "fcm_dpo/beta": 0.0016507648397237062, + "fcm_dpo/delta": -0.03221222758293152, + "fcm_dpo/margin": 198.1272430419922, + "fcm_dpo/q_t": 0.4247916340827942, + "grad_norm": 31.24496078491211, + "learning_rate": 1.828194884925749e-07, + "logits/chosen": -0.5936084985733032, + "logits/rejected": -0.570778489112854, + "logps/chosen": -516.2745971679688, + "logps/ref_chosen": -81.23401641845703, + "logps/ref_rejected": -91.79493713378906, + "logps/rejected": -724.9627685546875, + "loss": 1.1706, + "margin_dpo/margin_mean": 198.1272430419922, + "margin_dpo/margin_std": 350.78887939453125, + "step": 429 + }, + { + "KL/chosen_KL_mean": -348.882568359375, + "KL/mean": -453.02349853515625, + "KL/rejected_KL_mean": -557.1644287109375, + "KL/std": 258.8692321777344, + "epoch": 0.631424375917768, + "fcm_dpo/beta": 0.0016623124247416854, + "fcm_dpo/delta": 0.055517442524433136, + "fcm_dpo/margin": 208.2819061279297, + "fcm_dpo/q_t": 0.41966137290000916, + "grad_norm": 36.20037078857422, + "learning_rate": 1.8158425248197928e-07, + "logits/chosen": -0.5428692102432251, + "logits/rejected": -0.5417746305465698, + "logps/chosen": -409.8028564453125, + "logps/ref_chosen": -60.920326232910156, + "logps/ref_rejected": -104.42280578613281, + "logps/rejected": -661.5872802734375, + "loss": 1.122, + "margin_dpo/margin_mean": 208.2819061279297, + "margin_dpo/margin_std": 283.18389892578125, + "step": 430 + }, + { + "KL/chosen_KL_mean": -318.82403564453125, + "KL/mean": -467.67669677734375, + "KL/rejected_KL_mean": -616.529296875, + "KL/std": 278.1539001464844, + "epoch": 0.6328928046989721, + "fcm_dpo/beta": 0.0016387823270633817, + "fcm_dpo/delta": -0.09255114197731018, + "fcm_dpo/margin": 297.705322265625, + "fcm_dpo/q_t": 0.3862955570220947, + "grad_norm": 29.066301345825195, + "learning_rate": 1.8035081928995788e-07, + "logits/chosen": -0.5722877383232117, + "logits/rejected": -0.5794203281402588, + "logps/chosen": -376.17279052734375, + "logps/ref_chosen": -57.34874725341797, + "logps/ref_rejected": -92.84022521972656, + "logps/rejected": -709.3695678710938, + "loss": 1.0176, + "margin_dpo/margin_mean": 297.705322265625, + "margin_dpo/margin_std": 304.55963134765625, + "step": 431 + }, + { + "KL/chosen_KL_mean": -330.6824645996094, + "KL/mean": -471.57916259765625, + "KL/rejected_KL_mean": -612.475830078125, + "KL/std": 277.7702941894531, + "epoch": 0.6343612334801763, + "fcm_dpo/beta": 0.0016283730510622263, + "fcm_dpo/delta": -0.06209279224276543, + "fcm_dpo/margin": 281.7933654785156, + "fcm_dpo/q_t": 0.3932652473449707, + "grad_norm": 31.19976043701172, + "learning_rate": 1.791192214186223e-07, + "logits/chosen": -0.5432115793228149, + "logits/rejected": -0.5331372618675232, + "logps/chosen": -401.75726318359375, + "logps/ref_chosen": -71.07479095458984, + "logps/ref_rejected": -98.57952880859375, + "logps/rejected": -711.055419921875, + "loss": 1.0323, + "margin_dpo/margin_mean": 281.7933654785156, + "margin_dpo/margin_std": 281.0911560058594, + "step": 432 + }, + { + "KL/chosen_KL_mean": -419.6158447265625, + "KL/mean": -512.61572265625, + "KL/rejected_KL_mean": -605.6156005859375, + "KL/std": 285.9530029296875, + "epoch": 0.6358296622613803, + "fcm_dpo/beta": 0.0016389940865337849, + "fcm_dpo/delta": 0.09798242151737213, + "fcm_dpo/margin": 185.99978637695312, + "fcm_dpo/q_t": 0.4270647466182709, + "grad_norm": 37.97488784790039, + "learning_rate": 1.7788949132172193e-07, + "logits/chosen": -0.6025904417037964, + "logits/rejected": -0.591471791267395, + "logps/chosen": -477.8890380859375, + "logps/ref_chosen": -58.273193359375, + "logps/ref_rejected": -95.95089721679688, + "logps/rejected": -701.5665283203125, + "loss": 1.1752, + "margin_dpo/margin_mean": 185.99978637695312, + "margin_dpo/margin_std": 330.22344970703125, + "step": 433 + }, + { + "KL/chosen_KL_mean": -354.8839111328125, + "KL/mean": -471.41546630859375, + "KL/rejected_KL_mean": -587.9470825195312, + "KL/std": 279.48162841796875, + "epoch": 0.6372980910425844, + "fcm_dpo/beta": 0.0016441468615084887, + "fcm_dpo/delta": 0.017331628128886223, + "fcm_dpo/margin": 233.06314086914062, + "fcm_dpo/q_t": 0.41576558351516724, + "grad_norm": 20.974876403808594, + "learning_rate": 1.7666166140378853e-07, + "logits/chosen": -0.5894551873207092, + "logits/rejected": -0.5889327526092529, + "logps/chosen": -416.8576354980469, + "logps/ref_chosen": -61.97370147705078, + "logps/ref_rejected": -78.49861145019531, + "logps/rejected": -666.4456787109375, + "loss": 1.112, + "margin_dpo/margin_mean": 233.06314086914062, + "margin_dpo/margin_std": 350.6734619140625, + "step": 434 + }, + { + "KL/chosen_KL_mean": -339.70953369140625, + "KL/mean": -465.9414978027344, + "KL/rejected_KL_mean": -592.1734619140625, + "KL/std": 277.168701171875, + "epoch": 0.6387665198237885, + "fcm_dpo/beta": 0.001645256532356143, + "fcm_dpo/delta": -0.016068164259195328, + "fcm_dpo/margin": 252.4639129638672, + "fcm_dpo/q_t": 0.40314286947250366, + "grad_norm": 31.517038345336914, + "learning_rate": 1.7543576401928218e-07, + "logits/chosen": -0.6623108386993408, + "logits/rejected": -0.656818151473999, + "logps/chosen": -391.21160888671875, + "logps/ref_chosen": -51.502052307128906, + "logps/ref_rejected": -87.56689453125, + "logps/rejected": -679.7403564453125, + "loss": 1.0747, + "margin_dpo/margin_mean": 252.46392822265625, + "margin_dpo/margin_std": 302.1197509765625, + "step": 435 + }, + { + "KL/chosen_KL_mean": -361.44921875, + "KL/mean": -471.92071533203125, + "KL/rejected_KL_mean": -582.3922119140625, + "KL/std": 250.78880310058594, + "epoch": 0.6402349486049926, + "fcm_dpo/beta": 0.0016492058057338, + "fcm_dpo/delta": 0.03687084838747978, + "fcm_dpo/margin": 220.94302368164062, + "fcm_dpo/q_t": 0.4155094027519226, + "grad_norm": 41.7802619934082, + "learning_rate": 1.742118314717391e-07, + "logits/chosen": -0.604122519493103, + "logits/rejected": -0.5753868222236633, + "logps/chosen": -432.8529052734375, + "logps/ref_chosen": -71.40371704101562, + "logps/ref_rejected": -82.72775268554688, + "logps/rejected": -665.1199951171875, + "loss": 1.1117, + "margin_dpo/margin_mean": 220.94302368164062, + "margin_dpo/margin_std": 296.26336669921875, + "step": 436 + }, + { + "KL/chosen_KL_mean": -363.95184326171875, + "KL/mean": -478.0007629394531, + "KL/rejected_KL_mean": -592.0496826171875, + "KL/std": 241.47042846679688, + "epoch": 0.6417033773861968, + "fcm_dpo/beta": 0.0016615703934803605, + "fcm_dpo/delta": 0.021829720586538315, + "fcm_dpo/margin": 228.0978240966797, + "fcm_dpo/q_t": 0.4112783670425415, + "grad_norm": 22.802751541137695, + "learning_rate": 1.7298989601292036e-07, + "logits/chosen": -0.6008163690567017, + "logits/rejected": -0.5768181681632996, + "logps/chosen": -428.69610595703125, + "logps/ref_chosen": -64.7442626953125, + "logps/ref_rejected": -82.04356384277344, + "logps/rejected": -674.0932006835938, + "loss": 1.0971, + "margin_dpo/margin_mean": 228.09780883789062, + "margin_dpo/margin_std": 287.695556640625, + "step": 437 + }, + { + "KL/chosen_KL_mean": -375.4371337890625, + "KL/mean": -506.3113708496094, + "KL/rejected_KL_mean": -637.185546875, + "KL/std": 277.33953857421875, + "epoch": 0.6431718061674009, + "fcm_dpo/beta": 0.0016493103466928005, + "fcm_dpo/delta": -0.03368060290813446, + "fcm_dpo/margin": 261.7484436035156, + "fcm_dpo/q_t": 0.39925286173820496, + "grad_norm": 27.77182388305664, + "learning_rate": 1.7176998984196144e-07, + "logits/chosen": -0.6229407787322998, + "logits/rejected": -0.6052076816558838, + "logps/chosen": -434.455810546875, + "logps/ref_chosen": -59.0186653137207, + "logps/ref_rejected": -83.07682800292969, + "logps/rejected": -720.2623901367188, + "loss": 1.0575, + "margin_dpo/margin_mean": 261.7484436035156, + "margin_dpo/margin_std": 294.62152099609375, + "step": 438 + }, + { + "KL/chosen_KL_mean": -397.02178955078125, + "KL/mean": -500.23809814453125, + "KL/rejected_KL_mean": -603.4544067382812, + "KL/std": 279.9316711425781, + "epoch": 0.644640234948605, + "fcm_dpo/beta": 0.001632218947634101, + "fcm_dpo/delta": -0.06549854576587677, + "fcm_dpo/margin": 206.4326171875, + "fcm_dpo/q_t": 0.42261505126953125, + "grad_norm": 28.284347534179688, + "learning_rate": 1.7055214510452458e-07, + "logits/chosen": -0.6147496700286865, + "logits/rejected": -0.6177977323532104, + "logps/chosen": -450.8058776855469, + "logps/ref_chosen": -53.78407669067383, + "logps/ref_rejected": -83.98545837402344, + "logps/rejected": -687.4398803710938, + "loss": 1.1492, + "margin_dpo/margin_mean": 206.43260192871094, + "margin_dpo/margin_std": 320.11016845703125, + "step": 439 + }, + { + "KL/chosen_KL_mean": -427.0113525390625, + "KL/mean": -549.5577392578125, + "KL/rejected_KL_mean": -672.10400390625, + "KL/std": 337.44097900390625, + "epoch": 0.6461086637298091, + "fcm_dpo/beta": 0.0016363917384296656, + "fcm_dpo/delta": -0.0012616775929927826, + "fcm_dpo/margin": 245.09262084960938, + "fcm_dpo/q_t": 0.4104476571083069, + "grad_norm": 33.926116943359375, + "learning_rate": 1.6933639389195134e-07, + "logits/chosen": -0.6553194522857666, + "logits/rejected": -0.6481237411499023, + "logps/chosen": -505.5780944824219, + "logps/ref_chosen": -78.56671905517578, + "logps/ref_rejected": -96.49775695800781, + "logps/rejected": -768.601806640625, + "loss": 1.0939, + "margin_dpo/margin_mean": 245.0926513671875, + "margin_dpo/margin_std": 338.4553527832031, + "step": 440 + }, + { + "KL/chosen_KL_mean": -478.03643798828125, + "KL/mean": -599.6422729492188, + "KL/rejected_KL_mean": -721.2481689453125, + "KL/std": 347.5352783203125, + "epoch": 0.6475770925110133, + "fcm_dpo/beta": 0.0016381317982450128, + "fcm_dpo/delta": 0.0013750754296779633, + "fcm_dpo/margin": 243.21163940429688, + "fcm_dpo/q_t": 0.4130883812904358, + "grad_norm": 35.49950408935547, + "learning_rate": 1.681227682404166e-07, + "logits/chosen": -0.6869616508483887, + "logits/rejected": -0.6749493479728699, + "logps/chosen": -538.86083984375, + "logps/ref_chosen": -60.824440002441406, + "logps/ref_rejected": -96.47080993652344, + "logps/rejected": -817.7189331054688, + "loss": 1.1354, + "margin_dpo/margin_mean": 243.21163940429688, + "margin_dpo/margin_std": 415.7077331542969, + "step": 441 + }, + { + "KL/chosen_KL_mean": -415.85516357421875, + "KL/mean": -557.4616088867188, + "KL/rejected_KL_mean": -699.0679931640625, + "KL/std": 336.6131591796875, + "epoch": 0.6490455212922174, + "fcm_dpo/beta": 0.0016285094898194075, + "fcm_dpo/delta": -0.06479034572839737, + "fcm_dpo/margin": 283.212890625, + "fcm_dpo/q_t": 0.39779287576675415, + "grad_norm": 30.064672470092773, + "learning_rate": 1.669113001300851e-07, + "logits/chosen": -0.6426968574523926, + "logits/rejected": -0.6355198621749878, + "logps/chosen": -462.8663635253906, + "logps/ref_chosen": -47.01121520996094, + "logps/ref_rejected": -76.53926086425781, + "logps/rejected": -775.6072998046875, + "loss": 1.0672, + "margin_dpo/margin_mean": 283.212890625, + "margin_dpo/margin_std": 374.67840576171875, + "step": 442 + }, + { + "KL/chosen_KL_mean": -472.62872314453125, + "KL/mean": -562.2388305664062, + "KL/rejected_KL_mean": -651.8489990234375, + "KL/std": 327.6013488769531, + "epoch": 0.6505139500734214, + "fcm_dpo/beta": 0.0016116888727992773, + "fcm_dpo/delta": 0.0013880077749490738, + "fcm_dpo/margin": 179.22032165527344, + "fcm_dpo/q_t": 0.4350769817829132, + "grad_norm": 37.92525863647461, + "learning_rate": 1.6570202148426815e-07, + "logits/chosen": -0.6569748520851135, + "logits/rejected": -0.636266827583313, + "logps/chosen": -543.9017333984375, + "logps/ref_chosen": -71.27301788330078, + "logps/ref_rejected": -86.679931640625, + "logps/rejected": -738.5289306640625, + "loss": 1.2139, + "margin_dpo/margin_mean": 179.22032165527344, + "margin_dpo/margin_std": 391.09747314453125, + "step": 443 + }, + { + "KL/chosen_KL_mean": -461.75042724609375, + "KL/mean": -612.273193359375, + "KL/rejected_KL_mean": -762.7958984375, + "KL/std": 354.7071533203125, + "epoch": 0.6519823788546255, + "fcm_dpo/beta": 0.001586769474670291, + "fcm_dpo/delta": -0.0820961445569992, + "fcm_dpo/margin": 301.04541015625, + "fcm_dpo/q_t": 0.3927891254425049, + "grad_norm": 28.29939842224121, + "learning_rate": 1.6449496416858282e-07, + "logits/chosen": -0.606819748878479, + "logits/rejected": -0.6156275272369385, + "logps/chosen": -518.9641723632812, + "logps/ref_chosen": -57.213706970214844, + "logps/ref_rejected": -97.25489807128906, + "logps/rejected": -860.05078125, + "loss": 1.0506, + "margin_dpo/margin_mean": 301.04541015625, + "margin_dpo/margin_std": 390.89599609375, + "step": 444 + }, + { + "KL/chosen_KL_mean": -402.9183044433594, + "KL/mean": -539.9268798828125, + "KL/rejected_KL_mean": -676.935546875, + "KL/std": 278.75872802734375, + "epoch": 0.6534508076358296, + "fcm_dpo/beta": 0.0015799321699887514, + "fcm_dpo/delta": -0.03441721200942993, + "fcm_dpo/margin": 274.0172119140625, + "fcm_dpo/q_t": 0.4012291133403778, + "grad_norm": 31.61493492126465, + "learning_rate": 1.6329015999011182e-07, + "logits/chosen": -0.6219183206558228, + "logits/rejected": -0.6123214960098267, + "logps/chosen": -470.21807861328125, + "logps/ref_chosen": -67.29979705810547, + "logps/ref_rejected": -92.68267059326172, + "logps/rejected": -769.6182250976562, + "loss": 1.0706, + "margin_dpo/margin_mean": 274.0172424316406, + "margin_dpo/margin_std": 343.6361083984375, + "step": 445 + }, + { + "KL/chosen_KL_mean": -358.52130126953125, + "KL/mean": -507.340087890625, + "KL/rejected_KL_mean": -656.158935546875, + "KL/std": 294.689453125, + "epoch": 0.6549192364170338, + "fcm_dpo/beta": 0.0015673264861106873, + "fcm_dpo/delta": -0.0700267031788826, + "fcm_dpo/margin": 297.6376037597656, + "fcm_dpo/q_t": 0.3905888795852661, + "grad_norm": 33.535675048828125, + "learning_rate": 1.6208764069656578e-07, + "logits/chosen": -0.6319636106491089, + "logits/rejected": -0.6446236371994019, + "logps/chosen": -417.61981201171875, + "logps/ref_chosen": -59.098487854003906, + "logps/ref_rejected": -101.26419067382812, + "logps/rejected": -757.423095703125, + "loss": 1.0286, + "margin_dpo/margin_mean": 297.6376037597656, + "margin_dpo/margin_std": 300.60076904296875, + "step": 446 + }, + { + "KL/chosen_KL_mean": -370.45916748046875, + "KL/mean": -522.060791015625, + "KL/rejected_KL_mean": -673.6624755859375, + "KL/std": 334.0948486328125, + "epoch": 0.6563876651982379, + "fcm_dpo/beta": 0.001533093280158937, + "fcm_dpo/delta": -0.06830260902643204, + "fcm_dpo/margin": 303.20330810546875, + "fcm_dpo/q_t": 0.39436084032058716, + "grad_norm": 30.70143699645996, + "learning_rate": 1.608874379754465e-07, + "logits/chosen": -0.6682947874069214, + "logits/rejected": -0.6798413395881653, + "logps/chosen": -426.53448486328125, + "logps/ref_chosen": -56.07533264160156, + "logps/ref_rejected": -98.69475555419922, + "logps/rejected": -772.357177734375, + "loss": 1.0422, + "margin_dpo/margin_mean": 303.20330810546875, + "margin_dpo/margin_std": 361.6494140625, + "step": 447 + }, + { + "KL/chosen_KL_mean": -412.73431396484375, + "KL/mean": -553.443359375, + "KL/rejected_KL_mean": -694.1524658203125, + "KL/std": 292.61798095703125, + "epoch": 0.657856093979442, + "fcm_dpo/beta": 0.001529197907075286, + "fcm_dpo/delta": -0.03181178867816925, + "fcm_dpo/margin": 281.41815185546875, + "fcm_dpo/q_t": 0.4007987380027771, + "grad_norm": 32.050010681152344, + "learning_rate": 1.5968958345321177e-07, + "logits/chosen": -0.5674072504043579, + "logits/rejected": -0.5757938623428345, + "logps/chosen": -472.7381591796875, + "logps/ref_chosen": -60.00384521484375, + "logps/ref_rejected": -102.26465606689453, + "logps/rejected": -796.4171142578125, + "loss": 1.0647, + "margin_dpo/margin_mean": 281.41815185546875, + "margin_dpo/margin_std": 337.77032470703125, + "step": 448 + }, + { + "KL/chosen_KL_mean": -404.6903076171875, + "KL/mean": -552.0719604492188, + "KL/rejected_KL_mean": -699.4535522460938, + "KL/std": 354.35406494140625, + "epoch": 0.6593245227606461, + "fcm_dpo/beta": 0.0015087838983163238, + "fcm_dpo/delta": -0.046950291842222214, + "fcm_dpo/margin": 294.76324462890625, + "fcm_dpo/q_t": 0.4015204608440399, + "grad_norm": 29.442913055419922, + "learning_rate": 1.584941086944423e-07, + "logits/chosen": -0.6377573013305664, + "logits/rejected": -0.6385862231254578, + "logps/chosen": -472.2169189453125, + "logps/ref_chosen": -67.52661895751953, + "logps/ref_rejected": -88.59690856933594, + "logps/rejected": -788.0504760742188, + "loss": 1.0779, + "margin_dpo/margin_mean": 294.76324462890625, + "margin_dpo/margin_std": 421.296142578125, + "step": 449 + }, + { + "KL/chosen_KL_mean": -330.97320556640625, + "KL/mean": -487.33258056640625, + "KL/rejected_KL_mean": -643.69189453125, + "KL/std": 313.515625, + "epoch": 0.6607929515418502, + "fcm_dpo/beta": 0.0014938064850866795, + "fcm_dpo/delta": -0.07037577033042908, + "fcm_dpo/margin": 312.7186584472656, + "fcm_dpo/q_t": 0.3899462819099426, + "grad_norm": 41.06943893432617, + "learning_rate": 1.573010452010098e-07, + "logits/chosen": -0.6237972974777222, + "logits/rejected": -0.6363176107406616, + "logps/chosen": -388.0813293457031, + "logps/ref_chosen": -57.10811996459961, + "logps/ref_rejected": -102.75494384765625, + "logps/rejected": -746.44677734375, + "loss": 1.0229, + "margin_dpo/margin_mean": 312.7186584472656, + "margin_dpo/margin_std": 310.42205810546875, + "step": 450 + }, + { + "KL/chosen_KL_mean": -439.8880920410156, + "KL/mean": -556.1387329101562, + "KL/rejected_KL_mean": -672.389404296875, + "KL/std": 351.8367004394531, + "epoch": 0.6622613803230544, + "fcm_dpo/beta": 0.0015017553232610226, + "fcm_dpo/delta": 0.052446845918893814, + "fcm_dpo/margin": 232.50131225585938, + "fcm_dpo/q_t": 0.41802603006362915, + "grad_norm": 32.329261779785156, + "learning_rate": 1.5611042441124687e-07, + "logits/chosen": -0.6703058481216431, + "logits/rejected": -0.6499719619750977, + "logps/chosen": -498.35693359375, + "logps/ref_chosen": -58.46883010864258, + "logps/ref_rejected": -72.92941284179688, + "logps/rejected": -745.3187866210938, + "loss": 1.1564, + "margin_dpo/margin_mean": 232.50131225585938, + "margin_dpo/margin_std": 410.9925537109375, + "step": 451 + }, + { + "KL/chosen_KL_mean": -307.5163269042969, + "KL/mean": -446.1638488769531, + "KL/rejected_KL_mean": -584.8113403320312, + "KL/std": 270.72735595703125, + "epoch": 0.6637298091042585, + "fcm_dpo/beta": 0.001496224314905703, + "fcm_dpo/delta": -0.015626225620508194, + "fcm_dpo/margin": 277.29498291015625, + "fcm_dpo/q_t": 0.4016070067882538, + "grad_norm": 22.36501693725586, + "learning_rate": 1.549222776991186e-07, + "logits/chosen": -0.5967873930931091, + "logits/rejected": -0.6146633625030518, + "logps/chosen": -357.9068908691406, + "logps/ref_chosen": -50.39055252075195, + "logps/ref_rejected": -97.77142333984375, + "logps/rejected": -682.582763671875, + "loss": 1.0545, + "margin_dpo/margin_mean": 277.29498291015625, + "margin_dpo/margin_std": 275.4556579589844, + "step": 452 + }, + { + "KL/chosen_KL_mean": -370.3055419921875, + "KL/mean": -499.5263671875, + "KL/rejected_KL_mean": -628.7472534179688, + "KL/std": 281.7098388671875, + "epoch": 0.6651982378854625, + "fcm_dpo/beta": 0.001497291261330247, + "fcm_dpo/delta": 0.013492653146386147, + "fcm_dpo/margin": 258.44171142578125, + "fcm_dpo/q_t": 0.4115417003631592, + "grad_norm": 27.039974212646484, + "learning_rate": 1.5373663637339584e-07, + "logits/chosen": -0.6106635332107544, + "logits/rejected": -0.5978103876113892, + "logps/chosen": -428.0203857421875, + "logps/ref_chosen": -57.71485137939453, + "logps/ref_rejected": -82.20741271972656, + "logps/rejected": -710.9547119140625, + "loss": 1.0961, + "margin_dpo/margin_mean": 258.44171142578125, + "margin_dpo/margin_std": 340.037353515625, + "step": 453 + }, + { + "KL/chosen_KL_mean": -440.86590576171875, + "KL/mean": -588.3302001953125, + "KL/rejected_KL_mean": -735.7944946289062, + "KL/std": 319.46673583984375, + "epoch": 0.6666666666666666, + "fcm_dpo/beta": 0.0014888541772961617, + "fcm_dpo/delta": -0.041121020913124084, + "fcm_dpo/margin": 294.9285888671875, + "fcm_dpo/q_t": 0.3986594080924988, + "grad_norm": 27.240726470947266, + "learning_rate": 1.5255353167683017e-07, + "logits/chosen": -0.6478193402290344, + "logits/rejected": -0.6373401880264282, + "logps/chosen": -501.8115234375, + "logps/ref_chosen": -60.945648193359375, + "logps/ref_rejected": -84.95079040527344, + "logps/rejected": -820.7452392578125, + "loss": 1.0565, + "margin_dpo/margin_mean": 294.9285888671875, + "margin_dpo/margin_std": 350.4466857910156, + "step": 454 + }, + { + "KL/chosen_KL_mean": -384.628662109375, + "KL/mean": -554.451416015625, + "KL/rejected_KL_mean": -724.274169921875, + "KL/std": 327.93011474609375, + "epoch": 0.6681350954478708, + "fcm_dpo/beta": 0.0014744448708370328, + "fcm_dpo/delta": -0.10616149008274078, + "fcm_dpo/margin": 339.64556884765625, + "fcm_dpo/q_t": 0.3856102526187897, + "grad_norm": 32.356143951416016, + "learning_rate": 1.5137299478533064e-07, + "logits/chosen": -0.6396904587745667, + "logits/rejected": -0.6583499908447266, + "logps/chosen": -429.5153503417969, + "logps/ref_chosen": -44.88671112060547, + "logps/ref_rejected": -115.30147552490234, + "logps/rejected": -839.57568359375, + "loss": 1.0187, + "margin_dpo/margin_mean": 339.64556884765625, + "margin_dpo/margin_std": 368.43084716796875, + "step": 455 + }, + { + "KL/chosen_KL_mean": -411.4013977050781, + "KL/mean": -578.405029296875, + "KL/rejected_KL_mean": -745.4085693359375, + "KL/std": 345.68756103515625, + "epoch": 0.6696035242290749, + "fcm_dpo/beta": 0.0014414741890504956, + "fcm_dpo/delta": -0.08550744503736496, + "fcm_dpo/margin": 334.0072021484375, + "fcm_dpo/q_t": 0.38874322175979614, + "grad_norm": 29.948514938354492, + "learning_rate": 1.5019505680714232e-07, + "logits/chosen": -0.6326814889907837, + "logits/rejected": -0.6543838977813721, + "logps/chosen": -468.43817138671875, + "logps/ref_chosen": -57.036781311035156, + "logps/ref_rejected": -105.21784210205078, + "logps/rejected": -850.6264038085938, + "loss": 1.0142, + "margin_dpo/margin_mean": 334.0072021484375, + "margin_dpo/margin_std": 335.52606201171875, + "step": 456 + }, + { + "KL/chosen_KL_mean": -410.80609130859375, + "KL/mean": -580.7474365234375, + "KL/rejected_KL_mean": -750.6888427734375, + "KL/std": 341.3404541015625, + "epoch": 0.671071953010279, + "fcm_dpo/beta": 0.0014121406711637974, + "fcm_dpo/delta": -0.08430389314889908, + "fcm_dpo/margin": 339.88275146484375, + "fcm_dpo/q_t": 0.3872183859348297, + "grad_norm": 36.677040100097656, + "learning_rate": 1.4901974878202627e-07, + "logits/chosen": -0.6665825843811035, + "logits/rejected": -0.6668688058853149, + "logps/chosen": -465.0486145019531, + "logps/ref_chosen": -54.24253845214844, + "logps/ref_rejected": -85.10956573486328, + "logps/rejected": -835.79833984375, + "loss": 1.0146, + "margin_dpo/margin_mean": 339.8827209472656, + "margin_dpo/margin_std": 330.81744384765625, + "step": 457 + }, + { + "KL/chosen_KL_mean": -430.34912109375, + "KL/mean": -581.6856689453125, + "KL/rejected_KL_mean": -733.022216796875, + "KL/std": 319.516357421875, + "epoch": 0.6725403817914831, + "fcm_dpo/beta": 0.0013981210067868233, + "fcm_dpo/delta": -0.0246875062584877, + "fcm_dpo/margin": 302.67303466796875, + "fcm_dpo/q_t": 0.40338659286499023, + "grad_norm": 24.763858795166016, + "learning_rate": 1.4784710168044212e-07, + "logits/chosen": -0.6812525987625122, + "logits/rejected": -0.6768727898597717, + "logps/chosen": -485.75799560546875, + "logps/ref_chosen": -55.40888214111328, + "logps/ref_rejected": -97.68325805664062, + "logps/rejected": -830.7054443359375, + "loss": 1.0701, + "margin_dpo/margin_mean": 302.6730651855469, + "margin_dpo/margin_std": 369.3429260253906, + "step": 458 + }, + { + "KL/chosen_KL_mean": -459.40435791015625, + "KL/mean": -625.1992797851562, + "KL/rejected_KL_mean": -790.994140625, + "KL/std": 361.43035888671875, + "epoch": 0.6740088105726872, + "fcm_dpo/beta": 0.001385183772072196, + "fcm_dpo/delta": -0.0625496357679367, + "fcm_dpo/margin": 331.5897216796875, + "fcm_dpo/q_t": 0.3948526680469513, + "grad_norm": 32.20987319946289, + "learning_rate": 1.466771464027316e-07, + "logits/chosen": -0.6852984428405762, + "logits/rejected": -0.7049773931503296, + "logps/chosen": -505.96185302734375, + "logps/ref_chosen": -46.55748748779297, + "logps/ref_rejected": -86.16854095458984, + "logps/rejected": -877.1627197265625, + "loss": 1.0536, + "margin_dpo/margin_mean": 331.5897521972656, + "margin_dpo/margin_std": 406.781982421875, + "step": 459 + }, + { + "KL/chosen_KL_mean": -512.4947509765625, + "KL/mean": -693.9195556640625, + "KL/rejected_KL_mean": -875.3443603515625, + "KL/std": 360.5986328125, + "epoch": 0.6754772393538914, + "fcm_dpo/beta": 0.0013653924688696861, + "fcm_dpo/delta": -0.10034875571727753, + "fcm_dpo/margin": 362.849609375, + "fcm_dpo/q_t": 0.3863321542739868, + "grad_norm": 40.046512603759766, + "learning_rate": 1.4550991377830423e-07, + "logits/chosen": -0.7233697772026062, + "logits/rejected": -0.7532409429550171, + "logps/chosen": -564.1296997070312, + "logps/ref_chosen": -51.63489532470703, + "logps/ref_rejected": -104.11935424804688, + "logps/rejected": -979.4637451171875, + "loss": 1.0176, + "margin_dpo/margin_mean": 362.849609375, + "margin_dpo/margin_std": 396.6766357421875, + "step": 460 + }, + { + "KL/chosen_KL_mean": -548.9373779296875, + "KL/mean": -692.796142578125, + "KL/rejected_KL_mean": -836.6549072265625, + "KL/std": 373.7737731933594, + "epoch": 0.6769456681350955, + "fcm_dpo/beta": 0.0013628401793539524, + "fcm_dpo/delta": 0.008031336590647697, + "fcm_dpo/margin": 287.7176208496094, + "fcm_dpo/q_t": 0.41146761178970337, + "grad_norm": 25.319110870361328, + "learning_rate": 1.4434543456482518e-07, + "logits/chosen": -0.7724506855010986, + "logits/rejected": -0.7853858470916748, + "logps/chosen": -604.1192626953125, + "logps/ref_chosen": -55.18195724487305, + "logps/ref_rejected": -86.47689819335938, + "logps/rejected": -923.1318359375, + "loss": 1.1035, + "margin_dpo/margin_mean": 287.71759033203125, + "margin_dpo/margin_std": 409.4786071777344, + "step": 461 + }, + { + "KL/chosen_KL_mean": -559.73583984375, + "KL/mean": -675.0146484375, + "KL/rejected_KL_mean": -790.2933959960938, + "KL/std": 367.716552734375, + "epoch": 0.6784140969162996, + "fcm_dpo/beta": 0.0013765160692855716, + "fcm_dpo/delta": 0.08537392318248749, + "fcm_dpo/margin": 230.5576171875, + "fcm_dpo/q_t": 0.428949773311615, + "grad_norm": 43.80839920043945, + "learning_rate": 1.4318373944740484e-07, + "logits/chosen": -0.8070446848869324, + "logits/rejected": -0.7969012260437012, + "logps/chosen": -629.663818359375, + "logps/ref_chosen": -69.92803192138672, + "logps/ref_rejected": -78.84111022949219, + "logps/rejected": -869.134521484375, + "loss": 1.1705, + "margin_dpo/margin_mean": 230.55758666992188, + "margin_dpo/margin_std": 416.346923828125, + "step": 462 + }, + { + "KL/chosen_KL_mean": -559.2445068359375, + "KL/mean": -710.7879638671875, + "KL/rejected_KL_mean": -862.3314208984375, + "KL/std": 379.8200378417969, + "epoch": 0.6798825256975036, + "fcm_dpo/beta": 0.0013845614157617092, + "fcm_dpo/delta": -0.02077137678861618, + "fcm_dpo/margin": 303.08685302734375, + "fcm_dpo/q_t": 0.405579149723053, + "grad_norm": 37.92364501953125, + "learning_rate": 1.4202485903778976e-07, + "logits/chosen": -0.7985125780105591, + "logits/rejected": -0.8062667846679688, + "logps/chosen": -614.5189208984375, + "logps/ref_chosen": -55.27437210083008, + "logps/ref_rejected": -89.02497863769531, + "logps/rejected": -951.3563842773438, + "loss": 1.0939, + "margin_dpo/margin_mean": 303.08685302734375, + "margin_dpo/margin_std": 430.80828857421875, + "step": 463 + }, + { + "KL/chosen_KL_mean": -555.7085571289062, + "KL/mean": -794.9366455078125, + "KL/rejected_KL_mean": -1034.1646728515625, + "KL/std": 459.53875732421875, + "epoch": 0.6813509544787077, + "fcm_dpo/beta": 0.0013179676607251167, + "fcm_dpo/delta": -0.24893316626548767, + "fcm_dpo/margin": 478.4560546875, + "fcm_dpo/q_t": 0.3569212555885315, + "grad_norm": 38.354400634765625, + "learning_rate": 1.4086882387355658e-07, + "logits/chosen": -0.7893344163894653, + "logits/rejected": -0.8504258990287781, + "logps/chosen": -606.620849609375, + "logps/ref_chosen": -50.91230010986328, + "logps/ref_rejected": -102.4893798828125, + "logps/rejected": -1136.654052734375, + "loss": 0.936, + "margin_dpo/margin_mean": 478.4560546875, + "margin_dpo/margin_std": 467.5229187011719, + "step": 464 + }, + { + "KL/chosen_KL_mean": -564.8525390625, + "KL/mean": -755.03271484375, + "KL/rejected_KL_mean": -945.2127075195312, + "KL/std": 466.654052734375, + "epoch": 0.6828193832599119, + "fcm_dpo/beta": 0.0012960683088749647, + "fcm_dpo/delta": -0.09770198166370392, + "fcm_dpo/margin": 380.36016845703125, + "fcm_dpo/q_t": 0.3856911063194275, + "grad_norm": 38.300540924072266, + "learning_rate": 1.3971566441730714e-07, + "logits/chosen": -0.793292760848999, + "logits/rejected": -0.8128570318222046, + "logps/chosen": -624.9694213867188, + "logps/ref_chosen": -60.116851806640625, + "logps/ref_rejected": -113.94602966308594, + "logps/rejected": -1059.15869140625, + "loss": 1.046, + "margin_dpo/margin_mean": 380.36016845703125, + "margin_dpo/margin_std": 488.3748779296875, + "step": 465 + }, + { + "KL/chosen_KL_mean": -626.0338134765625, + "KL/mean": -793.6627197265625, + "KL/rejected_KL_mean": -961.291748046875, + "KL/std": 443.3585205078125, + "epoch": 0.684287812041116, + "fcm_dpo/beta": 0.0012731440365314484, + "fcm_dpo/delta": -0.028695937246084213, + "fcm_dpo/margin": 335.2580261230469, + "fcm_dpo/q_t": 0.40266337990760803, + "grad_norm": 36.897422790527344, + "learning_rate": 1.3856541105586545e-07, + "logits/chosen": -0.8305766582489014, + "logits/rejected": -0.8335669040679932, + "logps/chosen": -678.9547119140625, + "logps/ref_chosen": -52.920921325683594, + "logps/ref_rejected": -90.3154296875, + "logps/rejected": -1051.607177734375, + "loss": 1.0959, + "margin_dpo/margin_mean": 335.2580261230469, + "margin_dpo/margin_std": 487.9477233886719, + "step": 466 + }, + { + "KL/chosen_KL_mean": -786.627685546875, + "KL/mean": -963.7129516601562, + "KL/rejected_KL_mean": -1140.7982177734375, + "KL/std": 571.1072998046875, + "epoch": 0.6857562408223201, + "fcm_dpo/beta": 0.0012568333186209202, + "fcm_dpo/delta": -0.049399569630622864, + "fcm_dpo/margin": 354.17047119140625, + "fcm_dpo/q_t": 0.403271347284317, + "grad_norm": 49.63898468017578, + "learning_rate": 1.3741809409947729e-07, + "logits/chosen": -0.9358654022216797, + "logits/rejected": -0.9291361570358276, + "logps/chosen": -865.343505859375, + "logps/ref_chosen": -78.7158203125, + "logps/ref_rejected": -102.86019897460938, + "logps/rejected": -1243.658447265625, + "loss": 1.1501, + "margin_dpo/margin_mean": 354.17047119140625, + "margin_dpo/margin_std": 657.70361328125, + "step": 467 + }, + { + "KL/chosen_KL_mean": -622.95849609375, + "KL/mean": -845.2493896484375, + "KL/rejected_KL_mean": -1067.540283203125, + "KL/std": 515.7662963867188, + "epoch": 0.6872246696035242, + "fcm_dpo/beta": 0.0012354985810816288, + "fcm_dpo/delta": -0.15800079703330994, + "fcm_dpo/margin": 444.5817565917969, + "fcm_dpo/q_t": 0.38068056106567383, + "grad_norm": 39.59514617919922, + "learning_rate": 1.362737437810114e-07, + "logits/chosen": -0.9017723798751831, + "logits/rejected": -0.9113543629646301, + "logps/chosen": -692.8939208984375, + "logps/ref_chosen": -69.93536376953125, + "logps/ref_rejected": -101.02880859375, + "logps/rejected": -1168.569091796875, + "loss": 1.017, + "margin_dpo/margin_mean": 444.5817565917969, + "margin_dpo/margin_std": 578.4088745117188, + "step": 468 + }, + { + "KL/chosen_KL_mean": -662.3765258789062, + "KL/mean": -883.9148559570312, + "KL/rejected_KL_mean": -1105.453125, + "KL/std": 464.3486022949219, + "epoch": 0.6886930983847284, + "fcm_dpo/beta": 0.0011932153720408678, + "fcm_dpo/delta": -0.13762570917606354, + "fcm_dpo/margin": 443.07666015625, + "fcm_dpo/q_t": 0.37997373938560486, + "grad_norm": 33.920169830322266, + "learning_rate": 1.351323902551631e-07, + "logits/chosen": -0.9452608227729797, + "logits/rejected": -0.9607683420181274, + "logps/chosen": -730.501220703125, + "logps/ref_chosen": -68.12469482421875, + "logps/ref_rejected": -104.78640747070312, + "logps/rejected": -1210.239501953125, + "loss": 1.0151, + "margin_dpo/margin_mean": 443.07666015625, + "margin_dpo/margin_std": 517.1961669921875, + "step": 469 + }, + { + "KL/chosen_KL_mean": -561.1131591796875, + "KL/mean": -754.48193359375, + "KL/rejected_KL_mean": -947.8507080078125, + "KL/std": 474.6820068359375, + "epoch": 0.6901615271659325, + "fcm_dpo/beta": 0.001185485627502203, + "fcm_dpo/delta": -0.06129393354058266, + "fcm_dpo/margin": 386.737548828125, + "fcm_dpo/q_t": 0.3954838514328003, + "grad_norm": 28.70859718322754, + "learning_rate": 1.339940635976592e-07, + "logits/chosen": -0.885380744934082, + "logits/rejected": -0.8969517350196838, + "logps/chosen": -604.9050903320312, + "logps/ref_chosen": -43.791927337646484, + "logps/ref_rejected": -82.70285034179688, + "logps/rejected": -1030.5535888671875, + "loss": 1.0719, + "margin_dpo/margin_mean": 386.737548828125, + "margin_dpo/margin_std": 531.3819580078125, + "step": 470 + }, + { + "KL/chosen_KL_mean": -726.427734375, + "KL/mean": -887.2966918945312, + "KL/rejected_KL_mean": -1048.165771484375, + "KL/std": 499.47198486328125, + "epoch": 0.6916299559471366, + "fcm_dpo/beta": 0.0011751014972105622, + "fcm_dpo/delta": 0.022231273353099823, + "fcm_dpo/margin": 321.73797607421875, + "fcm_dpo/q_t": 0.41689130663871765, + "grad_norm": 37.73147964477539, + "learning_rate": 1.3285879380446563e-07, + "logits/chosen": -0.9937692880630493, + "logits/rejected": -1.0016134977340698, + "logps/chosen": -789.7672119140625, + "logps/ref_chosen": -63.33952331542969, + "logps/ref_rejected": -83.61048126220703, + "logps/rejected": -1131.776123046875, + "loss": 1.1346, + "margin_dpo/margin_mean": 321.73797607421875, + "margin_dpo/margin_std": 533.201904296875, + "step": 471 + }, + { + "KL/chosen_KL_mean": -712.793701171875, + "KL/mean": -912.88037109375, + "KL/rejected_KL_mean": -1112.967041015625, + "KL/std": 599.86572265625, + "epoch": 0.6930983847283406, + "fcm_dpo/beta": 0.0011670588282868266, + "fcm_dpo/delta": -0.07075389474630356, + "fcm_dpo/margin": 400.17333984375, + "fcm_dpo/q_t": 0.4006522297859192, + "grad_norm": 35.72392272949219, + "learning_rate": 1.317266107909975e-07, + "logits/chosen": -0.954893946647644, + "logits/rejected": -0.93065345287323, + "logps/chosen": -796.4598388671875, + "logps/ref_chosen": -83.66610717773438, + "logps/ref_rejected": -117.20919799804688, + "logps/rejected": -1230.17626953125, + "loss": 1.102, + "margin_dpo/margin_mean": 400.17333984375, + "margin_dpo/margin_std": 645.8973999023438, + "step": 472 + }, + { + "KL/chosen_KL_mean": -852.4325561523438, + "KL/mean": -943.5150146484375, + "KL/rejected_KL_mean": -1034.5975341796875, + "KL/std": 626.2051391601562, + "epoch": 0.6945668135095447, + "fcm_dpo/beta": 0.0011666135396808386, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 182.16502380371094, + "fcm_dpo/q_t": 0.45503491163253784, + "grad_norm": 116.40143585205078, + "learning_rate": 1.3059754439133002e-07, + "logits/chosen": -1.0035320520401, + "logits/rejected": -0.9779636859893799, + "logps/chosen": -915.9295654296875, + "logps/ref_chosen": -63.49696731567383, + "logps/ref_rejected": -81.14657592773438, + "logps/rejected": -1115.744140625, + "loss": 1.3781, + "margin_dpo/margin_mean": 182.16500854492188, + "margin_dpo/margin_std": 820.9163208007812, + "step": 473 + }, + { + "KL/chosen_KL_mean": -686.1465454101562, + "KL/mean": -849.884521484375, + "KL/rejected_KL_mean": -1013.62255859375, + "KL/std": 536.9088134765625, + "epoch": 0.6960352422907489, + "fcm_dpo/beta": 0.0011568089248612523, + "fcm_dpo/delta": -0.0847577303647995, + "fcm_dpo/margin": 327.47601318359375, + "fcm_dpo/q_t": 0.41386541724205017, + "grad_norm": 36.65470886230469, + "learning_rate": 1.2947162435741277e-07, + "logits/chosen": -0.944141149520874, + "logits/rejected": -0.9484027624130249, + "logps/chosen": -738.7584838867188, + "logps/ref_chosen": -52.6119384765625, + "logps/ref_rejected": -90.08041381835938, + "logps/rejected": -1103.7030029296875, + "loss": 1.1566, + "margin_dpo/margin_mean": 327.47601318359375, + "margin_dpo/margin_std": 581.762939453125, + "step": 474 + }, + { + "KL/chosen_KL_mean": -497.8739013671875, + "KL/mean": -715.1051025390625, + "KL/rejected_KL_mean": -932.3363037109375, + "KL/std": 428.21868896484375, + "epoch": 0.697503671071953, + "fcm_dpo/beta": 0.0011289971880614758, + "fcm_dpo/delta": -0.09538697451353073, + "fcm_dpo/margin": 434.46240234375, + "fcm_dpo/q_t": 0.38658300042152405, + "grad_norm": 42.494869232177734, + "learning_rate": 1.2834888035828596e-07, + "logits/chosen": -0.9651395082473755, + "logits/rejected": -0.9933245182037354, + "logps/chosen": -540.369140625, + "logps/ref_chosen": -42.49519348144531, + "logps/ref_rejected": -90.06294250488281, + "logps/rejected": -1022.3992919921875, + "loss": 1.017, + "margin_dpo/margin_mean": 434.46240234375, + "margin_dpo/margin_std": 460.6612548828125, + "step": 475 + }, + { + "KL/chosen_KL_mean": -643.5517578125, + "KL/mean": -811.9400634765625, + "KL/rejected_KL_mean": -980.328369140625, + "KL/std": 494.214111328125, + "epoch": 0.6989720998531571, + "fcm_dpo/beta": 0.0011274400167167187, + "fcm_dpo/delta": 0.021099748089909554, + "fcm_dpo/margin": 336.776611328125, + "fcm_dpo/q_t": 0.4138496518135071, + "grad_norm": 58.33089065551758, + "learning_rate": 1.2722934197929802e-07, + "logits/chosen": -0.9680467844009399, + "logits/rejected": -0.985359787940979, + "logps/chosen": -686.5010986328125, + "logps/ref_chosen": -42.94938278198242, + "logps/ref_rejected": -73.71023559570312, + "logps/rejected": -1054.03857421875, + "loss": 1.108, + "margin_dpo/margin_mean": 336.776611328125, + "margin_dpo/margin_std": 474.5893859863281, + "step": 476 + }, + { + "KL/chosen_KL_mean": -671.0191040039062, + "KL/mean": -845.5562744140625, + "KL/rejected_KL_mean": -1020.0934448242188, + "KL/std": 523.93212890625, + "epoch": 0.7004405286343612, + "fcm_dpo/beta": 0.0011346408864483237, + "fcm_dpo/delta": 0.0038326121866703033, + "fcm_dpo/margin": 349.0743408203125, + "fcm_dpo/q_t": 0.41092249751091003, + "grad_norm": 32.38017272949219, + "learning_rate": 1.2611303872132631e-07, + "logits/chosen": -0.9727839231491089, + "logits/rejected": -0.9398672580718994, + "logps/chosen": -741.791748046875, + "logps/ref_chosen": -70.77261352539062, + "logps/ref_rejected": -76.13737487792969, + "logps/rejected": -1096.2308349609375, + "loss": 1.138, + "margin_dpo/margin_mean": 349.0743408203125, + "margin_dpo/margin_std": 609.6284790039062, + "step": 477 + }, + { + "KL/chosen_KL_mean": -543.57373046875, + "KL/mean": -744.6063842773438, + "KL/rejected_KL_mean": -945.6390380859375, + "KL/std": 450.7725830078125, + "epoch": 0.7019089574155654, + "fcm_dpo/beta": 0.001124206930398941, + "fcm_dpo/delta": -0.05444270372390747, + "fcm_dpo/margin": 402.06536865234375, + "fcm_dpo/q_t": 0.39735937118530273, + "grad_norm": 33.31148147583008, + "learning_rate": 1.2500000000000005e-07, + "logits/chosen": -0.8353407979011536, + "logits/rejected": -0.8606827259063721, + "logps/chosen": -585.0142822265625, + "logps/ref_chosen": -41.440513610839844, + "logps/ref_rejected": -85.36196899414062, + "logps/rejected": -1031.0009765625, + "loss": 1.0658, + "margin_dpo/margin_mean": 402.06536865234375, + "margin_dpo/margin_std": 529.6115112304688, + "step": 478 + }, + { + "KL/chosen_KL_mean": -704.5205078125, + "KL/mean": -897.114501953125, + "KL/rejected_KL_mean": -1089.7083740234375, + "KL/std": 577.3861083984375, + "epoch": 0.7033773861967695, + "fcm_dpo/beta": 0.0011205710470676422, + "fcm_dpo/delta": -0.0337379164993763, + "fcm_dpo/margin": 385.18798828125, + "fcm_dpo/q_t": 0.40699535608291626, + "grad_norm": 30.347566604614258, + "learning_rate": 1.2389025514492456e-07, + "logits/chosen": -0.9103279113769531, + "logits/rejected": -0.9409140348434448, + "logps/chosen": -758.428466796875, + "logps/ref_chosen": -53.907920837402344, + "logps/ref_rejected": -95.1163330078125, + "logps/rejected": -1184.82470703125, + "loss": 1.1246, + "margin_dpo/margin_mean": 385.18798828125, + "margin_dpo/margin_std": 668.6539306640625, + "step": 479 + }, + { + "KL/chosen_KL_mean": -799.6283569335938, + "KL/mean": -950.8204345703125, + "KL/rejected_KL_mean": -1102.0125732421875, + "KL/std": 511.99639892578125, + "epoch": 0.7048458149779736, + "fcm_dpo/beta": 0.0011057795491069555, + "fcm_dpo/delta": -0.04888079687952995, + "fcm_dpo/margin": 302.38421630859375, + "fcm_dpo/q_t": 0.4251486659049988, + "grad_norm": 53.98848342895508, + "learning_rate": 1.227838333989088e-07, + "logits/chosen": -0.9347141981124878, + "logits/rejected": -0.9363481998443604, + "logps/chosen": -858.31103515625, + "logps/ref_chosen": -58.682701110839844, + "logps/ref_rejected": -82.93248748779297, + "logps/rejected": -1184.945068359375, + "loss": 1.1745, + "margin_dpo/margin_mean": 302.38421630859375, + "margin_dpo/margin_std": 548.7213134765625, + "step": 480 + }, + { + "KL/chosen_KL_mean": -666.797119140625, + "KL/mean": -899.0219116210938, + "KL/rejected_KL_mean": -1131.24658203125, + "KL/std": 542.1533813476562, + "epoch": 0.7063142437591777, + "fcm_dpo/beta": 0.0010841806652024388, + "fcm_dpo/delta": -0.10892824828624725, + "fcm_dpo/margin": 464.44964599609375, + "fcm_dpo/q_t": 0.3871780037879944, + "grad_norm": 28.775257110595703, + "learning_rate": 1.2168076391719489e-07, + "logits/chosen": -0.9464655518531799, + "logits/rejected": -0.9776947498321533, + "logps/chosen": -721.7613525390625, + "logps/ref_chosen": -54.964271545410156, + "logps/ref_rejected": -92.42044067382812, + "logps/rejected": -1223.6671142578125, + "loss": 1.0317, + "margin_dpo/margin_mean": 464.4496154785156, + "margin_dpo/margin_std": 576.0606689453125, + "step": 481 + }, + { + "KL/chosen_KL_mean": -718.3167114257812, + "KL/mean": -820.912353515625, + "KL/rejected_KL_mean": -923.5079345703125, + "KL/std": 538.7589111328125, + "epoch": 0.7077826725403817, + "fcm_dpo/beta": 0.0010857656598091125, + "fcm_dpo/delta": 0.08385416120290756, + "fcm_dpo/margin": 205.1912384033203, + "fcm_dpo/q_t": 0.4472288489341736, + "grad_norm": 54.073997497558594, + "learning_rate": 1.2058107576668938e-07, + "logits/chosen": -0.8339771032333374, + "logits/rejected": -0.8247455358505249, + "logps/chosen": -785.8701171875, + "logps/ref_chosen": -67.553466796875, + "logps/ref_rejected": -87.58953857421875, + "logps/rejected": -1011.0975341796875, + "loss": 1.2821, + "margin_dpo/margin_mean": 205.19125366210938, + "margin_dpo/margin_std": 625.9072265625, + "step": 482 + }, + { + "KL/chosen_KL_mean": -616.08154296875, + "KL/mean": -854.05029296875, + "KL/rejected_KL_mean": -1092.01904296875, + "KL/std": 518.1444091796875, + "epoch": 0.7092511013215859, + "fcm_dpo/beta": 0.001074553350917995, + "fcm_dpo/delta": -0.11760546267032623, + "fcm_dpo/margin": 475.93743896484375, + "fcm_dpo/q_t": 0.38575249910354614, + "grad_norm": 31.249698638916016, + "learning_rate": 1.194847979251979e-07, + "logits/chosen": -0.9016849994659424, + "logits/rejected": -0.9181410074234009, + "logps/chosen": -679.411376953125, + "logps/ref_chosen": -63.32981872558594, + "logps/ref_rejected": -95.78697204589844, + "logps/rejected": -1187.805908203125, + "loss": 1.0223, + "margin_dpo/margin_mean": 475.9373779296875, + "margin_dpo/margin_std": 576.4466552734375, + "step": 483 + }, + { + "KL/chosen_KL_mean": -521.124755859375, + "KL/mean": -732.0531005859375, + "KL/rejected_KL_mean": -942.9813842773438, + "KL/std": 495.1884765625, + "epoch": 0.71071953010279, + "fcm_dpo/beta": 0.0010655100923031569, + "fcm_dpo/delta": -0.05194704234600067, + "fcm_dpo/margin": 421.85662841796875, + "fcm_dpo/q_t": 0.3974398374557495, + "grad_norm": 62.45989990234375, + "learning_rate": 1.1839195928066101e-07, + "logits/chosen": -0.8718733787536621, + "logits/rejected": -0.8974796533584595, + "logps/chosen": -580.262939453125, + "logps/ref_chosen": -59.13812255859375, + "logps/ref_rejected": -84.37144470214844, + "logps/rejected": -1027.352783203125, + "loss": 1.0546, + "margin_dpo/margin_mean": 421.85662841796875, + "margin_dpo/margin_std": 506.4351806640625, + "step": 484 + }, + { + "KL/chosen_KL_mean": -539.0780639648438, + "KL/mean": -738.4888916015625, + "KL/rejected_KL_mean": -937.8997192382812, + "KL/std": 491.9963073730469, + "epoch": 0.7121879588839941, + "fcm_dpo/beta": 0.0010573656763881445, + "fcm_dpo/delta": -0.022721393033862114, + "fcm_dpo/margin": 398.82159423828125, + "fcm_dpo/q_t": 0.40495431423187256, + "grad_norm": 35.550392150878906, + "learning_rate": 1.1730258863039347e-07, + "logits/chosen": -0.794913649559021, + "logits/rejected": -0.8159662485122681, + "logps/chosen": -597.9276123046875, + "logps/ref_chosen": -58.849571228027344, + "logps/ref_rejected": -103.36408233642578, + "logps/rejected": -1041.2637939453125, + "loss": 1.089, + "margin_dpo/margin_mean": 398.8216552734375, + "margin_dpo/margin_std": 560.9061279296875, + "step": 485 + }, + { + "KL/chosen_KL_mean": -621.9697265625, + "KL/mean": -850.4727172851562, + "KL/rejected_KL_mean": -1078.9757080078125, + "KL/std": 568.115478515625, + "epoch": 0.7136563876651982, + "fcm_dpo/beta": 0.0010410689283162355, + "fcm_dpo/delta": -0.07956840097904205, + "fcm_dpo/margin": 457.0060119628906, + "fcm_dpo/q_t": 0.3951270878314972, + "grad_norm": 30.52185821533203, + "learning_rate": 1.1621671468032493e-07, + "logits/chosen": -0.8822107911109924, + "logits/rejected": -0.9047358632087708, + "logps/chosen": -677.2293701171875, + "logps/ref_chosen": -55.25966262817383, + "logps/ref_rejected": -92.13936614990234, + "logps/rejected": -1171.114990234375, + "loss": 1.0777, + "margin_dpo/margin_mean": 457.00604248046875, + "margin_dpo/margin_std": 684.0638427734375, + "step": 486 + }, + { + "KL/chosen_KL_mean": -674.0582275390625, + "KL/mean": -843.9366455078125, + "KL/rejected_KL_mean": -1013.8150634765625, + "KL/std": 521.2279663085938, + "epoch": 0.7151248164464024, + "fcm_dpo/beta": 0.001048167236149311, + "fcm_dpo/delta": 0.044978074729442596, + "fcm_dpo/margin": 339.7569580078125, + "fcm_dpo/q_t": 0.41704893112182617, + "grad_norm": 33.21575164794922, + "learning_rate": 1.1513436604424378e-07, + "logits/chosen": -0.8942869901657104, + "logits/rejected": -0.9046221971511841, + "logps/chosen": -727.1214599609375, + "logps/ref_chosen": -53.06330871582031, + "logps/ref_rejected": -92.41883087158203, + "logps/rejected": -1106.23388671875, + "loss": 1.1342, + "margin_dpo/margin_mean": 339.7569580078125, + "margin_dpo/margin_std": 529.3812255859375, + "step": 487 + }, + { + "KL/chosen_KL_mean": -542.7528076171875, + "KL/mean": -720.9403686523438, + "KL/rejected_KL_mean": -899.1280517578125, + "KL/std": 461.8782043457031, + "epoch": 0.7165932452276065, + "fcm_dpo/beta": 0.001053705345839262, + "fcm_dpo/delta": 0.02513560838997364, + "fcm_dpo/margin": 356.37518310546875, + "fcm_dpo/q_t": 0.4128515124320984, + "grad_norm": 34.97057342529297, + "learning_rate": 1.1405557124304335e-07, + "logits/chosen": -0.8141319751739502, + "logits/rejected": -0.8222429752349854, + "logps/chosen": -594.98095703125, + "logps/ref_chosen": -52.22815704345703, + "logps/ref_rejected": -84.00656127929688, + "logps/rejected": -983.1345825195312, + "loss": 1.0978, + "margin_dpo/margin_mean": 356.37518310546875, + "margin_dpo/margin_std": 445.29071044921875, + "step": 488 + }, + { + "KL/chosen_KL_mean": -500.2234802246094, + "KL/mean": -671.4964599609375, + "KL/rejected_KL_mean": -842.7694091796875, + "KL/std": 455.5792236328125, + "epoch": 0.7180616740088106, + "fcm_dpo/beta": 0.0010611966717988253, + "fcm_dpo/delta": 0.03748384118080139, + "fcm_dpo/margin": 342.54595947265625, + "fcm_dpo/q_t": 0.4179996848106384, + "grad_norm": 27.448223114013672, + "learning_rate": 1.1298035870396985e-07, + "logits/chosen": -0.8774303197860718, + "logits/rejected": -0.8812981247901917, + "logps/chosen": -556.213134765625, + "logps/ref_chosen": -55.989627838134766, + "logps/ref_rejected": -79.39812469482422, + "logps/rejected": -922.1675415039062, + "loss": 1.1201, + "margin_dpo/margin_mean": 342.5459289550781, + "margin_dpo/margin_std": 497.568359375, + "step": 489 + }, + { + "KL/chosen_KL_mean": -602.345703125, + "KL/mean": -789.0585327148438, + "KL/rejected_KL_mean": -975.7713012695312, + "KL/std": 554.9224853515625, + "epoch": 0.7195301027900147, + "fcm_dpo/beta": 0.0010620702523738146, + "fcm_dpo/delta": 0.0034542735666036606, + "fcm_dpo/margin": 373.4256591796875, + "fcm_dpo/q_t": 0.4118698239326477, + "grad_norm": 37.27909851074219, + "learning_rate": 1.1190875675987355e-07, + "logits/chosen": -0.8731991052627563, + "logits/rejected": -0.911872148513794, + "logps/chosen": -654.7120361328125, + "logps/ref_chosen": -52.36639404296875, + "logps/ref_rejected": -110.4090576171875, + "logps/rejected": -1086.180419921875, + "loss": 1.1353, + "margin_dpo/margin_mean": 373.4256896972656, + "margin_dpo/margin_std": 642.0191650390625, + "step": 490 + }, + { + "KL/chosen_KL_mean": -558.270263671875, + "KL/mean": -684.875, + "KL/rejected_KL_mean": -811.479736328125, + "KL/std": 470.6094970703125, + "epoch": 0.7209985315712188, + "fcm_dpo/beta": 0.0010826380457729101, + "fcm_dpo/delta": 0.12914934754371643, + "fcm_dpo/margin": 253.20947265625, + "fcm_dpo/q_t": 0.4377876818180084, + "grad_norm": 32.712093353271484, + "learning_rate": 1.1084079364846241e-07, + "logits/chosen": -0.9021086096763611, + "logits/rejected": -0.8983560800552368, + "logps/chosen": -618.3865356445312, + "logps/ref_chosen": -60.11626434326172, + "logps/ref_rejected": -73.27278900146484, + "logps/rejected": -884.7525634765625, + "loss": 1.1945, + "margin_dpo/margin_mean": 253.20948791503906, + "margin_dpo/margin_std": 480.40399169921875, + "step": 491 + }, + { + "KL/chosen_KL_mean": -578.8584594726562, + "KL/mean": -706.635498046875, + "KL/rejected_KL_mean": -834.41259765625, + "KL/std": 479.8116149902344, + "epoch": 0.7224669603524229, + "fcm_dpo/beta": 0.0011053578928112984, + "fcm_dpo/delta": 0.12110729515552521, + "fcm_dpo/margin": 255.5540771484375, + "fcm_dpo/q_t": 0.4370453357696533, + "grad_norm": 31.351417541503906, + "learning_rate": 1.097764975115576e-07, + "logits/chosen": -0.9315870404243469, + "logits/rejected": -0.9179561734199524, + "logps/chosen": -632.8526611328125, + "logps/ref_chosen": -53.994178771972656, + "logps/ref_rejected": -72.65962219238281, + "logps/rejected": -907.0721435546875, + "loss": 1.2158, + "margin_dpo/margin_mean": 255.55409240722656, + "margin_dpo/margin_std": 563.4765014648438, + "step": 492 + }, + { + "KL/chosen_KL_mean": -622.0254516601562, + "KL/mean": -759.6363525390625, + "KL/rejected_KL_mean": -897.247314453125, + "KL/std": 523.6834106445312, + "epoch": 0.723935389133627, + "fcm_dpo/beta": 0.0011120472336187959, + "fcm_dpo/delta": -0.011631077155470848, + "fcm_dpo/margin": 275.22186279296875, + "fcm_dpo/q_t": 0.4283533990383148, + "grad_norm": 35.48196029663086, + "learning_rate": 1.0871589639435203e-07, + "logits/chosen": -0.9604239463806152, + "logits/rejected": -0.9354810118675232, + "logps/chosen": -697.522705078125, + "logps/ref_chosen": -75.49723815917969, + "logps/ref_rejected": -87.32301330566406, + "logps/rejected": -984.5703125, + "loss": 1.1808, + "margin_dpo/margin_mean": 275.22186279296875, + "margin_dpo/margin_std": 509.6318359375, + "step": 493 + }, + { + "KL/chosen_KL_mean": -498.29388427734375, + "KL/mean": -716.009521484375, + "KL/rejected_KL_mean": -933.72509765625, + "KL/std": 477.13525390625, + "epoch": 0.7254038179148311, + "fcm_dpo/beta": 0.0010983939282596111, + "fcm_dpo/delta": -0.0822177529335022, + "fcm_dpo/margin": 435.43121337890625, + "fcm_dpo/q_t": 0.3895619511604309, + "grad_norm": 48.854129791259766, + "learning_rate": 1.0765901824467166e-07, + "logits/chosen": -0.8259874582290649, + "logits/rejected": -0.8612606525421143, + "logps/chosen": -539.6531982421875, + "logps/ref_chosen": -41.35926818847656, + "logps/ref_rejected": -86.09136962890625, + "logps/rejected": -1019.8164672851562, + "loss": 1.0284, + "margin_dpo/margin_mean": 435.43121337890625, + "margin_dpo/margin_std": 479.2147216796875, + "step": 494 + }, + { + "KL/chosen_KL_mean": -555.478759765625, + "KL/mean": -748.5401611328125, + "KL/rejected_KL_mean": -941.6015625, + "KL/std": 487.3724060058594, + "epoch": 0.7268722466960352, + "fcm_dpo/beta": 0.0010908616241067648, + "fcm_dpo/delta": -0.022133061662316322, + "fcm_dpo/margin": 386.12274169921875, + "fcm_dpo/q_t": 0.4062625765800476, + "grad_norm": 31.173200607299805, + "learning_rate": 1.0660589091223854e-07, + "logits/chosen": -0.9227169752120972, + "logits/rejected": -0.9331672191619873, + "logps/chosen": -619.0137939453125, + "logps/ref_chosen": -63.53507995605469, + "logps/ref_rejected": -91.42443084716797, + "logps/rejected": -1033.02587890625, + "loss": 1.0958, + "margin_dpo/margin_mean": 386.1227722167969, + "margin_dpo/margin_std": 574.861328125, + "step": 495 + }, + { + "KL/chosen_KL_mean": -699.9385986328125, + "KL/mean": -801.8622436523438, + "KL/rejected_KL_mean": -903.785888671875, + "KL/std": 393.8895568847656, + "epoch": 0.7283406754772394, + "fcm_dpo/beta": 0.0011188681237399578, + "fcm_dpo/delta": 0.17580503225326538, + "fcm_dpo/margin": 203.84727478027344, + "fcm_dpo/q_t": 0.44692689180374146, + "grad_norm": 64.04462432861328, + "learning_rate": 1.0555654214793722e-07, + "logits/chosen": -0.8876965641975403, + "logits/rejected": -0.8597399592399597, + "logps/chosen": -772.530517578125, + "logps/ref_chosen": -72.5919189453125, + "logps/ref_rejected": -84.32933807373047, + "logps/rejected": -988.115234375, + "loss": 1.2315, + "margin_dpo/margin_mean": 203.84725952148438, + "margin_dpo/margin_std": 444.61639404296875, + "step": 496 + }, + { + "KL/chosen_KL_mean": -672.76513671875, + "KL/mean": -776.4244384765625, + "KL/rejected_KL_mean": -880.0836791992188, + "KL/std": 496.9170227050781, + "epoch": 0.7298091042584435, + "fcm_dpo/beta": 0.001132933422923088, + "fcm_dpo/delta": 0.02158385142683983, + "fcm_dpo/margin": 207.31849670410156, + "fcm_dpo/q_t": 0.44608232378959656, + "grad_norm": 41.72929000854492, + "learning_rate": 1.0451099960308374e-07, + "logits/chosen": -0.858148455619812, + "logits/rejected": -0.8439843654632568, + "logps/chosen": -731.359130859375, + "logps/ref_chosen": -58.59397506713867, + "logps/ref_rejected": -76.28836822509766, + "logps/rejected": -956.3720703125, + "loss": 1.2363, + "margin_dpo/margin_mean": 207.31849670410156, + "margin_dpo/margin_std": 470.48712158203125, + "step": 497 + }, + { + "KL/chosen_KL_mean": -647.0549926757812, + "KL/mean": -820.817626953125, + "KL/rejected_KL_mean": -994.5802001953125, + "KL/std": 524.9059448242188, + "epoch": 0.7312775330396476, + "fcm_dpo/beta": 0.0011333951260894537, + "fcm_dpo/delta": 0.006364853121340275, + "fcm_dpo/margin": 347.5252685546875, + "fcm_dpo/q_t": 0.4110422730445862, + "grad_norm": 44.66307067871094, + "learning_rate": 1.0346929082869641e-07, + "logits/chosen": -0.9270666837692261, + "logits/rejected": -0.9164772033691406, + "logps/chosen": -718.2606201171875, + "logps/ref_chosen": -71.20565795898438, + "logps/ref_rejected": -83.95803833007812, + "logps/rejected": -1078.538330078125, + "loss": 1.133, + "margin_dpo/margin_mean": 347.5252685546875, + "margin_dpo/margin_std": 589.1495361328125, + "step": 498 + }, + { + "KL/chosen_KL_mean": -564.4229736328125, + "KL/mean": -775.9797973632812, + "KL/rejected_KL_mean": -987.5364990234375, + "KL/std": 518.1436767578125, + "epoch": 0.7327459618208517, + "fcm_dpo/beta": 0.0011198758147656918, + "fcm_dpo/delta": -0.0776449665427208, + "fcm_dpo/margin": 423.1136169433594, + "fcm_dpo/q_t": 0.3932623863220215, + "grad_norm": 42.19448471069336, + "learning_rate": 1.0243144327477013e-07, + "logits/chosen": -0.9195848107337952, + "logits/rejected": -0.955754280090332, + "logps/chosen": -615.6781616210938, + "logps/ref_chosen": -51.25519561767578, + "logps/ref_rejected": -101.07870483398438, + "logps/rejected": -1088.615234375, + "loss": 1.0594, + "margin_dpo/margin_mean": 423.11358642578125, + "margin_dpo/margin_std": 571.1682739257812, + "step": 499 + }, + { + "KL/chosen_KL_mean": -671.8187255859375, + "KL/mean": -847.9236450195312, + "KL/rejected_KL_mean": -1024.028564453125, + "KL/std": 458.623046875, + "epoch": 0.7342143906020558, + "fcm_dpo/beta": 0.001116940751671791, + "fcm_dpo/delta": 0.006859854329377413, + "fcm_dpo/margin": 352.2098388671875, + "fcm_dpo/q_t": 0.41066765785217285, + "grad_norm": 35.08540725708008, + "learning_rate": 1.0139748428955333e-07, + "logits/chosen": -0.9325329661369324, + "logits/rejected": -0.9658868312835693, + "logps/chosen": -728.84619140625, + "logps/ref_chosen": -57.027442932128906, + "logps/ref_rejected": -93.93421173095703, + "logps/rejected": -1117.9627685546875, + "loss": 1.1271, + "margin_dpo/margin_mean": 352.2098388671875, + "margin_dpo/margin_std": 569.4359741210938, + "step": 500 + }, + { + "KL/chosen_KL_mean": -572.5186767578125, + "KL/mean": -759.097900390625, + "KL/rejected_KL_mean": -945.6771240234375, + "KL/std": 483.7222900390625, + "epoch": 0.73568281938326, + "fcm_dpo/beta": 0.0011179624125361443, + "fcm_dpo/delta": -0.017988204956054688, + "fcm_dpo/margin": 373.15838623046875, + "fcm_dpo/q_t": 0.4073890149593353, + "grad_norm": 33.07366943359375, + "learning_rate": 1.0036744111882672e-07, + "logits/chosen": -0.8823180198669434, + "logits/rejected": -0.8721954822540283, + "logps/chosen": -626.8782348632812, + "logps/ref_chosen": -54.359527587890625, + "logps/ref_rejected": -80.15670013427734, + "logps/rejected": -1025.833740234375, + "loss": 1.1115, + "margin_dpo/margin_mean": 373.15838623046875, + "margin_dpo/margin_std": 589.1995849609375, + "step": 501 + }, + { + "KL/chosen_KL_mean": -518.148681640625, + "KL/mean": -708.8804321289062, + "KL/rejected_KL_mean": -899.612060546875, + "KL/std": 450.12200927734375, + "epoch": 0.737151248164464, + "fcm_dpo/beta": 0.0011112934444099665, + "fcm_dpo/delta": -0.024970781058073044, + "fcm_dpo/margin": 381.4633483886719, + "fcm_dpo/q_t": 0.40389347076416016, + "grad_norm": 29.106658935546875, + "learning_rate": 9.934134090518592e-08, + "logits/chosen": -0.8181363940238953, + "logits/rejected": -0.8096420764923096, + "logps/chosen": -585.7492065429688, + "logps/ref_chosen": -67.60050964355469, + "logps/ref_rejected": -82.94876098632812, + "logps/rejected": -982.5608520507812, + "loss": 1.0688, + "margin_dpo/margin_mean": 381.4633483886719, + "margin_dpo/margin_std": 472.6005859375, + "step": 502 + }, + { + "KL/chosen_KL_mean": -523.8008422851562, + "KL/mean": -695.634521484375, + "KL/rejected_KL_mean": -867.4680786132812, + "KL/std": 425.3750305175781, + "epoch": 0.7386196769456681, + "fcm_dpo/beta": 0.001108947559259832, + "fcm_dpo/delta": 0.01953038200736046, + "fcm_dpo/margin": 343.667236328125, + "fcm_dpo/q_t": 0.41361480951309204, + "grad_norm": 28.44597816467285, + "learning_rate": 9.831921068732571e-08, + "logits/chosen": -0.8395601511001587, + "logits/rejected": -0.8324748873710632, + "logps/chosen": -578.8792724609375, + "logps/ref_chosen": -55.078407287597656, + "logps/ref_rejected": -82.50544738769531, + "logps/rejected": -949.9735107421875, + "loss": 1.1006, + "margin_dpo/margin_mean": 343.6672668457031, + "margin_dpo/margin_std": 457.1417236328125, + "step": 503 + }, + { + "KL/chosen_KL_mean": -567.9959716796875, + "KL/mean": -775.1651611328125, + "KL/rejected_KL_mean": -982.3343505859375, + "KL/std": 495.7042541503906, + "epoch": 0.7400881057268722, + "fcm_dpo/beta": 0.0011051710462197661, + "fcm_dpo/delta": -0.06065092608332634, + "fcm_dpo/margin": 414.3384094238281, + "fcm_dpo/q_t": 0.3969269096851349, + "grad_norm": 29.1925106048584, + "learning_rate": 9.730107739932805e-08, + "logits/chosen": -0.8878883123397827, + "logits/rejected": -0.9151204228401184, + "logps/chosen": -627.961669921875, + "logps/ref_chosen": -59.96575164794922, + "logps/ref_rejected": -103.76212310791016, + "logps/rejected": -1086.096435546875, + "loss": 1.0678, + "margin_dpo/margin_mean": 414.3384094238281, + "margin_dpo/margin_std": 545.068603515625, + "step": 504 + }, + { + "KL/chosen_KL_mean": -644.965087890625, + "KL/mean": -754.7982177734375, + "KL/rejected_KL_mean": -864.63134765625, + "KL/std": 480.5470275878906, + "epoch": 0.7415565345080763, + "fcm_dpo/beta": 0.0011264740023761988, + "fcm_dpo/delta": 0.15632013976573944, + "fcm_dpo/margin": 219.66627502441406, + "fcm_dpo/q_t": 0.44324439764022827, + "grad_norm": 46.33066940307617, + "learning_rate": 9.628696786995188e-08, + "logits/chosen": -0.926541805267334, + "logits/rejected": -0.9026806354522705, + "logps/chosen": -721.1199951171875, + "logps/ref_chosen": -76.1549072265625, + "logps/ref_rejected": -88.58537292480469, + "logps/rejected": -953.2167358398438, + "loss": 1.2186, + "margin_dpo/margin_mean": 219.666259765625, + "margin_dpo/margin_std": 454.0839538574219, + "step": 505 + }, + { + "KL/chosen_KL_mean": -516.7422485351562, + "KL/mean": -701.2262573242188, + "KL/rejected_KL_mean": -885.7102661132812, + "KL/std": 465.8466796875, + "epoch": 0.7430249632892805, + "fcm_dpo/beta": 0.0011272106785327196, + "fcm_dpo/delta": -0.017008088529109955, + "fcm_dpo/margin": 368.968017578125, + "fcm_dpo/q_t": 0.405214786529541, + "grad_norm": 40.91910171508789, + "learning_rate": 9.527690882192635e-08, + "logits/chosen": -0.909249484539032, + "logits/rejected": -0.9271351099014282, + "logps/chosen": -565.7027587890625, + "logps/ref_chosen": -48.96050262451172, + "logps/ref_rejected": -78.41505432128906, + "logps/rejected": -964.1253662109375, + "loss": 1.0916, + "margin_dpo/margin_mean": 368.968017578125, + "margin_dpo/margin_std": 511.87725830078125, + "step": 506 + }, + { + "KL/chosen_KL_mean": -589.63037109375, + "KL/mean": -747.434326171875, + "KL/rejected_KL_mean": -905.2382202148438, + "KL/std": 535.4652099609375, + "epoch": 0.7444933920704846, + "fcm_dpo/beta": 0.0011373506858944893, + "fcm_dpo/delta": 0.04258999228477478, + "fcm_dpo/margin": 315.60784912109375, + "fcm_dpo/q_t": 0.4222760498523712, + "grad_norm": 30.438766479492188, + "learning_rate": 9.427092687124691e-08, + "logits/chosen": -0.9080416560173035, + "logits/rejected": -0.9134109020233154, + "logps/chosen": -656.431884765625, + "logps/ref_chosen": -66.80149841308594, + "logps/ref_rejected": -95.37289428710938, + "logps/rejected": -1000.611083984375, + "loss": 1.1574, + "margin_dpo/margin_mean": 315.6078186035156, + "margin_dpo/margin_std": 585.120849609375, + "step": 507 + }, + { + "KL/chosen_KL_mean": -643.0169067382812, + "KL/mean": -777.5562744140625, + "KL/rejected_KL_mean": -912.095703125, + "KL/std": 526.7119750976562, + "epoch": 0.7459618208516887, + "fcm_dpo/beta": 0.0011591333895921707, + "fcm_dpo/delta": 0.09055158495903015, + "fcm_dpo/margin": 269.07879638671875, + "fcm_dpo/q_t": 0.43193942308425903, + "grad_norm": 43.13357162475586, + "learning_rate": 9.326904852647344e-08, + "logits/chosen": -0.8901297450065613, + "logits/rejected": -0.8925095796585083, + "logps/chosen": -714.3203735351562, + "logps/ref_chosen": -71.303466796875, + "logps/ref_rejected": -95.6275405883789, + "logps/rejected": -1007.7232666015625, + "loss": 1.2159, + "margin_dpo/margin_mean": 269.07879638671875, + "margin_dpo/margin_std": 613.018310546875, + "step": 508 + }, + { + "KL/chosen_KL_mean": -475.68719482421875, + "KL/mean": -625.1077880859375, + "KL/rejected_KL_mean": -774.5283813476562, + "KL/std": 375.84735107421875, + "epoch": 0.7474302496328928, + "fcm_dpo/beta": 0.0011755790328606963, + "fcm_dpo/delta": 0.04976864904165268, + "fcm_dpo/margin": 298.8412170410156, + "fcm_dpo/q_t": 0.4198834300041199, + "grad_norm": 28.78981590270996, + "learning_rate": 9.227130018803195e-08, + "logits/chosen": -0.8108519315719604, + "logits/rejected": -0.8077250123023987, + "logps/chosen": -539.5061645507812, + "logps/ref_chosen": -63.81895065307617, + "logps/ref_rejected": -83.25643920898438, + "logps/rejected": -857.7847900390625, + "loss": 1.1345, + "margin_dpo/margin_mean": 298.8412170410156, + "margin_dpo/margin_std": 454.3382873535156, + "step": 509 + }, + { + "KL/chosen_KL_mean": -573.8865966796875, + "KL/mean": -767.037841796875, + "KL/rejected_KL_mean": -960.1890869140625, + "KL/std": 436.06304931640625, + "epoch": 0.748898678414097, + "fcm_dpo/beta": 0.001165606314316392, + "fcm_dpo/delta": -0.05261443555355072, + "fcm_dpo/margin": 386.30255126953125, + "fcm_dpo/q_t": 0.395079642534256, + "grad_norm": 31.50473403930664, + "learning_rate": 9.127770814751932e-08, + "logits/chosen": -0.8299954533576965, + "logits/rejected": -0.8495923280715942, + "logps/chosen": -625.7650756835938, + "logps/ref_chosen": -51.878448486328125, + "logps/ref_rejected": -102.7651596069336, + "logps/rejected": -1062.954345703125, + "loss": 1.0442, + "margin_dpo/margin_mean": 386.30255126953125, + "margin_dpo/margin_std": 427.64312744140625, + "step": 510 + }, + { + "KL/chosen_KL_mean": -543.1624145507812, + "KL/mean": -698.3929443359375, + "KL/rejected_KL_mean": -853.62353515625, + "KL/std": 473.2135314941406, + "epoch": 0.750367107195301, + "fcm_dpo/beta": 0.0011685066856443882, + "fcm_dpo/delta": 0.03857073932886124, + "fcm_dpo/margin": 310.46112060546875, + "fcm_dpo/q_t": 0.4177062213420868, + "grad_norm": 36.895851135253906, + "learning_rate": 9.028829858700973e-08, + "logits/chosen": -0.8786974549293518, + "logits/rejected": -0.8848339319229126, + "logps/chosen": -603.4005126953125, + "logps/ref_chosen": -60.23811721801758, + "logps/ref_rejected": -92.85676574707031, + "logps/rejected": -946.4803466796875, + "loss": 1.1563, + "margin_dpo/margin_mean": 310.46112060546875, + "margin_dpo/margin_std": 569.1368408203125, + "step": 511 + }, + { + "KL/chosen_KL_mean": -426.22259521484375, + "KL/mean": -634.8209838867188, + "KL/rejected_KL_mean": -843.4193115234375, + "KL/std": 422.7352294921875, + "epoch": 0.7518355359765051, + "fcm_dpo/beta": 0.0011527151800692081, + "fcm_dpo/delta": -0.08524032682180405, + "fcm_dpo/margin": 417.19671630859375, + "fcm_dpo/q_t": 0.38830769062042236, + "grad_norm": 52.474979400634766, + "learning_rate": 8.930309757836516e-08, + "logits/chosen": -0.8602747917175293, + "logits/rejected": -0.8812437057495117, + "logps/chosen": -481.12811279296875, + "logps/ref_chosen": -54.905494689941406, + "logps/ref_rejected": -81.87586975097656, + "logps/rejected": -925.295166015625, + "loss": 1.0194, + "margin_dpo/margin_mean": 417.19671630859375, + "margin_dpo/margin_std": 433.2279052734375, + "step": 512 + }, + { + "KL/chosen_KL_mean": -534.08837890625, + "KL/mean": -682.8746337890625, + "KL/rejected_KL_mean": -831.660888671875, + "KL/std": 402.69537353515625, + "epoch": 0.7533039647577092, + "fcm_dpo/beta": 0.0011531409109011292, + "fcm_dpo/delta": 0.05850052088499069, + "fcm_dpo/margin": 297.572509765625, + "fcm_dpo/q_t": 0.42183050513267517, + "grad_norm": 44.99782943725586, + "learning_rate": 8.832213108254863e-08, + "logits/chosen": -0.9019182920455933, + "logits/rejected": -0.8876909017562866, + "logps/chosen": -599.0048828125, + "logps/ref_chosen": -64.91644287109375, + "logps/ref_rejected": -76.06245422363281, + "logps/rejected": -907.7233276367188, + "loss": 1.1427, + "margin_dpo/margin_mean": 297.572509765625, + "margin_dpo/margin_std": 461.5359191894531, + "step": 513 + }, + { + "KL/chosen_KL_mean": -556.1077270507812, + "KL/mean": -703.8541259765625, + "KL/rejected_KL_mean": -851.6004638671875, + "KL/std": 437.5733642578125, + "epoch": 0.7547723935389133, + "fcm_dpo/beta": 0.0011747241951525211, + "fcm_dpo/delta": 0.05465298146009445, + "fcm_dpo/margin": 295.4927673339844, + "fcm_dpo/q_t": 0.42250925302505493, + "grad_norm": 27.680599212646484, + "learning_rate": 8.734542494893954e-08, + "logits/chosen": -0.8176130652427673, + "logits/rejected": -0.8068991899490356, + "logps/chosen": -630.3372802734375, + "logps/ref_chosen": -74.22957611083984, + "logps/ref_rejected": -78.945556640625, + "logps/rejected": -930.5460205078125, + "loss": 1.1448, + "margin_dpo/margin_mean": 295.4927673339844, + "margin_dpo/margin_std": 489.8013610839844, + "step": 514 + }, + { + "KL/chosen_KL_mean": -475.355712890625, + "KL/mean": -587.893798828125, + "KL/rejected_KL_mean": -700.431884765625, + "KL/std": 380.00994873046875, + "epoch": 0.7562408223201175, + "fcm_dpo/beta": 0.0011996763059869409, + "fcm_dpo/delta": 0.13362111151218414, + "fcm_dpo/margin": 225.07615661621094, + "fcm_dpo/q_t": 0.4364135265350342, + "grad_norm": 43.433387756347656, + "learning_rate": 8.637300491465272e-08, + "logits/chosen": -0.8131271600723267, + "logits/rejected": -0.8236969709396362, + "logps/chosen": -525.75732421875, + "logps/ref_chosen": -50.40156555175781, + "logps/ref_rejected": -87.09774780273438, + "logps/rejected": -787.5296630859375, + "loss": 1.2001, + "margin_dpo/margin_mean": 225.076171875, + "margin_dpo/margin_std": 440.53546142578125, + "step": 515 + }, + { + "KL/chosen_KL_mean": -509.38916015625, + "KL/mean": -684.5283813476562, + "KL/rejected_KL_mean": -859.6676025390625, + "KL/std": 431.451416015625, + "epoch": 0.7577092511013216, + "fcm_dpo/beta": 0.0012069368967786431, + "fcm_dpo/delta": -0.02384302206337452, + "fcm_dpo/margin": 350.2784118652344, + "fcm_dpo/q_t": 0.4014623761177063, + "grad_norm": 40.77699279785156, + "learning_rate": 8.540489660386064e-08, + "logits/chosen": -0.9214959740638733, + "logits/rejected": -0.9501833319664001, + "logps/chosen": -574.0386962890625, + "logps/ref_chosen": -64.64956665039062, + "logps/ref_rejected": -111.72237396240234, + "logps/rejected": -971.3899536132812, + "loss": 1.0691, + "margin_dpo/margin_mean": 350.2784118652344, + "margin_dpo/margin_std": 421.2920227050781, + "step": 516 + }, + { + "KL/chosen_KL_mean": -546.0828857421875, + "KL/mean": -750.1161499023438, + "KL/rejected_KL_mean": -954.1494140625, + "KL/std": 485.58392333984375, + "epoch": 0.7591776798825257, + "fcm_dpo/beta": 0.0011841601226478815, + "fcm_dpo/delta": -0.08773398399353027, + "fcm_dpo/margin": 408.0665283203125, + "fcm_dpo/q_t": 0.39262643456459045, + "grad_norm": 32.40835189819336, + "learning_rate": 8.444112552711752e-08, + "logits/chosen": -0.8471901416778564, + "logits/rejected": -0.8455414772033691, + "logps/chosen": -606.9964599609375, + "logps/ref_chosen": -60.913551330566406, + "logps/ref_rejected": -89.08308410644531, + "logps/rejected": -1043.232421875, + "loss": 1.0478, + "margin_dpo/margin_mean": 408.0665283203125, + "margin_dpo/margin_std": 531.6887817382812, + "step": 517 + }, + { + "KL/chosen_KL_mean": -491.3916931152344, + "KL/mean": -659.3934936523438, + "KL/rejected_KL_mean": -827.3953247070312, + "KL/std": 386.0772705078125, + "epoch": 0.7606461086637298, + "fcm_dpo/beta": 0.0011782585643231869, + "fcm_dpo/delta": 0.004059506580233574, + "fcm_dpo/margin": 336.003662109375, + "fcm_dpo/q_t": 0.40781712532043457, + "grad_norm": 53.09996795654297, + "learning_rate": 8.348171708068747e-08, + "logits/chosen": -0.8820132613182068, + "logits/rejected": -0.8964939117431641, + "logps/chosen": -548.8475341796875, + "logps/ref_chosen": -57.45589065551758, + "logps/ref_rejected": -85.31269836425781, + "logps/rejected": -912.7080078125, + "loss": 1.0905, + "margin_dpo/margin_mean": 336.003662109375, + "margin_dpo/margin_std": 429.115966796875, + "step": 518 + }, + { + "KL/chosen_KL_mean": -507.8600769042969, + "KL/mean": -623.6256713867188, + "KL/rejected_KL_mean": -739.3912963867188, + "KL/std": 352.45123291015625, + "epoch": 0.762114537444934, + "fcm_dpo/beta": 0.0011857892386615276, + "fcm_dpo/delta": 0.028549687936902046, + "fcm_dpo/margin": 231.53121948242188, + "fcm_dpo/q_t": 0.4360736012458801, + "grad_norm": 40.137332916259766, + "learning_rate": 8.25266965458755e-08, + "logits/chosen": -0.8481384515762329, + "logits/rejected": -0.8306090235710144, + "logps/chosen": -581.9234008789062, + "logps/ref_chosen": -74.06331634521484, + "logps/ref_rejected": -104.44416809082031, + "logps/rejected": -843.83544921875, + "loss": 1.1966, + "margin_dpo/margin_mean": 231.53121948242188, + "margin_dpo/margin_std": 438.93048095703125, + "step": 519 + }, + { + "KL/chosen_KL_mean": -543.58251953125, + "KL/mean": -701.6224975585938, + "KL/rejected_KL_mean": -859.6624145507812, + "KL/std": 423.9530029296875, + "epoch": 0.7635829662261381, + "fcm_dpo/beta": 0.0011898789089173079, + "fcm_dpo/delta": 0.024717746302485466, + "fcm_dpo/margin": 316.0799560546875, + "fcm_dpo/q_t": 0.4156090021133423, + "grad_norm": 39.273406982421875, + "learning_rate": 8.15760890883607e-08, + "logits/chosen": -0.8328167200088501, + "logits/rejected": -0.839728832244873, + "logps/chosen": -613.88232421875, + "logps/ref_chosen": -70.2998275756836, + "logps/ref_rejected": -99.98133850097656, + "logps/rejected": -959.643798828125, + "loss": 1.1217, + "margin_dpo/margin_mean": 316.0799560546875, + "margin_dpo/margin_std": 469.7958068847656, + "step": 520 + }, + { + "KL/chosen_KL_mean": -485.7915954589844, + "KL/mean": -656.0485229492188, + "KL/rejected_KL_mean": -826.305419921875, + "KL/std": 436.52398681640625, + "epoch": 0.7650513950073421, + "fcm_dpo/beta": 0.0012013925006613135, + "fcm_dpo/delta": -0.010350905358791351, + "fcm_dpo/margin": 340.51385498046875, + "fcm_dpo/q_t": 0.4064163863658905, + "grad_norm": 35.94675827026367, + "learning_rate": 8.062991975753378e-08, + "logits/chosen": -0.8887852430343628, + "logits/rejected": -0.8937211036682129, + "logps/chosen": -543.9345703125, + "logps/ref_chosen": -58.14292526245117, + "logps/ref_rejected": -83.28060913085938, + "logps/rejected": -909.5860595703125, + "loss": 1.0873, + "margin_dpo/margin_mean": 340.51385498046875, + "margin_dpo/margin_std": 440.299560546875, + "step": 521 + }, + { + "KL/chosen_KL_mean": -558.0612182617188, + "KL/mean": -710.2643432617188, + "KL/rejected_KL_mean": -862.467529296875, + "KL/std": 458.29718017578125, + "epoch": 0.7665198237885462, + "fcm_dpo/beta": 0.0011995111126452684, + "fcm_dpo/delta": 0.03616529330611229, + "fcm_dpo/margin": 304.40625, + "fcm_dpo/q_t": 0.41692230105400085, + "grad_norm": 31.800031661987305, + "learning_rate": 7.968821348583643e-08, + "logits/chosen": -0.887365460395813, + "logits/rejected": -0.8906110525131226, + "logps/chosen": -604.60888671875, + "logps/ref_chosen": -46.54766845703125, + "logps/ref_rejected": -66.01388549804688, + "logps/rejected": -928.4814453125, + "loss": 1.1338, + "margin_dpo/margin_mean": 304.40625, + "margin_dpo/margin_std": 485.10552978515625, + "step": 522 + }, + { + "KL/chosen_KL_mean": -589.586669921875, + "KL/mean": -753.298828125, + "KL/rejected_KL_mean": -917.010986328125, + "KL/std": 527.0751342773438, + "epoch": 0.7679882525697503, + "fcm_dpo/beta": 0.0012012626975774765, + "fcm_dpo/delta": 0.006936301477253437, + "fcm_dpo/margin": 327.4243469238281, + "fcm_dpo/q_t": 0.4126874804496765, + "grad_norm": 40.38140106201172, + "learning_rate": 7.875099508810484e-08, + "logits/chosen": -0.954033613204956, + "logits/rejected": -0.9616571068763733, + "logps/chosen": -651.356201171875, + "logps/ref_chosen": -61.76960372924805, + "logps/ref_rejected": -83.76141357421875, + "logps/rejected": -1000.7723999023438, + "loss": 1.1345, + "margin_dpo/margin_mean": 327.42437744140625, + "margin_dpo/margin_std": 560.2000732421875, + "step": 523 + }, + { + "KL/chosen_KL_mean": -570.922119140625, + "KL/mean": -741.5078735351562, + "KL/rejected_KL_mean": -912.0936279296875, + "KL/std": 479.9468994140625, + "epoch": 0.7694566813509545, + "fcm_dpo/beta": 0.001195290358737111, + "fcm_dpo/delta": -0.008701588958501816, + "fcm_dpo/margin": 341.17156982421875, + "fcm_dpo/q_t": 0.40466296672821045, + "grad_norm": 41.24837112426758, + "learning_rate": 7.781828926091535e-08, + "logits/chosen": -0.9870351552963257, + "logits/rejected": -0.9809165000915527, + "logps/chosen": -648.994140625, + "logps/ref_chosen": -78.0720443725586, + "logps/ref_rejected": -81.30198669433594, + "logps/rejected": -993.3956298828125, + "loss": 1.1083, + "margin_dpo/margin_mean": 341.17156982421875, + "margin_dpo/margin_std": 499.772705078125, + "step": 524 + }, + { + "KL/chosen_KL_mean": -579.7892456054688, + "KL/mean": -798.8469848632812, + "KL/rejected_KL_mean": -1017.9046630859375, + "KL/std": 511.00323486328125, + "epoch": 0.7709251101321586, + "fcm_dpo/beta": 0.001174594508484006, + "fcm_dpo/delta": -0.12164277583360672, + "fcm_dpo/margin": 438.11553955078125, + "fcm_dpo/q_t": 0.384994238615036, + "grad_norm": 30.923978805541992, + "learning_rate": 7.689012058193384e-08, + "logits/chosen": -0.9067383408546448, + "logits/rejected": -0.943909764289856, + "logps/chosen": -630.6170654296875, + "logps/ref_chosen": -50.827857971191406, + "logps/ref_rejected": -100.05294036865234, + "logps/rejected": -1117.9576416015625, + "loss": 1.0245, + "margin_dpo/margin_mean": 438.11553955078125, + "margin_dpo/margin_std": 533.79833984375, + "step": 525 + }, + { + "KL/chosen_KL_mean": -613.677001953125, + "KL/mean": -823.80029296875, + "KL/rejected_KL_mean": -1033.9234619140625, + "KL/std": 496.4564208984375, + "epoch": 0.7723935389133627, + "fcm_dpo/beta": 0.0011603353777900338, + "fcm_dpo/delta": -0.0920577421784401, + "fcm_dpo/margin": 420.246337890625, + "fcm_dpo/q_t": 0.38871896266937256, + "grad_norm": 29.568998336791992, + "learning_rate": 7.596651350926836e-08, + "logits/chosen": -0.9136035442352295, + "logits/rejected": -0.9113898873329163, + "logps/chosen": -676.84423828125, + "logps/ref_chosen": -63.167236328125, + "logps/ref_rejected": -86.30934143066406, + "logps/rejected": -1120.2327880859375, + "loss": 1.0538, + "margin_dpo/margin_mean": 420.246337890625, + "margin_dpo/margin_std": 552.4700317382812, + "step": 526 + }, + { + "KL/chosen_KL_mean": -627.5474853515625, + "KL/mean": -774.3292236328125, + "KL/rejected_KL_mean": -921.1109619140625, + "KL/std": 518.6869506835938, + "epoch": 0.7738619676945668, + "fcm_dpo/beta": 0.0011582564329728484, + "fcm_dpo/delta": 0.062111612409353256, + "fcm_dpo/margin": 293.5635070800781, + "fcm_dpo/q_t": 0.4208937883377075, + "grad_norm": 32.31246566772461, + "learning_rate": 7.504749238082414e-08, + "logits/chosen": -1.083193063735962, + "logits/rejected": -1.0532429218292236, + "logps/chosen": -698.6761474609375, + "logps/ref_chosen": -71.12867736816406, + "logps/ref_rejected": -78.3425521850586, + "logps/rejected": -999.4535522460938, + "loss": 1.1347, + "margin_dpo/margin_mean": 293.5634765625, + "margin_dpo/margin_std": 429.7330322265625, + "step": 527 + }, + { + "KL/chosen_KL_mean": -646.1875, + "KL/mean": -824.3826904296875, + "KL/rejected_KL_mean": -1002.5779418945312, + "KL/std": 493.030029296875, + "epoch": 0.775330396475771, + "fcm_dpo/beta": 0.0011612444650381804, + "fcm_dpo/delta": -0.01449208240956068, + "fcm_dpo/margin": 356.39044189453125, + "fcm_dpo/q_t": 0.40898245573043823, + "grad_norm": 51.520660400390625, + "learning_rate": 7.413308141366254e-08, + "logits/chosen": -1.0223352909088135, + "logits/rejected": -1.0113223791122437, + "logps/chosen": -714.2769165039062, + "logps/ref_chosen": -68.0894546508789, + "logps/ref_rejected": -93.91006469726562, + "logps/rejected": -1096.488037109375, + "loss": 1.1168, + "margin_dpo/margin_mean": 356.39044189453125, + "margin_dpo/margin_std": 576.7999267578125, + "step": 528 + }, + { + "KL/chosen_KL_mean": -762.7345581054688, + "KL/mean": -881.308349609375, + "KL/rejected_KL_mean": -999.882080078125, + "KL/std": 446.8496398925781, + "epoch": 0.7767988252569751, + "fcm_dpo/beta": 0.0011665602214634418, + "fcm_dpo/delta": 0.02131509780883789, + "fcm_dpo/margin": 237.14752197265625, + "fcm_dpo/q_t": 0.4364190697669983, + "grad_norm": 43.96234893798828, + "learning_rate": 7.322330470336313e-08, + "logits/chosen": -1.0255260467529297, + "logits/rejected": -1.0359970331192017, + "logps/chosen": -818.3095703125, + "logps/ref_chosen": -55.57495880126953, + "logps/ref_rejected": -89.20909118652344, + "logps/rejected": -1089.0911865234375, + "loss": 1.2374, + "margin_dpo/margin_mean": 237.14752197265625, + "margin_dpo/margin_std": 574.8782348632812, + "step": 529 + }, + { + "KL/chosen_KL_mean": -668.689208984375, + "KL/mean": -865.7867431640625, + "KL/rejected_KL_mean": -1062.88427734375, + "KL/std": 562.298095703125, + "epoch": 0.7782672540381792, + "fcm_dpo/beta": 0.0011577388504520059, + "fcm_dpo/delta": -0.05902961269021034, + "fcm_dpo/margin": 394.1950988769531, + "fcm_dpo/q_t": 0.4014556407928467, + "grad_norm": 53.588645935058594, + "learning_rate": 7.231818622338822e-08, + "logits/chosen": -0.9486408829689026, + "logits/rejected": -0.9478579759597778, + "logps/chosen": -716.2905883789062, + "logps/ref_chosen": -47.601417541503906, + "logps/ref_rejected": -87.2845230102539, + "logps/rejected": -1150.1688232421875, + "loss": 1.1248, + "margin_dpo/margin_mean": 394.1950988769531, + "margin_dpo/margin_std": 704.854736328125, + "step": 530 + }, + { + "KL/chosen_KL_mean": -726.8511962890625, + "KL/mean": -901.04345703125, + "KL/rejected_KL_mean": -1075.2357177734375, + "KL/std": 581.962158203125, + "epoch": 0.7797356828193832, + "fcm_dpo/beta": 0.0011553821386769414, + "fcm_dpo/delta": -0.0027520228177309036, + "fcm_dpo/margin": 348.384521484375, + "fcm_dpo/q_t": 0.4106895327568054, + "grad_norm": 36.676822662353516, + "learning_rate": 7.141774982445147e-08, + "logits/chosen": -1.0601121187210083, + "logits/rejected": -1.0491019487380981, + "logps/chosen": -782.0972900390625, + "logps/ref_chosen": -55.246063232421875, + "logps/ref_rejected": -70.60598754882812, + "logps/rejected": -1145.841796875, + "loss": 1.1178, + "margin_dpo/margin_mean": 348.3844909667969, + "margin_dpo/margin_std": 555.5078735351562, + "step": 531 + }, + { + "KL/chosen_KL_mean": -733.3638305664062, + "KL/mean": -910.0308227539062, + "KL/rejected_KL_mean": -1086.69775390625, + "KL/std": 548.9955444335938, + "epoch": 0.7812041116005873, + "fcm_dpo/beta": 0.0011431981110945344, + "fcm_dpo/delta": -0.0051701366901397705, + "fcm_dpo/margin": 353.33392333984375, + "fcm_dpo/q_t": 0.41110938787460327, + "grad_norm": 76.43359375, + "learning_rate": 7.052201923388953e-08, + "logits/chosen": -0.986026406288147, + "logits/rejected": -0.962155282497406, + "logps/chosen": -803.6498413085938, + "logps/ref_chosen": -70.28601837158203, + "logps/ref_rejected": -86.5913314819336, + "logps/rejected": -1173.2890625, + "loss": 1.1519, + "margin_dpo/margin_mean": 353.33392333984375, + "margin_dpo/margin_std": 646.3972778320312, + "step": 532 + }, + { + "KL/chosen_KL_mean": -653.2174072265625, + "KL/mean": -784.70263671875, + "KL/rejected_KL_mean": -916.1878051757812, + "KL/std": 470.7712707519531, + "epoch": 0.7826725403817915, + "fcm_dpo/beta": 0.00114994659088552, + "fcm_dpo/delta": -0.009539761580526829, + "fcm_dpo/margin": 262.97039794921875, + "fcm_dpo/q_t": 0.4316534399986267, + "grad_norm": 54.19781494140625, + "learning_rate": 6.963101805503646e-08, + "logits/chosen": -0.9915690422058105, + "logits/rejected": -0.9716913104057312, + "logps/chosen": -718.072509765625, + "logps/ref_chosen": -64.8551025390625, + "logps/ref_rejected": -76.58805847167969, + "logps/rejected": -992.77587890625, + "loss": 1.2054, + "margin_dpo/margin_mean": 262.97039794921875, + "margin_dpo/margin_std": 563.1723022460938, + "step": 533 + }, + { + "KL/chosen_KL_mean": -698.3402099609375, + "KL/mean": -880.8671875, + "KL/rejected_KL_mean": -1063.39404296875, + "KL/std": 534.1531982421875, + "epoch": 0.7841409691629956, + "fcm_dpo/beta": 0.001137340790592134, + "fcm_dpo/delta": -0.017138652503490448, + "fcm_dpo/margin": 365.05377197265625, + "fcm_dpo/q_t": 0.40718841552734375, + "grad_norm": 37.44085693359375, + "learning_rate": 6.874476976660184e-08, + "logits/chosen": -0.9751067757606506, + "logits/rejected": -0.9717357158660889, + "logps/chosen": -758.4595947265625, + "logps/ref_chosen": -60.119388580322266, + "logps/ref_rejected": -78.54347229003906, + "logps/rejected": -1141.9375, + "loss": 1.1061, + "margin_dpo/margin_mean": 365.0538024902344, + "margin_dpo/margin_std": 543.7841796875, + "step": 534 + }, + { + "KL/chosen_KL_mean": -584.0714721679688, + "KL/mean": -793.9010620117188, + "KL/rejected_KL_mean": -1003.7305908203125, + "KL/std": 514.93896484375, + "epoch": 0.7856093979441997, + "fcm_dpo/beta": 0.0011387758422642946, + "fcm_dpo/delta": -0.0821937620639801, + "fcm_dpo/margin": 419.6590576171875, + "fcm_dpo/q_t": 0.39370042085647583, + "grad_norm": 29.454992294311523, + "learning_rate": 6.786329772205246e-08, + "logits/chosen": -0.9155275821685791, + "logits/rejected": -0.9193699359893799, + "logps/chosen": -638.4017333984375, + "logps/ref_chosen": -54.330238342285156, + "logps/ref_rejected": -96.30763244628906, + "logps/rejected": -1100.0382080078125, + "loss": 1.0564, + "margin_dpo/margin_mean": 419.6590576171875, + "margin_dpo/margin_std": 548.3145141601562, + "step": 535 + }, + { + "KL/chosen_KL_mean": -528.38671875, + "KL/mean": -777.7352905273438, + "KL/rejected_KL_mean": -1027.083740234375, + "KL/std": 573.330322265625, + "epoch": 0.7870778267254038, + "fcm_dpo/beta": 0.0011007413268089294, + "fcm_dpo/delta": -0.15751913189888, + "fcm_dpo/margin": 498.69720458984375, + "fcm_dpo/q_t": 0.3827175498008728, + "grad_norm": 38.40432357788086, + "learning_rate": 6.698662514899638e-08, + "logits/chosen": -0.9019815325737, + "logits/rejected": -0.933282732963562, + "logps/chosen": -575.4672241210938, + "logps/ref_chosen": -47.08053207397461, + "logps/ref_rejected": -89.09783935546875, + "logps/rejected": -1116.181640625, + "loss": 1.0258, + "margin_dpo/margin_mean": 498.6971740722656, + "margin_dpo/margin_std": 687.29541015625, + "step": 536 + }, + { + "KL/chosen_KL_mean": -552.53515625, + "KL/mean": -724.3604736328125, + "KL/rejected_KL_mean": -896.1856689453125, + "KL/std": 467.387939453125, + "epoch": 0.788546255506608, + "fcm_dpo/beta": 0.0011007563443854451, + "fcm_dpo/delta": 0.021880976855754852, + "fcm_dpo/margin": 343.6505126953125, + "fcm_dpo/q_t": 0.4137336313724518, + "grad_norm": 42.51823425292969, + "learning_rate": 6.611477514857114e-08, + "logits/chosen": -0.9375029802322388, + "logits/rejected": -0.9250655770301819, + "logps/chosen": -610.2826538085938, + "logps/ref_chosen": -57.747467041015625, + "logps/ref_rejected": -70.43838500976562, + "logps/rejected": -966.6240234375, + "loss": 1.1392, + "margin_dpo/margin_mean": 343.6505126953125, + "margin_dpo/margin_std": 572.0556640625, + "step": 537 + }, + { + "KL/chosen_KL_mean": -675.07861328125, + "KL/mean": -863.2783813476562, + "KL/rejected_KL_mean": -1051.478271484375, + "KL/std": 501.026123046875, + "epoch": 0.7900146842878121, + "fcm_dpo/beta": 0.0010912481229752302, + "fcm_dpo/delta": -0.011391473934054375, + "fcm_dpo/margin": 376.39959716796875, + "fcm_dpo/q_t": 0.40652403235435486, + "grad_norm": 32.99623489379883, + "learning_rate": 6.524777069483525e-08, + "logits/chosen": -0.9146217107772827, + "logits/rejected": -0.9003403186798096, + "logps/chosen": -741.4945678710938, + "logps/ref_chosen": -66.41594696044922, + "logps/ref_rejected": -84.22808837890625, + "logps/rejected": -1135.706298828125, + "loss": 1.0884, + "margin_dpo/margin_mean": 376.3995666503906, + "margin_dpo/margin_std": 509.0475158691406, + "step": 538 + }, + { + "KL/chosen_KL_mean": -576.7205810546875, + "KL/mean": -757.0679931640625, + "KL/rejected_KL_mean": -937.4154052734375, + "KL/std": 426.46710205078125, + "epoch": 0.7914831130690162, + "fcm_dpo/beta": 0.0010967530542984605, + "fcm_dpo/delta": 0.004480024799704552, + "fcm_dpo/margin": 360.6948547363281, + "fcm_dpo/q_t": 0.4094918370246887, + "grad_norm": 36.090091705322266, + "learning_rate": 6.438563463416221e-08, + "logits/chosen": -0.9449999332427979, + "logits/rejected": -0.9356608390808105, + "logps/chosen": -635.21337890625, + "logps/ref_chosen": -58.492855072021484, + "logps/ref_rejected": -91.85395050048828, + "logps/rejected": -1029.269287109375, + "loss": 1.0922, + "margin_dpo/margin_mean": 360.6948547363281, + "margin_dpo/margin_std": 470.793212890625, + "step": 539 + }, + { + "KL/chosen_KL_mean": -585.2135620117188, + "KL/mean": -820.2244262695312, + "KL/rejected_KL_mean": -1055.2353515625, + "KL/std": 539.0792236328125, + "epoch": 0.7929515418502202, + "fcm_dpo/beta": 0.001079935347661376, + "fcm_dpo/delta": -0.1131967157125473, + "fcm_dpo/margin": 470.0218811035156, + "fcm_dpo/q_t": 0.3896099925041199, + "grad_norm": 34.88587951660156, + "learning_rate": 6.352838968463919e-08, + "logits/chosen": -0.9021656513214111, + "logits/rejected": -0.9275361895561218, + "logps/chosen": -648.696044921875, + "logps/ref_chosen": -63.482513427734375, + "logps/ref_rejected": -116.42999267578125, + "logps/rejected": -1171.665283203125, + "loss": 1.052, + "margin_dpo/margin_mean": 470.0218505859375, + "margin_dpo/margin_std": 641.974853515625, + "step": 540 + }, + { + "KL/chosen_KL_mean": -691.6966552734375, + "KL/mean": -813.2490234375, + "KL/rejected_KL_mean": -934.8014526367188, + "KL/std": 476.3492431640625, + "epoch": 0.7944199706314243, + "fcm_dpo/beta": 0.0010710853384807706, + "fcm_dpo/delta": 0.004636428784579039, + "fcm_dpo/margin": 243.10484313964844, + "fcm_dpo/q_t": 0.4407821297645569, + "grad_norm": 53.53697204589844, + "learning_rate": 6.267605843546767e-08, + "logits/chosen": -1.006117343902588, + "logits/rejected": -1.0002844333648682, + "logps/chosen": -769.9769897460938, + "logps/ref_chosen": -78.28036499023438, + "logps/ref_rejected": -103.273681640625, + "logps/rejected": -1038.0751953125, + "loss": 1.2295, + "margin_dpo/margin_mean": 243.10482788085938, + "margin_dpo/margin_std": 556.22802734375, + "step": 541 + }, + { + "KL/chosen_KL_mean": -586.3802490234375, + "KL/mean": -818.114013671875, + "KL/rejected_KL_mean": -1049.8477783203125, + "KL/std": 528.82470703125, + "epoch": 0.7958883994126285, + "fcm_dpo/beta": 0.001048381207510829, + "fcm_dpo/delta": -0.09220831096172333, + "fcm_dpo/margin": 463.4676208496094, + "fcm_dpo/q_t": 0.3913062810897827, + "grad_norm": 52.26215362548828, + "learning_rate": 6.182866334636888e-08, + "logits/chosen": -0.9827414751052856, + "logits/rejected": -1.0148510932922363, + "logps/chosen": -643.865234375, + "logps/ref_chosen": -57.48497009277344, + "logps/ref_rejected": -96.47506713867188, + "logps/rejected": -1146.3228759765625, + "loss": 1.0568, + "margin_dpo/margin_mean": 463.46759033203125, + "margin_dpo/margin_std": 619.2740478515625, + "step": 542 + }, + { + "KL/chosen_KL_mean": -671.8050537109375, + "KL/mean": -835.260986328125, + "KL/rejected_KL_mean": -998.716796875, + "KL/std": 636.306640625, + "epoch": 0.7973568281938326, + "fcm_dpo/beta": 0.0010581112001091242, + "fcm_dpo/delta": 0.05605652183294296, + "fcm_dpo/margin": 326.9117431640625, + "fcm_dpo/q_t": 0.4326293468475342, + "grad_norm": 37.28949737548828, + "learning_rate": 6.098622674699147e-08, + "logits/chosen": -0.9363719820976257, + "logits/rejected": -0.9664100408554077, + "logps/chosen": -732.422607421875, + "logps/ref_chosen": -60.61750793457031, + "logps/ref_rejected": -105.59896850585938, + "logps/rejected": -1104.3157958984375, + "loss": 1.2008, + "margin_dpo/margin_mean": 326.91180419921875, + "margin_dpo/margin_std": 747.7957763671875, + "step": 543 + }, + { + "KL/chosen_KL_mean": -671.8202514648438, + "KL/mean": -865.7568359375, + "KL/rejected_KL_mean": -1059.6934814453125, + "KL/std": 506.9651794433594, + "epoch": 0.7988252569750367, + "fcm_dpo/beta": 0.0010604651179164648, + "fcm_dpo/delta": -0.011842611245810986, + "fcm_dpo/margin": 387.8731994628906, + "fcm_dpo/q_t": 0.4069485068321228, + "grad_norm": 34.128662109375, + "learning_rate": 6.01487708363232e-08, + "logits/chosen": -0.9132235050201416, + "logits/rejected": -0.9313616752624512, + "logps/chosen": -731.4625244140625, + "logps/ref_chosen": -59.642303466796875, + "logps/ref_rejected": -100.95469665527344, + "logps/rejected": -1160.648193359375, + "loss": 1.1033, + "margin_dpo/margin_mean": 387.8731994628906, + "margin_dpo/margin_std": 581.0994262695312, + "step": 544 + }, + { + "KL/chosen_KL_mean": -610.9091796875, + "KL/mean": -838.3101806640625, + "KL/rejected_KL_mean": -1065.711181640625, + "KL/std": 501.88525390625, + "epoch": 0.8002936857562408, + "fcm_dpo/beta": 0.001048812409862876, + "fcm_dpo/delta": -0.0808180570602417, + "fcm_dpo/margin": 454.80206298828125, + "fcm_dpo/q_t": 0.39258188009262085, + "grad_norm": 40.978694915771484, + "learning_rate": 5.9316317682106294e-08, + "logits/chosen": -0.8520915508270264, + "logits/rejected": -0.8862226009368896, + "logps/chosen": -678.5577392578125, + "logps/ref_chosen": -67.64859771728516, + "logps/ref_rejected": -95.90800476074219, + "logps/rejected": -1161.619140625, + "loss": 1.0465, + "margin_dpo/margin_mean": 454.80206298828125, + "margin_dpo/margin_std": 570.3173828125, + "step": 545 + }, + { + "KL/chosen_KL_mean": -588.1337890625, + "KL/mean": -736.3134155273438, + "KL/rejected_KL_mean": -884.4930419921875, + "KL/std": 446.0101013183594, + "epoch": 0.801762114537445, + "fcm_dpo/beta": 0.0010577274952083826, + "fcm_dpo/delta": 0.08934411406517029, + "fcm_dpo/margin": 296.3592529296875, + "fcm_dpo/q_t": 0.42593374848365784, + "grad_norm": 33.9541015625, + "learning_rate": 5.848888922025552e-08, + "logits/chosen": -0.9163818359375, + "logits/rejected": -0.9050056338310242, + "logps/chosen": -638.8780517578125, + "logps/ref_chosen": -50.744232177734375, + "logps/ref_rejected": -81.86622619628906, + "logps/rejected": -966.3592529296875, + "loss": 1.1611, + "margin_dpo/margin_mean": 296.3592529296875, + "margin_dpo/margin_std": 485.8372802734375, + "step": 546 + }, + { + "KL/chosen_KL_mean": -589.9801635742188, + "KL/mean": -785.5772094726562, + "KL/rejected_KL_mean": -981.17431640625, + "KL/std": 491.4225769042969, + "epoch": 0.8032305433186491, + "fcm_dpo/beta": 0.0010618357919156551, + "fcm_dpo/delta": -0.016077794134616852, + "fcm_dpo/margin": 391.194091796875, + "fcm_dpo/q_t": 0.4060874581336975, + "grad_norm": 40.24742126464844, + "learning_rate": 5.7666507254280265e-08, + "logits/chosen": -0.8674280643463135, + "logits/rejected": -0.881703794002533, + "logps/chosen": -663.6678466796875, + "logps/ref_chosen": -73.6877212524414, + "logps/ref_rejected": -90.76136779785156, + "logps/rejected": -1071.9356689453125, + "loss": 1.0896, + "margin_dpo/margin_mean": 391.194091796875, + "margin_dpo/margin_std": 540.9904174804688, + "step": 547 + }, + { + "KL/chosen_KL_mean": -622.0706787109375, + "KL/mean": -800.4874267578125, + "KL/rejected_KL_mean": -978.9041748046875, + "KL/std": 532.3212280273438, + "epoch": 0.8046989720998532, + "fcm_dpo/beta": 0.00106256443541497, + "fcm_dpo/delta": 0.021664846688508987, + "fcm_dpo/margin": 356.8335266113281, + "fcm_dpo/q_t": 0.41794323921203613, + "grad_norm": 31.893949508666992, + "learning_rate": 5.684919345471029e-08, + "logits/chosen": -0.9592008590698242, + "logits/rejected": -0.9574205875396729, + "logps/chosen": -687.3170166015625, + "logps/ref_chosen": -65.24634552001953, + "logps/ref_rejected": -94.11807250976562, + "logps/rejected": -1073.022216796875, + "loss": 1.1266, + "margin_dpo/margin_mean": 356.8335266113281, + "margin_dpo/margin_std": 595.5147094726562, + "step": 548 + }, + { + "KL/chosen_KL_mean": -648.3043823242188, + "KL/mean": -785.566162109375, + "KL/rejected_KL_mean": -922.8280639648438, + "KL/std": 443.6226501464844, + "epoch": 0.8061674008810573, + "fcm_dpo/beta": 0.0010651289485394955, + "fcm_dpo/delta": 0.011987905949354172, + "fcm_dpo/margin": 274.5236511230469, + "fcm_dpo/q_t": 0.4341329336166382, + "grad_norm": 56.424461364746094, + "learning_rate": 5.603696935852426e-08, + "logits/chosen": -0.9499194622039795, + "logits/rejected": -0.94138503074646, + "logps/chosen": -697.5167236328125, + "logps/ref_chosen": -49.21235656738281, + "logps/ref_rejected": -73.91031646728516, + "logps/rejected": -996.7384033203125, + "loss": 1.1934, + "margin_dpo/margin_mean": 274.52362060546875, + "margin_dpo/margin_std": 537.4019775390625, + "step": 549 + }, + { + "KL/chosen_KL_mean": -648.9557495117188, + "KL/mean": -816.33251953125, + "KL/rejected_KL_mean": -983.709228515625, + "KL/std": 498.76806640625, + "epoch": 0.8076358296622613, + "fcm_dpo/beta": 0.0010721642756834626, + "fcm_dpo/delta": 0.04264108091592789, + "fcm_dpo/margin": 334.7535095214844, + "fcm_dpo/q_t": 0.41808733344078064, + "grad_norm": 34.55923843383789, + "learning_rate": 5.5229856368582376e-08, + "logits/chosen": -0.898378849029541, + "logits/rejected": -0.9233511686325073, + "logps/chosen": -705.7626953125, + "logps/ref_chosen": -56.80695343017578, + "logps/ref_rejected": -95.12580871582031, + "logps/rejected": -1078.8350830078125, + "loss": 1.1307, + "margin_dpo/margin_mean": 334.7535400390625, + "margin_dpo/margin_std": 521.0529174804688, + "step": 550 + }, + { + "KL/chosen_KL_mean": -560.6070556640625, + "KL/mean": -831.8469848632812, + "KL/rejected_KL_mean": -1103.0869140625, + "KL/std": 532.996337890625, + "epoch": 0.8091042584434655, + "fcm_dpo/beta": 0.0010456846794113517, + "fcm_dpo/delta": -0.17770320177078247, + "fcm_dpo/margin": 542.4798583984375, + "fcm_dpo/q_t": 0.37006914615631104, + "grad_norm": 50.58546447753906, + "learning_rate": 5.4427875753062734e-08, + "logits/chosen": -0.8850421905517578, + "logits/rejected": -0.9424214363098145, + "logps/chosen": -619.71337890625, + "logps/ref_chosen": -59.10633087158203, + "logps/ref_rejected": -111.67280578613281, + "logps/rejected": -1214.759765625, + "loss": 0.9675, + "margin_dpo/margin_mean": 542.4798583984375, + "margin_dpo/margin_std": 536.1229858398438, + "step": 551 + }, + { + "KL/chosen_KL_mean": -546.4177856445312, + "KL/mean": -849.3590087890625, + "KL/rejected_KL_mean": -1152.300048828125, + "KL/std": 611.6688232421875, + "epoch": 0.8105726872246696, + "fcm_dpo/beta": 0.0009956832509487867, + "fcm_dpo/delta": -0.21994295716285706, + "fcm_dpo/margin": 605.8824462890625, + "fcm_dpo/q_t": 0.36827754974365234, + "grad_norm": 51.7211799621582, + "learning_rate": 5.363104864490034e-08, + "logits/chosen": -0.9471904039382935, + "logits/rejected": -0.9889096021652222, + "logps/chosen": -608.7723999023438, + "logps/ref_chosen": -62.35459899902344, + "logps/ref_rejected": -104.56210327148438, + "logps/rejected": -1256.8623046875, + "loss": 0.9761, + "margin_dpo/margin_mean": 605.8823852539062, + "margin_dpo/margin_std": 696.2559814453125, + "step": 552 + }, + { + "KL/chosen_KL_mean": -627.032958984375, + "KL/mean": -778.630859375, + "KL/rejected_KL_mean": -930.2286987304688, + "KL/std": 506.2027587890625, + "epoch": 0.8120411160058737, + "fcm_dpo/beta": 0.0010049683041870594, + "fcm_dpo/delta": 0.0984039306640625, + "fcm_dpo/margin": 303.19573974609375, + "fcm_dpo/q_t": 0.4325829744338989, + "grad_norm": 25.391754150390625, + "learning_rate": 5.2839396041230415e-08, + "logits/chosen": -0.9180362224578857, + "logits/rejected": -0.9116028547286987, + "logps/chosen": -695.291748046875, + "logps/ref_chosen": -68.25881958007812, + "logps/ref_rejected": -98.0971450805664, + "logps/rejected": -1028.325927734375, + "loss": 1.1778, + "margin_dpo/margin_mean": 303.1957702636719, + "margin_dpo/margin_std": 562.311279296875, + "step": 553 + }, + { + "KL/chosen_KL_mean": -641.80712890625, + "KL/mean": -855.0924072265625, + "KL/rejected_KL_mean": -1068.3775634765625, + "KL/std": 544.1128540039062, + "epoch": 0.8135095447870778, + "fcm_dpo/beta": 0.001014210982248187, + "fcm_dpo/delta": -0.034694697707891464, + "fcm_dpo/margin": 426.5704040527344, + "fcm_dpo/q_t": 0.40570682287216187, + "grad_norm": 55.65289306640625, + "learning_rate": 5.205293880283551e-08, + "logits/chosen": -0.9369876980781555, + "logits/rejected": -0.9231326580047607, + "logps/chosen": -709.7548217773438, + "logps/ref_chosen": -67.94767761230469, + "logps/ref_rejected": -89.78272247314453, + "logps/rejected": -1158.1602783203125, + "loss": 1.1142, + "margin_dpo/margin_mean": 426.5704345703125, + "margin_dpo/margin_std": 692.4230346679688, + "step": 554 + }, + { + "KL/chosen_KL_mean": -657.1424560546875, + "KL/mean": -902.9049072265625, + "KL/rejected_KL_mean": -1148.66748046875, + "KL/std": 580.0269165039062, + "epoch": 0.8149779735682819, + "fcm_dpo/beta": 0.0009933705441653728, + "fcm_dpo/delta": -0.09272074699401855, + "fcm_dpo/margin": 491.5249938964844, + "fcm_dpo/q_t": 0.3950349688529968, + "grad_norm": 36.912017822265625, + "learning_rate": 5.127169765359515e-08, + "logits/chosen": -0.9536832571029663, + "logits/rejected": -1.00516676902771, + "logps/chosen": -710.472900390625, + "logps/ref_chosen": -53.33049011230469, + "logps/ref_rejected": -108.47937774658203, + "logps/rejected": -1257.146728515625, + "loss": 1.0769, + "margin_dpo/margin_mean": 491.5250244140625, + "margin_dpo/margin_std": 748.8701171875, + "step": 555 + }, + { + "KL/chosen_KL_mean": -631.5589599609375, + "KL/mean": -781.698974609375, + "KL/rejected_KL_mean": -931.8389892578125, + "KL/std": 445.9133605957031, + "epoch": 0.8164464023494861, + "fcm_dpo/beta": 0.0010029294062405825, + "fcm_dpo/delta": 0.10188616812229156, + "fcm_dpo/margin": 300.2801513671875, + "fcm_dpo/q_t": 0.43013256788253784, + "grad_norm": 37.66986083984375, + "learning_rate": 5.049569317994012e-08, + "logits/chosen": -0.9434751272201538, + "logits/rejected": -0.9371851086616516, + "logps/chosen": -690.203369140625, + "logps/ref_chosen": -58.64447021484375, + "logps/ref_rejected": -101.34040832519531, + "logps/rejected": -1033.179443359375, + "loss": 1.1549, + "margin_dpo/margin_mean": 300.2801513671875, + "margin_dpo/margin_std": 445.45648193359375, + "step": 556 + }, + { + "KL/chosen_KL_mean": -681.3212890625, + "KL/mean": -898.03466796875, + "KL/rejected_KL_mean": -1114.748046875, + "KL/std": 606.8817749023438, + "epoch": 0.8179148311306902, + "fcm_dpo/beta": 0.00100015162024647, + "fcm_dpo/delta": -0.03522220626473427, + "fcm_dpo/margin": 433.4267883300781, + "fcm_dpo/q_t": 0.4047701060771942, + "grad_norm": 59.12196731567383, + "learning_rate": 4.9724945830310144e-08, + "logits/chosen": -0.974113404750824, + "logits/rejected": -1.007190227508545, + "logps/chosen": -749.1619873046875, + "logps/ref_chosen": -67.84066009521484, + "logps/ref_rejected": -109.93965911865234, + "logps/rejected": -1224.687744140625, + "loss": 1.107, + "margin_dpo/margin_mean": 433.4267883300781, + "margin_dpo/margin_std": 684.0028076171875, + "step": 557 + }, + { + "KL/chosen_KL_mean": -605.5770263671875, + "KL/mean": -900.3006591796875, + "KL/rejected_KL_mean": -1195.0242919921875, + "KL/std": 560.5341186523438, + "epoch": 0.8193832599118943, + "fcm_dpo/beta": 0.0009742493275552988, + "fcm_dpo/delta": -0.18480078876018524, + "fcm_dpo/margin": 589.447265625, + "fcm_dpo/q_t": 0.36719027161598206, + "grad_norm": 32.32392120361328, + "learning_rate": 4.8959475914614554e-08, + "logits/chosen": -0.9915221929550171, + "logits/rejected": -1.0101500749588013, + "logps/chosen": -667.9453125, + "logps/ref_chosen": -62.36824035644531, + "logps/ref_rejected": -102.16102600097656, + "logps/rejected": -1297.185302734375, + "loss": 0.9754, + "margin_dpo/margin_mean": 589.447265625, + "margin_dpo/margin_std": 615.0665283203125, + "step": 558 + }, + { + "KL/chosen_KL_mean": -699.533935546875, + "KL/mean": -948.122802734375, + "KL/rejected_KL_mean": -1196.711669921875, + "KL/std": 582.9520263671875, + "epoch": 0.8208516886930984, + "fcm_dpo/beta": 0.0009549415553919971, + "fcm_dpo/delta": -0.0784287303686142, + "fcm_dpo/margin": 497.1776428222656, + "fcm_dpo/q_t": 0.3929804563522339, + "grad_norm": 28.45130157470703, + "learning_rate": 4.8199303603697614e-08, + "logits/chosen": -1.1178151369094849, + "logits/rejected": -1.1277766227722168, + "logps/chosen": -760.2862548828125, + "logps/ref_chosen": -60.752323150634766, + "logps/ref_rejected": -93.44229125976562, + "logps/rejected": -1290.15380859375, + "loss": 1.0501, + "margin_dpo/margin_mean": 497.17767333984375, + "margin_dpo/margin_std": 634.4929809570312, + "step": 559 + }, + { + "KL/chosen_KL_mean": -626.045166015625, + "KL/mean": -804.8916015625, + "KL/rejected_KL_mean": -983.7379760742188, + "KL/std": 515.1402587890625, + "epoch": 0.8223201174743024, + "fcm_dpo/beta": 0.0009533166885375977, + "fcm_dpo/delta": 0.06086999550461769, + "fcm_dpo/margin": 357.6928405761719, + "fcm_dpo/q_t": 0.42201805114746094, + "grad_norm": 31.18995475769043, + "learning_rate": 4.7444448928806615e-08, + "logits/chosen": -0.8733669519424438, + "logits/rejected": -0.856816828250885, + "logps/chosen": -684.1489868164062, + "logps/ref_chosen": -58.10382080078125, + "logps/ref_rejected": -79.99122619628906, + "logps/rejected": -1063.729248046875, + "loss": 1.1422, + "margin_dpo/margin_mean": 357.69287109375, + "margin_dpo/margin_std": 554.28125, + "step": 560 + }, + { + "KL/chosen_KL_mean": -734.6890258789062, + "KL/mean": -880.0928344726562, + "KL/rejected_KL_mean": -1025.49658203125, + "KL/std": 505.81610107421875, + "epoch": 0.8237885462555066, + "fcm_dpo/beta": 0.0009805468143895268, + "fcm_dpo/delta": 0.11781884729862213, + "fcm_dpo/margin": 290.80755615234375, + "fcm_dpo/q_t": 0.43384015560150146, + "grad_norm": 41.6833610534668, + "learning_rate": 4.669493178106432e-08, + "logits/chosen": -1.0193910598754883, + "logits/rejected": -1.037698745727539, + "logps/chosen": -785.6019287109375, + "logps/ref_chosen": -50.912879943847656, + "logps/ref_rejected": -99.06856536865234, + "logps/rejected": -1124.565185546875, + "loss": 1.208, + "margin_dpo/margin_mean": 290.8075256347656, + "margin_dpo/margin_std": 622.505615234375, + "step": 561 + }, + { + "KL/chosen_KL_mean": -690.031982421875, + "KL/mean": -897.793701171875, + "KL/rejected_KL_mean": -1105.555419921875, + "KL/std": 561.4027099609375, + "epoch": 0.8252569750367107, + "fcm_dpo/beta": 0.0009777405066415668, + "fcm_dpo/delta": -0.007167506963014603, + "fcm_dpo/margin": 415.52349853515625, + "fcm_dpo/q_t": 0.40837323665618896, + "grad_norm": 40.292320251464844, + "learning_rate": 4.5950771910944596e-08, + "logits/chosen": -1.024315357208252, + "logits/rejected": -1.0398998260498047, + "logps/chosen": -749.496337890625, + "logps/ref_chosen": -59.46440124511719, + "logps/ref_rejected": -96.54266357421875, + "logps/rejected": -1202.09814453125, + "loss": 1.1004, + "margin_dpo/margin_mean": 415.52349853515625, + "margin_dpo/margin_std": 602.928955078125, + "step": 562 + }, + { + "KL/chosen_KL_mean": -751.924072265625, + "KL/mean": -913.3809204101562, + "KL/rejected_KL_mean": -1074.837890625, + "KL/std": 594.7562255859375, + "epoch": 0.8267254038179148, + "fcm_dpo/beta": 0.0009785356232896447, + "fcm_dpo/delta": -0.04188579320907593, + "fcm_dpo/margin": 322.9136657714844, + "fcm_dpo/q_t": 0.4237578213214874, + "grad_norm": 40.284427642822266, + "learning_rate": 4.521198892775202e-08, + "logits/chosen": -1.0153368711471558, + "logits/rejected": -1.0244905948638916, + "logps/chosen": -812.5322265625, + "logps/ref_chosen": -60.60819625854492, + "logps/ref_rejected": -94.56770324707031, + "logps/rejected": -1169.405517578125, + "loss": 1.2111, + "margin_dpo/margin_mean": 322.9136657714844, + "margin_dpo/margin_std": 696.208251953125, + "step": 563 + }, + { + "KL/chosen_KL_mean": -686.2769775390625, + "KL/mean": -883.5341796875, + "KL/rejected_KL_mean": -1080.7913818359375, + "KL/std": 529.4177856445312, + "epoch": 0.8281938325991189, + "fcm_dpo/beta": 0.0009786732262000442, + "fcm_dpo/delta": 0.01432707067579031, + "fcm_dpo/margin": 394.514404296875, + "fcm_dpo/q_t": 0.41156822443008423, + "grad_norm": 40.814979553222656, + "learning_rate": 4.447860229910544e-08, + "logits/chosen": -1.0610636472702026, + "logits/rejected": -1.053609013557434, + "logps/chosen": -760.5452880859375, + "logps/ref_chosen": -74.26837921142578, + "logps/ref_rejected": -93.23818969726562, + "logps/rejected": -1174.029541015625, + "loss": 1.0964, + "margin_dpo/margin_mean": 394.514404296875, + "margin_dpo/margin_std": 508.09417724609375, + "step": 564 + }, + { + "KL/chosen_KL_mean": -716.1942138671875, + "KL/mean": -929.130126953125, + "KL/rejected_KL_mean": -1142.0660400390625, + "KL/std": 609.8633422851562, + "epoch": 0.8296622613803231, + "fcm_dpo/beta": 0.0009748205775395036, + "fcm_dpo/delta": -0.015806902199983597, + "fcm_dpo/margin": 425.871826171875, + "fcm_dpo/q_t": 0.41027140617370605, + "grad_norm": 42.958003997802734, + "learning_rate": 4.375063135042445e-08, + "logits/chosen": -0.9631332159042358, + "logits/rejected": -0.9660812616348267, + "logps/chosen": -785.214111328125, + "logps/ref_chosen": -69.0199203491211, + "logps/ref_rejected": -85.7789306640625, + "logps/rejected": -1227.844970703125, + "loss": 1.127, + "margin_dpo/margin_mean": 425.871826171875, + "margin_dpo/margin_std": 730.54736328125, + "step": 565 + }, + { + "KL/chosen_KL_mean": -707.080322265625, + "KL/mean": -945.5238647460938, + "KL/rejected_KL_mean": -1183.967529296875, + "KL/std": 644.3311767578125, + "epoch": 0.8311306901615272, + "fcm_dpo/beta": 0.000970390741713345, + "fcm_dpo/delta": -0.06614132225513458, + "fcm_dpo/margin": 476.88720703125, + "fcm_dpo/q_t": 0.3978080153465271, + "grad_norm": 32.570377349853516, + "learning_rate": 4.3028095264420525e-08, + "logits/chosen": -1.0035604238510132, + "logits/rejected": -1.0268689393997192, + "logps/chosen": -773.6256103515625, + "logps/ref_chosen": -66.5453109741211, + "logps/ref_rejected": -103.86932373046875, + "logps/rejected": -1287.8367919921875, + "loss": 1.0997, + "margin_dpo/margin_mean": 476.88720703125, + "margin_dpo/margin_std": 747.5025634765625, + "step": 566 + }, + { + "KL/chosen_KL_mean": -654.31689453125, + "KL/mean": -836.738037109375, + "KL/rejected_KL_mean": -1019.1592407226562, + "KL/std": 439.9637756347656, + "epoch": 0.8325991189427313, + "fcm_dpo/beta": 0.000967850093729794, + "fcm_dpo/delta": 0.048623181879520416, + "fcm_dpo/margin": 364.84228515625, + "fcm_dpo/q_t": 0.4172418713569641, + "grad_norm": 42.263118743896484, + "learning_rate": 4.231101308059165e-08, + "logits/chosen": -1.077162504196167, + "logits/rejected": -1.0844173431396484, + "logps/chosen": -707.1751708984375, + "logps/ref_chosen": -52.85829544067383, + "logps/ref_rejected": -85.37095642089844, + "logps/rejected": -1104.5302734375, + "loss": 1.1145, + "margin_dpo/margin_mean": 364.8423156738281, + "margin_dpo/margin_std": 478.48101806640625, + "step": 567 + }, + { + "KL/chosen_KL_mean": -648.8751220703125, + "KL/mean": -889.5942993164062, + "KL/rejected_KL_mean": -1130.3134765625, + "KL/std": 514.7445068359375, + "epoch": 0.8340675477239354, + "fcm_dpo/beta": 0.0009604596998542547, + "fcm_dpo/delta": -0.06550342589616776, + "fcm_dpo/margin": 481.43841552734375, + "fcm_dpo/q_t": 0.3923587203025818, + "grad_norm": 30.015487670898438, + "learning_rate": 4.1599403694720145e-08, + "logits/chosen": -1.0153778791427612, + "logits/rejected": -1.0532267093658447, + "logps/chosen": -694.0675048828125, + "logps/ref_chosen": -45.1923828125, + "logps/ref_rejected": -89.09236907958984, + "logps/rejected": -1219.40576171875, + "loss": 1.0371, + "margin_dpo/margin_mean": 481.43841552734375, + "margin_dpo/margin_std": 534.50341796875, + "step": 568 + }, + { + "KL/chosen_KL_mean": -744.4910278320312, + "KL/mean": -949.3118896484375, + "KL/rejected_KL_mean": -1154.1328125, + "KL/std": 672.5451049804688, + "epoch": 0.8355359765051396, + "fcm_dpo/beta": 0.0009647671831771731, + "fcm_dpo/delta": 0.00415463000535965, + "fcm_dpo/margin": 409.6417236328125, + "fcm_dpo/q_t": 0.410520076751709, + "grad_norm": 49.2674446105957, + "learning_rate": 4.089328585837512e-08, + "logits/chosen": -1.025818109512329, + "logits/rejected": -1.0322705507278442, + "logps/chosen": -808.2116088867188, + "logps/ref_chosen": -63.72056198120117, + "logps/ref_rejected": -79.10325622558594, + "logps/rejected": -1233.236083984375, + "loss": 1.134, + "margin_dpo/margin_mean": 409.6417236328125, + "margin_dpo/margin_std": 681.7449951171875, + "step": 569 + }, + { + "KL/chosen_KL_mean": -687.3922119140625, + "KL/mean": -874.5309448242188, + "KL/rejected_KL_mean": -1061.669677734375, + "KL/std": 541.8402709960938, + "epoch": 0.8370044052863436, + "fcm_dpo/beta": 0.0009631971479393542, + "fcm_dpo/delta": 0.040986284613609314, + "fcm_dpo/margin": 374.2774963378906, + "fcm_dpo/q_t": 0.4191049039363861, + "grad_norm": 29.400341033935547, + "learning_rate": 4.019267817841834e-08, + "logits/chosen": -1.1264129877090454, + "logits/rejected": -1.1218767166137695, + "logps/chosen": -749.0067749023438, + "logps/ref_chosen": -61.61454391479492, + "logps/ref_rejected": -82.14186096191406, + "logps/rejected": -1143.8115234375, + "loss": 1.1329, + "margin_dpo/margin_mean": 374.2774963378906, + "margin_dpo/margin_std": 585.3759155273438, + "step": 570 + }, + { + "KL/chosen_KL_mean": -716.3717041015625, + "KL/mean": -937.341796875, + "KL/rejected_KL_mean": -1158.3118896484375, + "KL/std": 546.9036865234375, + "epoch": 0.8384728340675477, + "fcm_dpo/beta": 0.0009610787965357304, + "fcm_dpo/delta": -0.02597730979323387, + "fcm_dpo/margin": 441.94012451171875, + "fcm_dpo/q_t": 0.40491753816604614, + "grad_norm": 41.382713317871094, + "learning_rate": 3.9497599116513705e-08, + "logits/chosen": -1.0316365957260132, + "logits/rejected": -1.0433616638183594, + "logps/chosen": -769.42578125, + "logps/ref_chosen": -53.05406188964844, + "logps/ref_rejected": -91.33682250976562, + "logps/rejected": -1249.648681640625, + "loss": 1.1022, + "margin_dpo/margin_mean": 441.94012451171875, + "margin_dpo/margin_std": 674.6705322265625, + "step": 571 + }, + { + "KL/chosen_KL_mean": -752.6365966796875, + "KL/mean": -983.4873046875, + "KL/rejected_KL_mean": -1214.3380126953125, + "KL/std": 640.98876953125, + "epoch": 0.8399412628487518, + "fcm_dpo/beta": 0.000953345384914428, + "fcm_dpo/delta": -0.04224724695086479, + "fcm_dpo/margin": 461.7014465332031, + "fcm_dpo/q_t": 0.4049929678440094, + "grad_norm": 28.851036071777344, + "learning_rate": 3.880806698864086e-08, + "logits/chosen": -1.074343204498291, + "logits/rejected": -1.1045624017715454, + "logps/chosen": -801.095947265625, + "logps/ref_chosen": -48.45928955078125, + "logps/ref_rejected": -83.55703735351562, + "logps/rejected": -1297.89501953125, + "loss": 1.1089, + "margin_dpo/margin_mean": 461.7014465332031, + "margin_dpo/margin_std": 761.991943359375, + "step": 572 + }, + { + "KL/chosen_KL_mean": -734.09912109375, + "KL/mean": -932.6853637695312, + "KL/rejected_KL_mean": -1131.271728515625, + "KL/std": 566.801513671875, + "epoch": 0.8414096916299559, + "fcm_dpo/beta": 0.0009574309224262834, + "fcm_dpo/delta": 0.020482124760746956, + "fcm_dpo/margin": 397.17266845703125, + "fcm_dpo/q_t": 0.4144536852836609, + "grad_norm": 25.593852996826172, + "learning_rate": 3.812409996461275e-08, + "logits/chosen": -1.0817201137542725, + "logits/rejected": -1.092029333114624, + "logps/chosen": -785.7216796875, + "logps/ref_chosen": -51.62262725830078, + "logps/ref_rejected": -85.32499694824219, + "logps/rejected": -1216.5966796875, + "loss": 1.108, + "margin_dpo/margin_mean": 397.17266845703125, + "margin_dpo/margin_std": 568.3775634765625, + "step": 573 + }, + { + "KL/chosen_KL_mean": -666.8729248046875, + "KL/mean": -884.59521484375, + "KL/rejected_KL_mean": -1102.317626953125, + "KL/std": 510.1865234375, + "epoch": 0.8428781204111601, + "fcm_dpo/beta": 0.0009572736453264952, + "fcm_dpo/delta": -0.017637627199292183, + "fcm_dpo/margin": 435.4447021484375, + "fcm_dpo/q_t": 0.40490391850471497, + "grad_norm": 34.41420364379883, + "learning_rate": 3.74457160675965e-08, + "logits/chosen": -1.0834131240844727, + "logits/rejected": -1.1097838878631592, + "logps/chosen": -717.9173583984375, + "logps/ref_chosen": -51.04446029663086, + "logps/ref_rejected": -92.80640411376953, + "logps/rejected": -1195.1240234375, + "loss": 1.0865, + "margin_dpo/margin_mean": 435.4447021484375, + "margin_dpo/margin_std": 589.404296875, + "step": 574 + }, + { + "KL/chosen_KL_mean": -747.3386840820312, + "KL/mean": -951.7550659179688, + "KL/rejected_KL_mean": -1156.17138671875, + "KL/std": 525.8989868164062, + "epoch": 0.8443465491923642, + "fcm_dpo/beta": 0.000948374392464757, + "fcm_dpo/delta": 0.011579148471355438, + "fcm_dpo/margin": 408.832763671875, + "fcm_dpo/q_t": 0.4119390845298767, + "grad_norm": 41.16646957397461, + "learning_rate": 3.677293317363864e-08, + "logits/chosen": -0.9548108577728271, + "logits/rejected": -0.960533618927002, + "logps/chosen": -819.1287841796875, + "logps/ref_chosen": -71.7901382446289, + "logps/ref_rejected": -95.38619995117188, + "logps/rejected": -1251.5576171875, + "loss": 1.1358, + "margin_dpo/margin_mean": 408.8327941894531, + "margin_dpo/margin_std": 670.8654174804688, + "step": 575 + }, + { + "KL/chosen_KL_mean": -732.2178955078125, + "KL/mean": -887.31591796875, + "KL/rejected_KL_mean": -1042.4139404296875, + "KL/std": 495.1169128417969, + "epoch": 0.8458149779735683, + "fcm_dpo/beta": 0.0009677187772467732, + "fcm_dpo/delta": 0.10300955176353455, + "fcm_dpo/margin": 310.19610595703125, + "fcm_dpo/q_t": 0.4320542812347412, + "grad_norm": 33.871307373046875, + "learning_rate": 3.6105769011194224e-08, + "logits/chosen": -1.1345970630645752, + "logits/rejected": -1.1627776622772217, + "logps/chosen": -786.4808349609375, + "logps/ref_chosen": -54.262962341308594, + "logps/ref_rejected": -100.75428009033203, + "logps/rejected": -1143.168212890625, + "loss": 1.1832, + "margin_dpo/margin_mean": 310.1961364746094, + "margin_dpo/margin_std": 572.8515014648438, + "step": 576 + }, + { + "KL/chosen_KL_mean": -644.5338134765625, + "KL/mean": -845.4898681640625, + "KL/rejected_KL_mean": -1046.4459228515625, + "KL/std": 548.8078002929688, + "epoch": 0.8472834067547724, + "fcm_dpo/beta": 0.0009760315297171474, + "fcm_dpo/delta": 0.008000888861715794, + "fcm_dpo/margin": 401.9121398925781, + "fcm_dpo/q_t": 0.4116186499595642, + "grad_norm": 26.741127014160156, + "learning_rate": 3.5444241160659304e-08, + "logits/chosen": -1.0345063209533691, + "logits/rejected": -1.0221607685089111, + "logps/chosen": -706.4434814453125, + "logps/ref_chosen": -61.909706115722656, + "logps/ref_rejected": -84.07069396972656, + "logps/rejected": -1130.5166015625, + "loss": 1.1172, + "margin_dpo/margin_mean": 401.912109375, + "margin_dpo/margin_std": 588.481201171875, + "step": 577 + }, + { + "KL/chosen_KL_mean": -641.31494140625, + "KL/mean": -864.2884521484375, + "KL/rejected_KL_mean": -1087.2618408203125, + "KL/std": 547.829833984375, + "epoch": 0.8487518355359766, + "fcm_dpo/beta": 0.0009675461915321648, + "fcm_dpo/delta": -0.033737167716026306, + "fcm_dpo/margin": 445.94696044921875, + "fcm_dpo/q_t": 0.40107935667037964, + "grad_norm": 35.5023078918457, + "learning_rate": 3.478836705390808e-08, + "logits/chosen": -0.9391261339187622, + "logits/rejected": -0.9697315692901611, + "logps/chosen": -690.57861328125, + "logps/ref_chosen": -49.26368713378906, + "logps/ref_rejected": -83.4362564086914, + "logps/rejected": -1170.6981201171875, + "loss": 1.0675, + "margin_dpo/margin_mean": 445.94696044921875, + "margin_dpo/margin_std": 543.1402587890625, + "step": 578 + }, + { + "KL/chosen_KL_mean": -739.6239013671875, + "KL/mean": -874.08740234375, + "KL/rejected_KL_mean": -1008.5507202148438, + "KL/std": 546.8580932617188, + "epoch": 0.8502202643171806, + "fcm_dpo/beta": 0.000979724689386785, + "fcm_dpo/delta": 0.04294705390930176, + "fcm_dpo/margin": 268.92694091796875, + "fcm_dpo/q_t": 0.4389345049858093, + "grad_norm": 53.29568099975586, + "learning_rate": 3.41381639738331e-08, + "logits/chosen": -0.9918534755706787, + "logits/rejected": -0.9905188083648682, + "logps/chosen": -798.509765625, + "logps/ref_chosen": -58.88581848144531, + "logps/ref_rejected": -94.78762817382812, + "logps/rejected": -1103.33837890625, + "loss": 1.2205, + "margin_dpo/margin_mean": 268.92694091796875, + "margin_dpo/margin_std": 589.2688598632812, + "step": 579 + }, + { + "KL/chosen_KL_mean": -540.1065673828125, + "KL/mean": -791.6300048828125, + "KL/rejected_KL_mean": -1043.1533203125, + "KL/std": 603.1106567382812, + "epoch": 0.8516886930983847, + "fcm_dpo/beta": 0.0009632025612518191, + "fcm_dpo/delta": -0.08945266157388687, + "fcm_dpo/margin": 503.046875, + "fcm_dpo/q_t": 0.3941301107406616, + "grad_norm": 30.890201568603516, + "learning_rate": 3.349364905389032e-08, + "logits/chosen": -0.8625004291534424, + "logits/rejected": -0.8941880464553833, + "logps/chosen": -588.8133544921875, + "logps/ref_chosen": -48.70683670043945, + "logps/ref_rejected": -81.7583999633789, + "logps/rejected": -1124.911865234375, + "loss": 1.0525, + "margin_dpo/margin_mean": 503.046875, + "margin_dpo/margin_std": 688.0292358398438, + "step": 580 + }, + { + "KL/chosen_KL_mean": -729.04736328125, + "KL/mean": -905.5509033203125, + "KL/rejected_KL_mean": -1082.054443359375, + "KL/std": 574.6087036132812, + "epoch": 0.8531571218795888, + "fcm_dpo/beta": 0.0009719936642795801, + "fcm_dpo/delta": 0.05880071595311165, + "fcm_dpo/margin": 353.0070495605469, + "fcm_dpo/q_t": 0.42395222187042236, + "grad_norm": 40.021026611328125, + "learning_rate": 3.285483927764726e-08, + "logits/chosen": -1.0945156812667847, + "logits/rejected": -1.103161334991455, + "logps/chosen": -791.2696533203125, + "logps/ref_chosen": -62.22235107421875, + "logps/ref_rejected": -91.73568725585938, + "logps/rejected": -1173.7901611328125, + "loss": 1.1562, + "margin_dpo/margin_mean": 353.0070495605469, + "margin_dpo/margin_std": 629.962158203125, + "step": 581 + }, + { + "KL/chosen_KL_mean": -639.3220825195312, + "KL/mean": -835.397216796875, + "KL/rejected_KL_mean": -1031.472412109375, + "KL/std": 496.10870361328125, + "epoch": 0.8546255506607929, + "fcm_dpo/beta": 0.0009659301722422242, + "fcm_dpo/delta": -0.08082351088523865, + "fcm_dpo/margin": 392.1502990722656, + "fcm_dpo/q_t": 0.4117897152900696, + "grad_norm": 30.24727439880371, + "learning_rate": 3.222175147833556e-08, + "logits/chosen": -1.0156798362731934, + "logits/rejected": -1.039165735244751, + "logps/chosen": -697.55078125, + "logps/ref_chosen": -58.228660583496094, + "logps/ref_rejected": -110.06959533691406, + "logps/rejected": -1141.5419921875, + "loss": 1.1133, + "margin_dpo/margin_mean": 392.1502990722656, + "margin_dpo/margin_std": 531.6431884765625, + "step": 582 + }, + { + "KL/chosen_KL_mean": -728.5455322265625, + "KL/mean": -847.5494384765625, + "KL/rejected_KL_mean": -966.553466796875, + "KL/std": 537.0501098632812, + "epoch": 0.856093979441997, + "fcm_dpo/beta": 0.0009584878571331501, + "fcm_dpo/delta": 0.0019017525482922792, + "fcm_dpo/margin": 238.00784301757812, + "fcm_dpo/q_t": 0.44876495003700256, + "grad_norm": 62.559593200683594, + "learning_rate": 3.159440233840763e-08, + "logits/chosen": -0.9803950190544128, + "logits/rejected": -0.9772003293037415, + "logps/chosen": -785.408447265625, + "logps/ref_chosen": -56.86286163330078, + "logps/ref_rejected": -88.4039306640625, + "logps/rejected": -1054.957275390625, + "loss": 1.2647, + "margin_dpo/margin_mean": 238.00784301757812, + "margin_dpo/margin_std": 635.57080078125, + "step": 583 + }, + { + "KL/chosen_KL_mean": -625.9443359375, + "KL/mean": -869.571044921875, + "KL/rejected_KL_mean": -1113.19775390625, + "KL/std": 543.7821044921875, + "epoch": 0.8575624082232012, + "fcm_dpo/beta": 0.0009496349957771599, + "fcm_dpo/delta": -0.0657280907034874, + "fcm_dpo/margin": 487.2535400390625, + "fcm_dpo/q_t": 0.39483463764190674, + "grad_norm": 34.74457931518555, + "learning_rate": 3.0972808389096635e-08, + "logits/chosen": -1.0310046672821045, + "logits/rejected": -1.0454175472259521, + "logps/chosen": -682.844970703125, + "logps/ref_chosen": -56.90068054199219, + "logps/ref_rejected": -97.63606262207031, + "logps/rejected": -1210.833984375, + "loss": 1.0459, + "margin_dpo/margin_mean": 487.2535400390625, + "margin_dpo/margin_std": 577.2208862304688, + "step": 584 + }, + { + "KL/chosen_KL_mean": -723.0252075195312, + "KL/mean": -949.249267578125, + "KL/rejected_KL_mean": -1175.473388671875, + "KL/std": 627.5142822265625, + "epoch": 0.8590308370044053, + "fcm_dpo/beta": 0.0009398453403264284, + "fcm_dpo/delta": -0.02655157260596752, + "fcm_dpo/margin": 452.44818115234375, + "fcm_dpo/q_t": 0.4046263098716736, + "grad_norm": 30.642621994018555, + "learning_rate": 3.035698600998121e-08, + "logits/chosen": -1.0402522087097168, + "logits/rejected": -1.065436840057373, + "logps/chosen": -783.9991455078125, + "logps/ref_chosen": -60.973968505859375, + "logps/ref_rejected": -84.16952514648438, + "logps/rejected": -1259.6429443359375, + "loss": 1.1158, + "margin_dpo/margin_mean": 452.44818115234375, + "margin_dpo/margin_std": 744.0001220703125, + "step": 585 + }, + { + "KL/chosen_KL_mean": -759.1077880859375, + "KL/mean": -908.213134765625, + "KL/rejected_KL_mean": -1057.318359375, + "KL/std": 538.911865234375, + "epoch": 0.8604992657856094, + "fcm_dpo/beta": 0.0009562649065628648, + "fcm_dpo/delta": 0.1183374673128128, + "fcm_dpo/margin": 298.2107238769531, + "fcm_dpo/q_t": 0.4345587491989136, + "grad_norm": 30.652240753173828, + "learning_rate": 2.974695142855388e-08, + "logits/chosen": -1.0284502506256104, + "logits/rejected": -1.0488755702972412, + "logps/chosen": -815.96337890625, + "logps/ref_chosen": -56.85559844970703, + "logps/ref_rejected": -91.80261993408203, + "logps/rejected": -1149.12109375, + "loss": 1.1994, + "margin_dpo/margin_mean": 298.2107238769531, + "margin_dpo/margin_std": 598.6849365234375, + "step": 586 + }, + { + "KL/chosen_KL_mean": -528.1431884765625, + "KL/mean": -738.4869995117188, + "KL/rejected_KL_mean": -948.830810546875, + "KL/std": 569.4769897460938, + "epoch": 0.8619676945668135, + "fcm_dpo/beta": 0.0009647482074797153, + "fcm_dpo/delta": -0.006192212924361229, + "fcm_dpo/margin": 420.68768310546875, + "fcm_dpo/q_t": 0.40699630975723267, + "grad_norm": 45.825714111328125, + "learning_rate": 2.9142720719793122e-08, + "logits/chosen": -1.0657103061676025, + "logits/rejected": -1.09328293800354, + "logps/chosen": -572.8347778320312, + "logps/ref_chosen": -44.69159698486328, + "logps/ref_rejected": -82.62385559082031, + "logps/rejected": -1031.4547119140625, + "loss": 1.0913, + "margin_dpo/margin_mean": 420.68768310546875, + "margin_dpo/margin_std": 569.4257202148438, + "step": 587 + }, + { + "KL/chosen_KL_mean": -687.0555419921875, + "KL/mean": -863.8863525390625, + "KL/rejected_KL_mean": -1040.7171630859375, + "KL/std": 487.860107421875, + "epoch": 0.8634361233480177, + "fcm_dpo/beta": 0.0009668685379438102, + "fcm_dpo/delta": 0.05999944359064102, + "fcm_dpo/margin": 353.66156005859375, + "fcm_dpo/q_t": 0.42055660486221313, + "grad_norm": 26.90322494506836, + "learning_rate": 2.8544309805740018e-08, + "logits/chosen": -1.039747714996338, + "logits/rejected": -1.066466212272644, + "logps/chosen": -737.3505249023438, + "logps/ref_chosen": -50.29494857788086, + "logps/ref_rejected": -107.36988067626953, + "logps/rejected": -1148.087158203125, + "loss": 1.1351, + "margin_dpo/margin_mean": 353.66156005859375, + "margin_dpo/margin_std": 518.2518310546875, + "step": 588 + }, + { + "KL/chosen_KL_mean": -697.592529296875, + "KL/mean": -931.67236328125, + "KL/rejected_KL_mean": -1165.752197265625, + "KL/std": 557.1414184570312, + "epoch": 0.8649045521292217, + "fcm_dpo/beta": 0.0009669238934293389, + "fcm_dpo/delta": -0.055137749761343, + "fcm_dpo/margin": 468.15966796875, + "fcm_dpo/q_t": 0.39669230580329895, + "grad_norm": 27.10498046875, + "learning_rate": 2.7951734455078786e-08, + "logits/chosen": -1.0181684494018555, + "logits/rejected": -1.0344040393829346, + "logps/chosen": -757.5223999023438, + "logps/ref_chosen": -59.929908752441406, + "logps/ref_rejected": -111.65534973144531, + "logps/rejected": -1277.407470703125, + "loss": 1.058, + "margin_dpo/margin_mean": 468.1596984863281, + "margin_dpo/margin_std": 587.922607421875, + "step": 589 + }, + { + "KL/chosen_KL_mean": -597.85400390625, + "KL/mean": -826.746826171875, + "KL/rejected_KL_mean": -1055.6396484375, + "KL/std": 536.9008178710938, + "epoch": 0.8663729809104258, + "fcm_dpo/beta": 0.0009563218918628991, + "fcm_dpo/delta": -0.039607785642147064, + "fcm_dpo/margin": 457.78570556640625, + "fcm_dpo/q_t": 0.40053310990333557, + "grad_norm": 27.430288314819336, + "learning_rate": 2.736501028272095e-08, + "logits/chosen": -0.9699843525886536, + "logits/rejected": -0.9988424777984619, + "logps/chosen": -653.663818359375, + "logps/ref_chosen": -55.80979537963867, + "logps/ref_rejected": -106.06282043457031, + "logps/rejected": -1161.7025146484375, + "loss": 1.0661, + "margin_dpo/margin_mean": 457.78570556640625, + "margin_dpo/margin_std": 581.5162353515625, + "step": 590 + }, + { + "KL/chosen_KL_mean": -660.5640869140625, + "KL/mean": -869.3895874023438, + "KL/rejected_KL_mean": -1078.215087890625, + "KL/std": 518.4139404296875, + "epoch": 0.8678414096916299, + "fcm_dpo/beta": 0.0009555625729262829, + "fcm_dpo/delta": 0.0009453542297706008, + "fcm_dpo/margin": 417.65093994140625, + "fcm_dpo/q_t": 0.4076194763183594, + "grad_norm": 31.656789779663086, + "learning_rate": 2.678415274939408e-08, + "logits/chosen": -1.0377655029296875, + "logits/rejected": -1.0326879024505615, + "logps/chosen": -716.8046875, + "logps/ref_chosen": -56.24061965942383, + "logps/ref_rejected": -83.78629302978516, + "logps/rejected": -1162.0013427734375, + "loss": 1.0991, + "margin_dpo/margin_mean": 417.65093994140625, + "margin_dpo/margin_std": 582.1824951171875, + "step": 591 + }, + { + "KL/chosen_KL_mean": -691.4828491210938, + "KL/mean": -881.25439453125, + "KL/rejected_KL_mean": -1071.02587890625, + "KL/std": 527.1260375976562, + "epoch": 0.869309838472834, + "fcm_dpo/beta": 0.0009605808882042766, + "fcm_dpo/delta": 0.03677193447947502, + "fcm_dpo/margin": 379.5429992675781, + "fcm_dpo/q_t": 0.41771793365478516, + "grad_norm": 32.95262908935547, + "learning_rate": 2.6209177161234442e-08, + "logits/chosen": -1.0429775714874268, + "logits/rejected": -1.0484647750854492, + "logps/chosen": -739.423095703125, + "logps/ref_chosen": -47.94025421142578, + "logps/ref_rejected": -75.73287963867188, + "logps/rejected": -1146.7587890625, + "loss": 1.1662, + "margin_dpo/margin_mean": 379.54296875, + "margin_dpo/margin_std": 707.2916870117188, + "step": 592 + }, + { + "KL/chosen_KL_mean": -653.00244140625, + "KL/mean": -799.3251953125, + "KL/rejected_KL_mean": -945.64794921875, + "KL/std": 570.3946533203125, + "epoch": 0.8707782672540382, + "fcm_dpo/beta": 0.0009675570763647556, + "fcm_dpo/delta": 0.024251248687505722, + "fcm_dpo/margin": 292.6455383300781, + "fcm_dpo/q_t": 0.4363827705383301, + "grad_norm": 49.979095458984375, + "learning_rate": 2.564009866938349e-08, + "logits/chosen": -0.8947024345397949, + "logits/rejected": -0.8848444819450378, + "logps/chosen": -701.6932373046875, + "logps/ref_chosen": -48.690757751464844, + "logps/ref_rejected": -60.90800094604492, + "logps/rejected": -1006.555908203125, + "loss": 1.2117, + "margin_dpo/margin_mean": 292.6455383300781, + "margin_dpo/margin_std": 629.4410400390625, + "step": 593 + }, + { + "KL/chosen_KL_mean": -645.703369140625, + "KL/mean": -829.5958251953125, + "KL/rejected_KL_mean": -1013.4883422851562, + "KL/std": 561.0531616210938, + "epoch": 0.8722466960352423, + "fcm_dpo/beta": 0.0009798401733860373, + "fcm_dpo/delta": 0.03994458168745041, + "fcm_dpo/margin": 367.78497314453125, + "fcm_dpo/q_t": 0.41849082708358765, + "grad_norm": 37.234134674072266, + "learning_rate": 2.5076932269588708e-08, + "logits/chosen": -1.017820954322815, + "logits/rejected": -1.0110870599746704, + "logps/chosen": -700.6382446289062, + "logps/ref_chosen": -54.93488693237305, + "logps/ref_rejected": -86.09967803955078, + "logps/rejected": -1099.5880126953125, + "loss": 1.1434, + "margin_dpo/margin_mean": 367.7850036621094, + "margin_dpo/margin_std": 599.2181396484375, + "step": 594 + }, + { + "KL/chosen_KL_mean": -591.552001953125, + "KL/mean": -793.7593994140625, + "KL/rejected_KL_mean": -995.966796875, + "KL/std": 504.74029541015625, + "epoch": 0.8737151248164464, + "fcm_dpo/beta": 0.0009743094560690224, + "fcm_dpo/delta": 0.006154121831059456, + "fcm_dpo/margin": 404.4147033691406, + "fcm_dpo/q_t": 0.41215771436691284, + "grad_norm": 41.969970703125, + "learning_rate": 2.451969280180849e-08, + "logits/chosen": -1.0084481239318848, + "logits/rejected": -1.0286178588867188, + "logps/chosen": -640.972412109375, + "logps/ref_chosen": -49.4204216003418, + "logps/ref_rejected": -80.62731170654297, + "logps/rejected": -1076.593994140625, + "loss": 1.1, + "margin_dpo/margin_mean": 404.41473388671875, + "margin_dpo/margin_std": 568.5491333007812, + "step": 595 + }, + { + "KL/chosen_KL_mean": -683.7548828125, + "KL/mean": -831.98193359375, + "KL/rejected_KL_mean": -980.2089233398438, + "KL/std": 516.1655883789062, + "epoch": 0.8751835535976505, + "fcm_dpo/beta": 0.000993602559901774, + "fcm_dpo/delta": 0.10849238932132721, + "fcm_dpo/margin": 296.4541015625, + "fcm_dpo/q_t": 0.43519163131713867, + "grad_norm": 55.20982360839844, + "learning_rate": 2.396839494982103e-08, + "logits/chosen": -0.9886128306388855, + "logits/rejected": -0.9605743885040283, + "logps/chosen": -743.546630859375, + "logps/ref_chosen": -59.791683197021484, + "logps/ref_rejected": -80.09111785888672, + "logps/rejected": -1060.300048828125, + "loss": 1.1997, + "margin_dpo/margin_mean": 296.4541015625, + "margin_dpo/margin_std": 616.732421875, + "step": 596 + }, + { + "KL/chosen_KL_mean": -663.219482421875, + "KL/mean": -916.369384765625, + "KL/rejected_KL_mean": -1169.5191650390625, + "KL/std": 611.7984619140625, + "epoch": 0.8766519823788547, + "fcm_dpo/beta": 0.0009732224280014634, + "fcm_dpo/delta": -0.10066782683134079, + "fcm_dpo/margin": 506.2997131347656, + "fcm_dpo/q_t": 0.3910676836967468, + "grad_norm": 28.543447494506836, + "learning_rate": 2.3423053240837514e-08, + "logits/chosen": -0.9715480208396912, + "logits/rejected": -1.0194578170776367, + "logps/chosen": -720.4802856445312, + "logps/ref_chosen": -57.26078796386719, + "logps/ref_rejected": -100.6937255859375, + "logps/rejected": -1270.212890625, + "loss": 1.0574, + "margin_dpo/margin_mean": 506.2997131347656, + "margin_dpo/margin_std": 674.1318359375, + "step": 597 + }, + { + "KL/chosen_KL_mean": -661.1876831054688, + "KL/mean": -860.0836791992188, + "KL/rejected_KL_mean": -1058.979736328125, + "KL/std": 523.8168334960938, + "epoch": 0.8781204111600588, + "fcm_dpo/beta": 0.0009721480309963226, + "fcm_dpo/delta": 0.012626536190509796, + "fcm_dpo/margin": 397.7920227050781, + "fcm_dpo/q_t": 0.4108089506626129, + "grad_norm": 44.60902404785156, + "learning_rate": 2.2883682045119062e-08, + "logits/chosen": -0.9934415817260742, + "logits/rejected": -1.0047008991241455, + "logps/chosen": -713.7061767578125, + "logps/ref_chosen": -52.51850509643555, + "logps/ref_rejected": -89.44385528564453, + "logps/rejected": -1148.423583984375, + "loss": 1.1157, + "margin_dpo/margin_mean": 397.79205322265625, + "margin_dpo/margin_std": 574.5714721679688, + "step": 598 + }, + { + "KL/chosen_KL_mean": -680.6580810546875, + "KL/mean": -848.9474487304688, + "KL/rejected_KL_mean": -1017.2368774414062, + "KL/std": 514.0728149414062, + "epoch": 0.8795888399412628, + "fcm_dpo/beta": 0.0009692448657006025, + "fcm_dpo/delta": -0.05783551558852196, + "fcm_dpo/margin": 336.578857421875, + "fcm_dpo/q_t": 0.42119812965393066, + "grad_norm": 29.521316528320312, + "learning_rate": 2.2350295575598367e-08, + "logits/chosen": -0.9816111326217651, + "logits/rejected": -0.9900449514389038, + "logps/chosen": -730.460693359375, + "logps/ref_chosen": -49.802677154541016, + "logps/ref_rejected": -82.978515625, + "logps/rejected": -1100.2154541015625, + "loss": 1.1461, + "margin_dpo/margin_mean": 336.578857421875, + "margin_dpo/margin_std": 485.81683349609375, + "step": 599 + }, + { + "KL/chosen_KL_mean": -738.8037109375, + "KL/mean": -900.9039306640625, + "KL/rejected_KL_mean": -1063.004150390625, + "KL/std": 517.9166870117188, + "epoch": 0.8810572687224669, + "fcm_dpo/beta": 0.0009796018712222576, + "fcm_dpo/delta": 0.08520510792732239, + "fcm_dpo/margin": 324.20037841796875, + "fcm_dpo/q_t": 0.42857182025909424, + "grad_norm": 27.683170318603516, + "learning_rate": 2.1822907887504932e-08, + "logits/chosen": -1.0804599523544312, + "logits/rejected": -1.078963279724121, + "logps/chosen": -805.2385864257812, + "logps/ref_chosen": -66.43487548828125, + "logps/ref_rejected": -85.45649719238281, + "logps/rejected": -1148.4605712890625, + "loss": 1.1791, + "margin_dpo/margin_mean": 324.20037841796875, + "margin_dpo/margin_std": 615.3521118164062, + "step": 600 + }, + { + "KL/chosen_KL_mean": -733.8780517578125, + "KL/mean": -939.5074462890625, + "KL/rejected_KL_mean": -1145.13671875, + "KL/std": 553.6764526367188, + "epoch": 0.882525697503671, + "fcm_dpo/beta": 0.0009836689569056034, + "fcm_dpo/delta": -0.004790919832885265, + "fcm_dpo/margin": 411.2587890625, + "fcm_dpo/q_t": 0.4059777557849884, + "grad_norm": 35.56853103637695, + "learning_rate": 2.1301532877994742e-08, + "logits/chosen": -0.9942201972007751, + "logits/rejected": -1.0163451433181763, + "logps/chosen": -793.01171875, + "logps/ref_chosen": -59.13361358642578, + "logps/ref_rejected": -94.69093322753906, + "logps/rejected": -1239.8277587890625, + "loss": 1.0895, + "margin_dpo/margin_mean": 411.2587890625, + "margin_dpo/margin_std": 544.517822265625, + "step": 601 + }, + { + "KL/chosen_KL_mean": -508.48651123046875, + "KL/mean": -759.9006958007812, + "KL/rejected_KL_mean": -1011.3148803710938, + "KL/std": 511.07110595703125, + "epoch": 0.8839941262848752, + "fcm_dpo/beta": 0.0009767541196197271, + "fcm_dpo/delta": -0.09598802030086517, + "fcm_dpo/margin": 502.8282775878906, + "fcm_dpo/q_t": 0.3871699869632721, + "grad_norm": 64.79503631591797, + "learning_rate": 2.0786184285784298e-08, + "logits/chosen": -1.037233591079712, + "logits/rejected": -1.0741159915924072, + "logps/chosen": -557.080078125, + "logps/ref_chosen": -48.59352111816406, + "logps/ref_rejected": -87.6685562133789, + "logps/rejected": -1098.9833984375, + "loss": 1.0188, + "margin_dpo/margin_mean": 502.8282775878906, + "margin_dpo/margin_std": 533.3328857421875, + "step": 602 + }, + { + "KL/chosen_KL_mean": -641.906494140625, + "KL/mean": -867.3243408203125, + "KL/rejected_KL_mean": -1092.7423095703125, + "KL/std": 565.2948608398438, + "epoch": 0.8854625550660793, + "fcm_dpo/beta": 0.0009601364727132022, + "fcm_dpo/delta": -0.03442168980836868, + "fcm_dpo/margin": 450.83575439453125, + "fcm_dpo/q_t": 0.4039689302444458, + "grad_norm": 37.1284065246582, + "learning_rate": 2.0276875690788204e-08, + "logits/chosen": -1.0195807218551636, + "logits/rejected": -1.0123958587646484, + "logps/chosen": -712.3211059570312, + "logps/ref_chosen": -70.41461944580078, + "logps/ref_rejected": -100.32559967041016, + "logps/rejected": -1193.06787109375, + "loss": 1.0902, + "margin_dpo/margin_mean": 450.83575439453125, + "margin_dpo/margin_std": 660.8984375, + "step": 603 + }, + { + "KL/chosen_KL_mean": -633.6109619140625, + "KL/mean": -884.2783813476562, + "KL/rejected_KL_mean": -1134.94580078125, + "KL/std": 557.851318359375, + "epoch": 0.8869309838472834, + "fcm_dpo/beta": 0.0009481116430833936, + "fcm_dpo/delta": -0.07910436391830444, + "fcm_dpo/margin": 501.3348693847656, + "fcm_dpo/q_t": 0.39480096101760864, + "grad_norm": 41.052913665771484, + "learning_rate": 1.977362051376158e-08, + "logits/chosen": -1.0104937553405762, + "logits/rejected": -1.0476266145706177, + "logps/chosen": -680.0690307617188, + "logps/ref_chosen": -46.45808029174805, + "logps/ref_rejected": -91.8544921875, + "logps/rejected": -1226.80029296875, + "loss": 1.0613, + "margin_dpo/margin_mean": 501.33489990234375, + "margin_dpo/margin_std": 684.626708984375, + "step": 604 + }, + { + "KL/chosen_KL_mean": -654.0040283203125, + "KL/mean": -836.632568359375, + "KL/rejected_KL_mean": -1019.260986328125, + "KL/std": 520.555908203125, + "epoch": 0.8883994126284875, + "fcm_dpo/beta": 0.0009504579938948154, + "fcm_dpo/delta": 0.05476874113082886, + "fcm_dpo/margin": 365.2569580078125, + "fcm_dpo/q_t": 0.4232047498226166, + "grad_norm": 32.279541015625, + "learning_rate": 1.9276432015946446e-08, + "logits/chosen": -0.9521446228027344, + "logits/rejected": -0.9669671654701233, + "logps/chosen": -720.2533569335938, + "logps/ref_chosen": -66.24933624267578, + "logps/ref_rejected": -102.30496978759766, + "logps/rejected": -1121.56591796875, + "loss": 1.1459, + "margin_dpo/margin_mean": 365.2569885253906, + "margin_dpo/margin_std": 619.3516845703125, + "step": 605 + }, + { + "KL/chosen_KL_mean": -664.3292236328125, + "KL/mean": -865.0908203125, + "KL/rejected_KL_mean": -1065.852294921875, + "KL/std": 528.2071533203125, + "epoch": 0.8898678414096917, + "fcm_dpo/beta": 0.0009606323437765241, + "fcm_dpo/delta": 0.014511629939079285, + "fcm_dpo/margin": 401.52313232421875, + "fcm_dpo/q_t": 0.41221147775650024, + "grad_norm": 27.56992530822754, + "learning_rate": 1.8785323298722093e-08, + "logits/chosen": -0.9551470875740051, + "logits/rejected": -0.9689816236495972, + "logps/chosen": -719.1483764648438, + "logps/ref_chosen": -54.819122314453125, + "logps/ref_rejected": -98.37146759033203, + "logps/rejected": -1164.223876953125, + "loss": 1.105, + "margin_dpo/margin_mean": 401.52313232421875, + "margin_dpo/margin_std": 560.071533203125, + "step": 606 + }, + { + "KL/chosen_KL_mean": -685.24560546875, + "KL/mean": -856.268310546875, + "KL/rejected_KL_mean": -1027.291015625, + "KL/std": 536.4832763671875, + "epoch": 0.8913362701908958, + "fcm_dpo/beta": 0.0009705802076496184, + "fcm_dpo/delta": 0.0699785053730011, + "fcm_dpo/margin": 342.0454406738281, + "fcm_dpo/q_t": 0.42473822832107544, + "grad_norm": 26.594741821289062, + "learning_rate": 1.8300307303259904e-08, + "logits/chosen": -0.9825999140739441, + "logits/rejected": -0.9769987463951111, + "logps/chosen": -743.32958984375, + "logps/ref_chosen": -58.08403778076172, + "logps/ref_rejected": -79.777099609375, + "logps/rejected": -1107.068115234375, + "loss": 1.1528, + "margin_dpo/margin_mean": 342.0454406738281, + "margin_dpo/margin_std": 565.7429809570312, + "step": 607 + }, + { + "KL/chosen_KL_mean": -613.8583374023438, + "KL/mean": -814.1548461914062, + "KL/rejected_KL_mean": -1014.4512939453125, + "KL/std": 486.152099609375, + "epoch": 0.8928046989720999, + "fcm_dpo/beta": 0.0009749716846272349, + "fcm_dpo/delta": 0.009674161672592163, + "fcm_dpo/margin": 400.5929260253906, + "fcm_dpo/q_t": 0.4095849096775055, + "grad_norm": 30.251602172851562, + "learning_rate": 1.7821396810182437e-08, + "logits/chosen": -1.0167486667633057, + "logits/rejected": -1.030979871749878, + "logps/chosen": -671.3092041015625, + "logps/ref_chosen": -57.450836181640625, + "logps/ref_rejected": -94.77339172363281, + "logps/rejected": -1109.224609375, + "loss": 1.0893, + "margin_dpo/margin_mean": 400.5929260253906, + "margin_dpo/margin_std": 500.50164794921875, + "step": 608 + }, + { + "KL/chosen_KL_mean": -640.0119018554688, + "KL/mean": -893.2958984375, + "KL/rejected_KL_mean": -1146.579833984375, + "KL/std": 658.148193359375, + "epoch": 0.8942731277533039, + "fcm_dpo/beta": 0.0009604240767657757, + "fcm_dpo/delta": -0.09091140329837799, + "fcm_dpo/margin": 506.5679016113281, + "fcm_dpo/q_t": 0.3968961536884308, + "grad_norm": 33.4229736328125, + "learning_rate": 1.7348604439226617e-08, + "logits/chosen": -1.0928289890289307, + "logits/rejected": -1.119450569152832, + "logps/chosen": -698.8172607421875, + "logps/ref_chosen": -58.805355072021484, + "logps/ref_rejected": -88.81600952148438, + "logps/rejected": -1235.3958740234375, + "loss": 1.0712, + "margin_dpo/margin_mean": 506.56793212890625, + "margin_dpo/margin_std": 784.350830078125, + "step": 609 + }, + { + "KL/chosen_KL_mean": -626.9141845703125, + "KL/mean": -787.075927734375, + "KL/rejected_KL_mean": -947.2376708984375, + "KL/std": 506.7711486816406, + "epoch": 0.895741556534508, + "fcm_dpo/beta": 0.0009702660609036684, + "fcm_dpo/delta": 0.09187015891075134, + "fcm_dpo/margin": 320.3234558105469, + "fcm_dpo/q_t": 0.42758795619010925, + "grad_norm": 41.30014419555664, + "learning_rate": 1.6881942648911074e-08, + "logits/chosen": -0.9679499864578247, + "logits/rejected": -0.9463798999786377, + "logps/chosen": -692.6092529296875, + "logps/ref_chosen": -65.69503784179688, + "logps/ref_rejected": -83.40538787841797, + "logps/rejected": -1030.64306640625, + "loss": 1.1736, + "margin_dpo/margin_mean": 320.3234558105469, + "margin_dpo/margin_std": 581.2178955078125, + "step": 610 + }, + { + "KL/chosen_KL_mean": -670.1181640625, + "KL/mean": -940.070068359375, + "KL/rejected_KL_mean": -1210.02197265625, + "KL/std": 670.6271362304688, + "epoch": 0.8972099853157122, + "fcm_dpo/beta": 0.0009553628042340279, + "fcm_dpo/delta": -0.12215965986251831, + "fcm_dpo/margin": 539.90380859375, + "fcm_dpo/q_t": 0.3883088231086731, + "grad_norm": 30.76310920715332, + "learning_rate": 1.6421423736208e-08, + "logits/chosen": -1.0270860195159912, + "logits/rejected": -1.0751309394836426, + "logps/chosen": -722.7176513671875, + "logps/ref_chosen": -52.59946823120117, + "logps/ref_rejected": -86.33099365234375, + "logps/rejected": -1296.35302734375, + "loss": 1.0465, + "margin_dpo/margin_mean": 539.90380859375, + "margin_dpo/margin_std": 748.833251953125, + "step": 611 + }, + { + "KL/chosen_KL_mean": -699.7198486328125, + "KL/mean": -904.0280151367188, + "KL/rejected_KL_mean": -1108.336181640625, + "KL/std": 525.3094482421875, + "epoch": 0.8986784140969163, + "fcm_dpo/beta": 0.0009539818856865168, + "fcm_dpo/delta": 0.010405594483017921, + "fcm_dpo/margin": 408.61627197265625, + "fcm_dpo/q_t": 0.410278856754303, + "grad_norm": 27.917463302612305, + "learning_rate": 1.5967059836219042e-08, + "logits/chosen": -1.0200650691986084, + "logits/rejected": -1.0243524312973022, + "logps/chosen": -759.0435791015625, + "logps/ref_chosen": -59.32372283935547, + "logps/ref_rejected": -88.31239318847656, + "logps/rejected": -1196.6485595703125, + "loss": 1.0998, + "margin_dpo/margin_mean": 408.61627197265625, + "margin_dpo/margin_std": 555.7114868164062, + "step": 612 + }, + { + "KL/chosen_KL_mean": -628.8876342773438, + "KL/mean": -882.475341796875, + "KL/rejected_KL_mean": -1136.06298828125, + "KL/std": 600.5887451171875, + "epoch": 0.9001468428781204, + "fcm_dpo/beta": 0.000938057666644454, + "fcm_dpo/delta": -0.07989558577537537, + "fcm_dpo/margin": 507.1754150390625, + "fcm_dpo/q_t": 0.39130979776382446, + "grad_norm": 32.273929595947266, + "learning_rate": 1.551886292185553e-08, + "logits/chosen": -1.0217537879943848, + "logits/rejected": -1.0764918327331543, + "logps/chosen": -688.6175537109375, + "logps/ref_chosen": -59.72996520996094, + "logps/ref_rejected": -105.10752868652344, + "logps/rejected": -1241.1705322265625, + "loss": 1.0397, + "margin_dpo/margin_mean": 507.1754150390625, + "margin_dpo/margin_std": 607.134033203125, + "step": 613 + }, + { + "KL/chosen_KL_mean": -696.5354614257812, + "KL/mean": -941.188720703125, + "KL/rejected_KL_mean": -1185.8419189453125, + "KL/std": 581.6685180664062, + "epoch": 0.9016152716593245, + "fcm_dpo/beta": 0.0009298705263063312, + "fcm_dpo/delta": -0.057572945952415466, + "fcm_dpo/margin": 489.3064270019531, + "fcm_dpo/q_t": 0.39804306626319885, + "grad_norm": 43.64583206176758, + "learning_rate": 1.507684480352292e-08, + "logits/chosen": -1.0031187534332275, + "logits/rejected": -1.0775550603866577, + "logps/chosen": -749.4744262695312, + "logps/ref_chosen": -52.93898010253906, + "logps/ref_rejected": -104.67938232421875, + "logps/rejected": -1290.521240234375, + "loss": 1.0728, + "margin_dpo/margin_mean": 489.30645751953125, + "margin_dpo/margin_std": 680.9266357421875, + "step": 614 + }, + { + "KL/chosen_KL_mean": -656.3577880859375, + "KL/mean": -862.1884155273438, + "KL/rejected_KL_mean": -1068.01904296875, + "KL/std": 608.759033203125, + "epoch": 0.9030837004405287, + "fcm_dpo/beta": 0.0009312764159403741, + "fcm_dpo/delta": 0.017031406983733177, + "fcm_dpo/margin": 411.66131591796875, + "fcm_dpo/q_t": 0.412952184677124, + "grad_norm": 26.297256469726562, + "learning_rate": 1.4641017128809801e-08, + "logits/chosen": -0.9910403490066528, + "logits/rejected": -1.014068365097046, + "logps/chosen": -722.175048828125, + "logps/ref_chosen": -65.81727600097656, + "logps/ref_rejected": -95.17749786376953, + "logps/rejected": -1163.196533203125, + "loss": 1.1233, + "margin_dpo/margin_mean": 411.6612854003906, + "margin_dpo/margin_std": 651.5823974609375, + "step": 615 + }, + { + "KL/chosen_KL_mean": -774.6954345703125, + "KL/mean": -941.8838500976562, + "KL/rejected_KL_mean": -1109.0721435546875, + "KL/std": 509.840576171875, + "epoch": 0.9045521292217328, + "fcm_dpo/beta": 0.0009417695691809058, + "fcm_dpo/delta": 0.08782128244638443, + "fcm_dpo/margin": 334.37677001953125, + "fcm_dpo/q_t": 0.4281091094017029, + "grad_norm": 34.39405822753906, + "learning_rate": 1.4211391382180637e-08, + "logits/chosen": -1.0712862014770508, + "logits/rejected": -1.0602033138275146, + "logps/chosen": -839.8283081054688, + "logps/ref_chosen": -65.13285827636719, + "logps/ref_rejected": -74.70050048828125, + "logps/rejected": -1183.772705078125, + "loss": 1.1659, + "margin_dpo/margin_mean": 334.37677001953125, + "margin_dpo/margin_std": 575.5633544921875, + "step": 616 + }, + { + "KL/chosen_KL_mean": -732.1382446289062, + "KL/mean": -852.7117919921875, + "KL/rejected_KL_mean": -973.285400390625, + "KL/std": 490.1700439453125, + "epoch": 0.9060205580029369, + "fcm_dpo/beta": 0.0009709987789392471, + "fcm_dpo/delta": 0.16959968209266663, + "fcm_dpo/margin": 241.1470489501953, + "fcm_dpo/q_t": 0.44701701402664185, + "grad_norm": 53.658565521240234, + "learning_rate": 1.378797888467345e-08, + "logits/chosen": -0.9488894939422607, + "logits/rejected": -0.9184377789497375, + "logps/chosen": -795.143798828125, + "logps/ref_chosen": -63.005550384521484, + "logps/ref_rejected": -64.234130859375, + "logps/rejected": -1037.51953125, + "loss": 1.2385, + "margin_dpo/margin_mean": 241.14703369140625, + "margin_dpo/margin_std": 569.4755859375, + "step": 617 + }, + { + "KL/chosen_KL_mean": -763.865478515625, + "KL/mean": -1025.0372314453125, + "KL/rejected_KL_mean": -1286.208984375, + "KL/std": 651.3458251953125, + "epoch": 0.9074889867841409, + "fcm_dpo/beta": 0.0009685006225481629, + "fcm_dpo/delta": -0.11168282479047775, + "fcm_dpo/margin": 522.3434448242188, + "fcm_dpo/q_t": 0.3915684223175049, + "grad_norm": 40.38612747192383, + "learning_rate": 1.3370790793601371e-08, + "logits/chosen": -1.0099037885665894, + "logits/rejected": -1.0500290393829346, + "logps/chosen": -830.9668579101562, + "logps/ref_chosen": -67.10134887695312, + "logps/ref_rejected": -92.15340423583984, + "logps/rejected": -1378.3623046875, + "loss": 1.0849, + "margin_dpo/margin_mean": 522.3434448242188, + "margin_dpo/margin_std": 827.5892333984375, + "step": 618 + }, + { + "KL/chosen_KL_mean": -751.3358154296875, + "KL/mean": -951.3638916015625, + "KL/rejected_KL_mean": -1151.3919677734375, + "KL/std": 605.0140380859375, + "epoch": 0.908957415565345, + "fcm_dpo/beta": 0.0009594704024493694, + "fcm_dpo/delta": 0.016789617016911507, + "fcm_dpo/margin": 400.0561828613281, + "fcm_dpo/q_t": 0.42001599073410034, + "grad_norm": 49.994224548339844, + "learning_rate": 1.2959838102258535e-08, + "logits/chosen": -0.9973533153533936, + "logits/rejected": -1.0120331048965454, + "logps/chosen": -807.3140869140625, + "logps/ref_chosen": -55.978233337402344, + "logps/ref_rejected": -93.1854019165039, + "logps/rejected": -1244.577392578125, + "loss": 1.1707, + "margin_dpo/margin_mean": 400.0561828613281, + "margin_dpo/margin_std": 802.4068603515625, + "step": 619 + }, + { + "KL/chosen_KL_mean": -691.8986206054688, + "KL/mean": -882.22509765625, + "KL/rejected_KL_mean": -1072.551513671875, + "KL/std": 538.706787109375, + "epoch": 0.9104258443465492, + "fcm_dpo/beta": 0.000961203477345407, + "fcm_dpo/delta": 0.035247065126895905, + "fcm_dpo/margin": 380.6529846191406, + "fcm_dpo/q_t": 0.4174911379814148, + "grad_norm": 34.35021209716797, + "learning_rate": 1.2555131639630567e-08, + "logits/chosen": -1.0476036071777344, + "logits/rejected": -1.0572441816329956, + "logps/chosen": -751.6961059570312, + "logps/ref_chosen": -59.79750061035156, + "logps/ref_rejected": -78.41075134277344, + "logps/rejected": -1150.96240234375, + "loss": 1.1326, + "margin_dpo/margin_mean": 380.6529846191406, + "margin_dpo/margin_std": 605.3323974609375, + "step": 620 + }, + { + "KL/chosen_KL_mean": -665.3763427734375, + "KL/mean": -940.9721069335938, + "KL/rejected_KL_mean": -1216.567626953125, + "KL/std": 632.556884765625, + "epoch": 0.9118942731277533, + "fcm_dpo/beta": 0.0009547668742015958, + "fcm_dpo/delta": -0.13341151177883148, + "fcm_dpo/margin": 551.19140625, + "fcm_dpo/q_t": 0.3814903199672699, + "grad_norm": 39.83711624145508, + "learning_rate": 1.2156682070109086e-08, + "logits/chosen": -1.0708098411560059, + "logits/rejected": -1.1203954219818115, + "logps/chosen": -719.3101196289062, + "logps/ref_chosen": -53.93375778198242, + "logps/ref_rejected": -88.36951446533203, + "logps/rejected": -1304.937255859375, + "loss": 1.033, + "margin_dpo/margin_mean": 551.19140625, + "margin_dpo/margin_std": 702.85986328125, + "step": 621 + }, + { + "KL/chosen_KL_mean": -658.5285034179688, + "KL/mean": -857.1796264648438, + "KL/rejected_KL_mean": -1055.8306884765625, + "KL/std": 493.7017822265625, + "epoch": 0.9133627019089574, + "fcm_dpo/beta": 0.0009404352167621255, + "fcm_dpo/delta": 0.0270434208214283, + "fcm_dpo/margin": 397.30218505859375, + "fcm_dpo/q_t": 0.4160274565219879, + "grad_norm": 30.787620544433594, + "learning_rate": 1.1764499893210878e-08, + "logits/chosen": -0.9209400415420532, + "logits/rejected": -0.9098290205001831, + "logps/chosen": -718.8143310546875, + "logps/ref_chosen": -60.28582000732422, + "logps/ref_rejected": -85.51873779296875, + "logps/rejected": -1141.349365234375, + "loss": 1.1242, + "margin_dpo/margin_mean": 397.30218505859375, + "margin_dpo/margin_std": 618.2388916015625, + "step": 622 + }, + { + "KL/chosen_KL_mean": -722.2666015625, + "KL/mean": -879.1347045898438, + "KL/rejected_KL_mean": -1036.0028076171875, + "KL/std": 512.3851318359375, + "epoch": 0.9148311306901615, + "fcm_dpo/beta": 0.0009606959065422416, + "fcm_dpo/delta": 0.10166360437870026, + "fcm_dpo/margin": 313.7361755371094, + "fcm_dpo/q_t": 0.4334968328475952, + "grad_norm": 33.62225341796875, + "learning_rate": 1.1378595443300998e-08, + "logits/chosen": -1.0529344081878662, + "logits/rejected": -1.0536704063415527, + "logps/chosen": -786.423583984375, + "logps/ref_chosen": -64.1569595336914, + "logps/ref_rejected": -85.08304595947266, + "logps/rejected": -1121.0859375, + "loss": 1.1853, + "margin_dpo/margin_mean": 313.7362060546875, + "margin_dpo/margin_std": 610.1846923828125, + "step": 623 + }, + { + "KL/chosen_KL_mean": -710.2718505859375, + "KL/mean": -944.1239013671875, + "KL/rejected_KL_mean": -1177.9759521484375, + "KL/std": 529.1799926757812, + "epoch": 0.9162995594713657, + "fcm_dpo/beta": 0.0009594388538971543, + "fcm_dpo/delta": -0.05099187046289444, + "fcm_dpo/margin": 467.7041015625, + "fcm_dpo/q_t": 0.3950585722923279, + "grad_norm": 39.309574127197266, + "learning_rate": 1.0998978889320582e-08, + "logits/chosen": -1.0544100999832153, + "logits/rejected": -1.0604016780853271, + "logps/chosen": -782.1905517578125, + "logps/ref_chosen": -71.91862487792969, + "logps/ref_rejected": -97.13203430175781, + "logps/rejected": -1275.1080322265625, + "loss": 1.0585, + "margin_dpo/margin_mean": 467.7041015625, + "margin_dpo/margin_std": 573.2549438476562, + "step": 624 + }, + { + "KL/chosen_KL_mean": -676.450927734375, + "KL/mean": -938.8895263671875, + "KL/rejected_KL_mean": -1201.328125, + "KL/std": 594.9174194335938, + "epoch": 0.9177679882525698, + "fcm_dpo/beta": 0.0009445177856832743, + "fcm_dpo/delta": -0.10063250362873077, + "fcm_dpo/margin": 524.8773193359375, + "fcm_dpo/q_t": 0.38663381338119507, + "grad_norm": 58.24129867553711, + "learning_rate": 1.0625660234518913e-08, + "logits/chosen": -1.006543755531311, + "logits/rejected": -1.0352264642715454, + "logps/chosen": -734.79296875, + "logps/ref_chosen": -58.342071533203125, + "logps/ref_rejected": -86.09038543701172, + "logps/rejected": -1287.4185791015625, + "loss": 1.0137, + "margin_dpo/margin_mean": 524.8773193359375, + "margin_dpo/margin_std": 560.148193359375, + "step": 625 + }, + { + "KL/chosen_KL_mean": -826.4632568359375, + "KL/mean": -968.705810546875, + "KL/rejected_KL_mean": -1110.9483642578125, + "KL/std": 636.0460815429688, + "epoch": 0.9192364170337739, + "fcm_dpo/beta": 0.0009557833545841277, + "fcm_dpo/delta": 0.1316283643245697, + "fcm_dpo/margin": 284.48504638671875, + "fcm_dpo/q_t": 0.4362901449203491, + "grad_norm": 32.43234634399414, + "learning_rate": 1.0258649316189721e-08, + "logits/chosen": -0.9522177577018738, + "logits/rejected": -0.9446706771850586, + "logps/chosen": -901.575927734375, + "logps/ref_chosen": -75.11260986328125, + "logps/ref_rejected": -99.188720703125, + "logps/rejected": -1210.136962890625, + "loss": 1.2195, + "margin_dpo/margin_mean": 284.48504638671875, + "margin_dpo/margin_std": 614.84765625, + "step": 626 + }, + { + "KL/chosen_KL_mean": -572.4993286132812, + "KL/mean": -865.5345458984375, + "KL/rejected_KL_mean": -1158.569580078125, + "KL/std": 690.389892578125, + "epoch": 0.920704845814978, + "fcm_dpo/beta": 0.000945016392506659, + "fcm_dpo/delta": -0.162668839097023, + "fcm_dpo/margin": 586.0703125, + "fcm_dpo/q_t": 0.3842451572418213, + "grad_norm": 24.69363784790039, + "learning_rate": 9.897955805412e-09, + "logits/chosen": -0.8964744806289673, + "logits/rejected": -0.9713860154151917, + "logps/chosen": -620.2424926757812, + "logps/ref_chosen": -47.74314880371094, + "logps/ref_rejected": -106.75448608398438, + "logps/rejected": -1265.32421875, + "loss": 1.0279, + "margin_dpo/margin_mean": 586.0703125, + "margin_dpo/margin_std": 790.52783203125, + "step": 627 + }, + { + "KL/chosen_KL_mean": -741.4865112304688, + "KL/mean": -962.6798706054688, + "KL/rejected_KL_mean": -1183.873291015625, + "KL/std": 572.581787109375, + "epoch": 0.922173274596182, + "fcm_dpo/beta": 0.0009279233636334538, + "fcm_dpo/delta": -0.011030579917132854, + "fcm_dpo/margin": 442.3868408203125, + "fcm_dpo/q_t": 0.40770232677459717, + "grad_norm": 28.13714599609375, + "learning_rate": 9.543589206795238e-09, + "logits/chosen": -1.0725154876708984, + "logits/rejected": -1.0900166034698486, + "logps/chosen": -801.66943359375, + "logps/ref_chosen": -60.182945251464844, + "logps/ref_rejected": -101.55467224121094, + "logps/rejected": -1285.427978515625, + "loss": 1.1039, + "margin_dpo/margin_mean": 442.3868408203125, + "margin_dpo/margin_std": 658.2325439453125, + "step": 628 + }, + { + "KL/chosen_KL_mean": -733.53369140625, + "KL/mean": -936.6685791015625, + "KL/rejected_KL_mean": -1139.8037109375, + "KL/std": 552.74169921875, + "epoch": 0.9236417033773862, + "fcm_dpo/beta": 0.0009324135025963187, + "fcm_dpo/delta": 0.02201123535633087, + "fcm_dpo/margin": 406.26995849609375, + "fcm_dpo/q_t": 0.4115867018699646, + "grad_norm": 34.70335006713867, + "learning_rate": 9.19555885822887e-09, + "logits/chosen": -1.0210623741149902, + "logits/rejected": -1.0353336334228516, + "logps/chosen": -797.7471923828125, + "logps/ref_chosen": -64.21354675292969, + "logps/ref_rejected": -91.65367126464844, + "logps/rejected": -1231.457275390625, + "loss": 1.1046, + "margin_dpo/margin_mean": 406.26995849609375, + "margin_dpo/margin_std": 546.7474365234375, + "step": 629 + }, + { + "KL/chosen_KL_mean": -653.820068359375, + "KL/mean": -769.8485107421875, + "KL/rejected_KL_mean": -885.8768920898438, + "KL/std": 560.4996337890625, + "epoch": 0.9251101321585903, + "fcm_dpo/beta": 0.0009379271068610251, + "fcm_dpo/delta": 0.051444362848997116, + "fcm_dpo/margin": 232.05682373046875, + "fcm_dpo/q_t": 0.45478296279907227, + "grad_norm": 52.160728454589844, + "learning_rate": 8.85387393063622e-09, + "logits/chosen": -1.0045530796051025, + "logits/rejected": -0.9794071912765503, + "logps/chosen": -713.111083984375, + "logps/ref_chosen": -59.29100036621094, + "logps/ref_rejected": -83.59829711914062, + "logps/rejected": -969.4752197265625, + "loss": 1.2718, + "margin_dpo/margin_mean": 232.05682373046875, + "margin_dpo/margin_std": 672.6798095703125, + "step": 630 + }, + { + "KL/chosen_KL_mean": -792.3543701171875, + "KL/mean": -974.973388671875, + "KL/rejected_KL_mean": -1157.592529296875, + "KL/std": 561.6439208984375, + "epoch": 0.9265785609397944, + "fcm_dpo/beta": 0.0009518619626760483, + "fcm_dpo/delta": 0.05409633368253708, + "fcm_dpo/margin": 365.2381286621094, + "fcm_dpo/q_t": 0.42036306858062744, + "grad_norm": 34.219573974609375, + "learning_rate": 8.518543427732949e-09, + "logits/chosen": -1.0938163995742798, + "logits/rejected": -1.1026105880737305, + "logps/chosen": -851.8079833984375, + "logps/ref_chosen": -59.45360565185547, + "logps/ref_rejected": -80.95156860351562, + "logps/rejected": -1238.5440673828125, + "loss": 1.1567, + "margin_dpo/margin_mean": 365.2381286621094, + "margin_dpo/margin_std": 643.0179443359375, + "step": 631 + }, + { + "KL/chosen_KL_mean": -682.728515625, + "KL/mean": -877.140625, + "KL/rejected_KL_mean": -1071.552734375, + "KL/std": 512.9617919921875, + "epoch": 0.9280469897209985, + "fcm_dpo/beta": 0.0009528810624033213, + "fcm_dpo/delta": 0.03032829239964485, + "fcm_dpo/margin": 388.82421875, + "fcm_dpo/q_t": 0.4152371883392334, + "grad_norm": 43.099708557128906, + "learning_rate": 8.189576185789637e-09, + "logits/chosen": -1.0412629842758179, + "logits/rejected": -1.0417115688323975, + "logps/chosen": -744.080078125, + "logps/ref_chosen": -61.35155487060547, + "logps/ref_rejected": -86.16017150878906, + "logps/rejected": -1157.712890625, + "loss": 1.1352, + "margin_dpo/margin_mean": 388.82421875, + "margin_dpo/margin_std": 626.7635498046875, + "step": 632 + }, + { + "KL/chosen_KL_mean": -747.4510498046875, + "KL/mean": -891.1580810546875, + "KL/rejected_KL_mean": -1034.8651123046875, + "KL/std": 504.61749267578125, + "epoch": 0.9295154185022027, + "fcm_dpo/beta": 0.0009792209602892399, + "fcm_dpo/delta": 0.12146103382110596, + "fcm_dpo/margin": 287.41412353515625, + "fcm_dpo/q_t": 0.43543291091918945, + "grad_norm": 47.98980712890625, + "learning_rate": 7.866980873399015e-09, + "logits/chosen": -1.0630054473876953, + "logits/rejected": -1.0733153820037842, + "logps/chosen": -804.729248046875, + "logps/ref_chosen": -57.27816390991211, + "logps/ref_rejected": -91.58395385742188, + "logps/rejected": -1126.4490966796875, + "loss": 1.2016, + "margin_dpo/margin_mean": 287.41412353515625, + "margin_dpo/margin_std": 580.196533203125, + "step": 633 + }, + { + "KL/chosen_KL_mean": -855.8317260742188, + "KL/mean": -977.2432861328125, + "KL/rejected_KL_mean": -1098.655029296875, + "KL/std": 611.3326416015625, + "epoch": 0.9309838472834068, + "fcm_dpo/beta": 0.0009960609022527933, + "fcm_dpo/delta": 0.06913906335830688, + "fcm_dpo/margin": 242.82333374023438, + "fcm_dpo/q_t": 0.44638699293136597, + "grad_norm": 35.45087814331055, + "learning_rate": 7.550765991247654e-09, + "logits/chosen": -0.9742704033851624, + "logits/rejected": -0.9662094116210938, + "logps/chosen": -922.45068359375, + "logps/ref_chosen": -66.61896514892578, + "logps/ref_rejected": -107.12564849853516, + "logps/rejected": -1205.78076171875, + "loss": 1.2382, + "margin_dpo/margin_mean": 242.82333374023438, + "margin_dpo/margin_std": 585.6327514648438, + "step": 634 + }, + { + "KL/chosen_KL_mean": -741.16796875, + "KL/mean": -916.81396484375, + "KL/rejected_KL_mean": -1092.4599609375, + "KL/std": 625.1411743164062, + "epoch": 0.9324522760646109, + "fcm_dpo/beta": 0.0010028297547250986, + "fcm_dpo/delta": 0.04947870969772339, + "fcm_dpo/margin": 351.2921142578125, + "fcm_dpo/q_t": 0.42215800285339355, + "grad_norm": 38.723793029785156, + "learning_rate": 7.240939871891699e-09, + "logits/chosen": -1.0590343475341797, + "logits/rejected": -1.0402554273605347, + "logps/chosen": -815.1234741210938, + "logps/ref_chosen": -73.95551300048828, + "logps/ref_rejected": -82.50045776367188, + "logps/rejected": -1174.96044921875, + "loss": 1.1545, + "margin_dpo/margin_mean": 351.2921142578125, + "margin_dpo/margin_std": 629.5093994140625, + "step": 635 + }, + { + "KL/chosen_KL_mean": -695.1683959960938, + "KL/mean": -907.8634033203125, + "KL/rejected_KL_mean": -1120.558349609375, + "KL/std": 626.8406372070312, + "epoch": 0.933920704845815, + "fcm_dpo/beta": 0.0010084551759064198, + "fcm_dpo/delta": -0.03108617290854454, + "fcm_dpo/margin": 425.3899841308594, + "fcm_dpo/q_t": 0.40682026743888855, + "grad_norm": 27.428804397583008, + "learning_rate": 6.937510679537628e-09, + "logits/chosen": -0.974394679069519, + "logits/rejected": -0.9779649972915649, + "logps/chosen": -754.7973022460938, + "logps/ref_chosen": -59.628910064697266, + "logps/ref_rejected": -81.97883605957031, + "logps/rejected": -1202.537109375, + "loss": 1.0923, + "margin_dpo/margin_mean": 425.3899841308594, + "margin_dpo/margin_std": 645.3447265625, + "step": 636 + }, + { + "KL/chosen_KL_mean": -712.531005859375, + "KL/mean": -942.2177124023438, + "KL/rejected_KL_mean": -1171.904541015625, + "KL/std": 613.010009765625, + "epoch": 0.9353891336270191, + "fcm_dpo/beta": 0.0009865246247500181, + "fcm_dpo/delta": -0.0564747154712677, + "fcm_dpo/margin": 459.3734436035156, + "fcm_dpo/q_t": 0.3993714153766632, + "grad_norm": 29.097070693969727, + "learning_rate": 6.640486409826785e-09, + "logits/chosen": -1.068098545074463, + "logits/rejected": -1.115422248840332, + "logps/chosen": -762.1837158203125, + "logps/ref_chosen": -49.652687072753906, + "logps/ref_rejected": -98.40513610839844, + "logps/rejected": -1270.3095703125, + "loss": 1.0719, + "margin_dpo/margin_mean": 459.3734130859375, + "margin_dpo/margin_std": 634.1755981445312, + "step": 637 + }, + { + "KL/chosen_KL_mean": -675.932373046875, + "KL/mean": -855.3536987304688, + "KL/rejected_KL_mean": -1034.775146484375, + "KL/std": 574.1265869140625, + "epoch": 0.9368575624082232, + "fcm_dpo/beta": 0.0009809336625039577, + "fcm_dpo/delta": -0.07716827094554901, + "fcm_dpo/margin": 358.84271240234375, + "fcm_dpo/q_t": 0.41367873549461365, + "grad_norm": 43.48149490356445, + "learning_rate": 6.349874889624962e-09, + "logits/chosen": -0.966257631778717, + "logits/rejected": -0.9517063498497009, + "logps/chosen": -734.0889892578125, + "logps/ref_chosen": -58.156639099121094, + "logps/ref_rejected": -79.3014907836914, + "logps/rejected": -1114.0765380859375, + "loss": 1.1675, + "margin_dpo/margin_mean": 358.84271240234375, + "margin_dpo/margin_std": 657.916748046875, + "step": 638 + }, + { + "KL/chosen_KL_mean": -934.8937377929688, + "KL/mean": -1017.8052368164062, + "KL/rejected_KL_mean": -1100.716796875, + "KL/std": 551.710205078125, + "epoch": 0.9383259911894273, + "fcm_dpo/beta": 0.0009733641054481268, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 165.8230743408203, + "fcm_dpo/q_t": 0.4636532962322235, + "grad_norm": 106.04683685302734, + "learning_rate": 6.065683776815933e-09, + "logits/chosen": -0.9813928604125977, + "logits/rejected": -0.9263367652893066, + "logps/chosen": -1007.2169189453125, + "logps/ref_chosen": -72.32319641113281, + "logps/ref_rejected": -74.2749252319336, + "logps/rejected": -1174.99169921875, + "loss": 1.3435, + "margin_dpo/margin_mean": 165.82305908203125, + "margin_dpo/margin_std": 706.1915893554688, + "step": 639 + }, + { + "KL/chosen_KL_mean": -715.442626953125, + "KL/mean": -994.8519897460938, + "KL/rejected_KL_mean": -1274.2613525390625, + "KL/std": 640.503173828125, + "epoch": 0.9397944199706314, + "fcm_dpo/beta": 0.0009570815600454807, + "fcm_dpo/delta": -0.14221924543380737, + "fcm_dpo/margin": 558.8187255859375, + "fcm_dpo/q_t": 0.38426363468170166, + "grad_norm": 39.72331237792969, + "learning_rate": 5.7879205600998296e-09, + "logits/chosen": -0.9474629163742065, + "logits/rejected": -0.9777064919471741, + "logps/chosen": -771.5770263671875, + "logps/ref_chosen": -56.13436508178711, + "logps/ref_rejected": -108.60014343261719, + "logps/rejected": -1382.8614501953125, + "loss": 1.0338, + "margin_dpo/margin_mean": 558.8187255859375, + "margin_dpo/margin_std": 753.1952514648438, + "step": 640 + }, + { + "KL/chosen_KL_mean": -827.5257568359375, + "KL/mean": -1006.5942993164062, + "KL/rejected_KL_mean": -1185.662841796875, + "KL/std": 546.6756591796875, + "epoch": 0.9412628487518355, + "fcm_dpo/beta": 0.0009527778020128608, + "fcm_dpo/delta": 0.060891155153512955, + "fcm_dpo/margin": 358.13702392578125, + "fcm_dpo/q_t": 0.42465877532958984, + "grad_norm": 35.67609405517578, + "learning_rate": 5.516592558795746e-09, + "logits/chosen": -1.007719874382019, + "logits/rejected": -1.0168031454086304, + "logps/chosen": -892.5226440429688, + "logps/ref_chosen": -64.99689483642578, + "logps/ref_rejected": -86.99232482910156, + "logps/rejected": -1272.6551513671875, + "loss": 1.1809, + "margin_dpo/margin_mean": 358.1370544433594, + "margin_dpo/margin_std": 718.3710327148438, + "step": 641 + }, + { + "KL/chosen_KL_mean": -792.3043212890625, + "KL/mean": -1011.9464721679688, + "KL/rejected_KL_mean": -1231.588623046875, + "KL/std": 722.4783935546875, + "epoch": 0.9427312775330396, + "fcm_dpo/beta": 0.0009587721433490515, + "fcm_dpo/delta": -0.022414878010749817, + "fcm_dpo/margin": 439.2843017578125, + "fcm_dpo/q_t": 0.41409242153167725, + "grad_norm": 38.891204833984375, + "learning_rate": 5.251706922648868e-09, + "logits/chosen": -0.9521088600158691, + "logits/rejected": -0.9873976707458496, + "logps/chosen": -857.9935302734375, + "logps/ref_chosen": -65.68924713134766, + "logps/ref_rejected": -110.24205017089844, + "logps/rejected": -1341.830810546875, + "loss": 1.1499, + "margin_dpo/margin_mean": 439.2843017578125, + "margin_dpo/margin_std": 870.07275390625, + "step": 642 + }, + { + "KL/chosen_KL_mean": -706.8209228515625, + "KL/mean": -878.863037109375, + "KL/rejected_KL_mean": -1050.905029296875, + "KL/std": 530.0516357421875, + "epoch": 0.9441997063142438, + "fcm_dpo/beta": 0.0009463735623285174, + "fcm_dpo/delta": -0.036860737949609756, + "fcm_dpo/margin": 344.0841979980469, + "fcm_dpo/q_t": 0.42428651452064514, + "grad_norm": 37.40614700317383, + "learning_rate": 4.993270631642038e-09, + "logits/chosen": -1.0797677040100098, + "logits/rejected": -1.0802643299102783, + "logps/chosen": -758.7708740234375, + "logps/ref_chosen": -51.94999694824219, + "logps/ref_rejected": -87.46833801269531, + "logps/rejected": -1138.3734130859375, + "loss": 1.1498, + "margin_dpo/margin_mean": 344.0841979980469, + "margin_dpo/margin_std": 523.859130859375, + "step": 643 + }, + { + "KL/chosen_KL_mean": -694.5718994140625, + "KL/mean": -873.572021484375, + "KL/rejected_KL_mean": -1052.5721435546875, + "KL/std": 631.7380981445312, + "epoch": 0.9456681350954479, + "fcm_dpo/beta": 0.000954576360527426, + "fcm_dpo/delta": 0.06034265458583832, + "fcm_dpo/margin": 358.000244140625, + "fcm_dpo/q_t": 0.4248698949813843, + "grad_norm": 45.14137649536133, + "learning_rate": 4.741290495811873e-09, + "logits/chosen": -0.9851275682449341, + "logits/rejected": -0.9928478002548218, + "logps/chosen": -753.589599609375, + "logps/ref_chosen": -59.017662048339844, + "logps/ref_rejected": -87.13668823242188, + "logps/rejected": -1139.708740234375, + "loss": 1.1823, + "margin_dpo/margin_mean": 358.000244140625, + "margin_dpo/margin_std": 717.85693359375, + "step": 644 + }, + { + "KL/chosen_KL_mean": -724.6256103515625, + "KL/mean": -813.5394287109375, + "KL/rejected_KL_mean": -902.4532470703125, + "KL/std": 482.83013916015625, + "epoch": 0.947136563876652, + "fcm_dpo/beta": 0.0009710404556244612, + "fcm_dpo/delta": 0.06831113994121552, + "fcm_dpo/margin": 177.82762145996094, + "fcm_dpo/q_t": 0.4625673294067383, + "grad_norm": 88.68705749511719, + "learning_rate": 4.495773155069299e-09, + "logits/chosen": -0.9806017875671387, + "logits/rejected": -0.9682430028915405, + "logps/chosen": -780.501708984375, + "logps/ref_chosen": -55.87602233886719, + "logps/ref_rejected": -97.78080749511719, + "logps/rejected": -1000.2340698242188, + "loss": 1.3219, + "margin_dpo/margin_mean": 177.8275909423828, + "margin_dpo/margin_std": 645.446533203125, + "step": 645 + }, + { + "KL/chosen_KL_mean": -697.8182373046875, + "KL/mean": -850.4000244140625, + "KL/rejected_KL_mean": -1002.981689453125, + "KL/std": 472.5347900390625, + "epoch": 0.9486049926578561, + "fcm_dpo/beta": 0.0009829029440879822, + "fcm_dpo/delta": 0.1032671183347702, + "fcm_dpo/margin": 305.16351318359375, + "fcm_dpo/q_t": 0.432457834482193, + "grad_norm": 52.068904876708984, + "learning_rate": 4.256725079024553e-09, + "logits/chosen": -1.0154389142990112, + "logits/rejected": -0.9988906383514404, + "logps/chosen": -759.093994140625, + "logps/ref_chosen": -61.275787353515625, + "logps/ref_rejected": -77.50580596923828, + "logps/rejected": -1080.487548828125, + "loss": 1.1842, + "margin_dpo/margin_mean": 305.1635437011719, + "margin_dpo/margin_std": 565.9290771484375, + "step": 646 + }, + { + "KL/chosen_KL_mean": -608.2819213867188, + "KL/mean": -799.4105224609375, + "KL/rejected_KL_mean": -990.5390625, + "KL/std": 541.8460693359375, + "epoch": 0.9500734214390602, + "fcm_dpo/beta": 0.0009943554177880287, + "fcm_dpo/delta": 0.02068711817264557, + "fcm_dpo/margin": 382.257080078125, + "fcm_dpo/q_t": 0.41246411204338074, + "grad_norm": 27.42867088317871, + "learning_rate": 4.024152566816791e-09, + "logits/chosen": -0.8789236545562744, + "logits/rejected": -0.9076966047286987, + "logps/chosen": -663.1343383789062, + "logps/ref_chosen": -54.8524169921875, + "logps/ref_rejected": -93.5194091796875, + "logps/rejected": -1084.0584716796875, + "loss": 1.1078, + "margin_dpo/margin_mean": 382.257080078125, + "margin_dpo/margin_std": 527.3770751953125, + "step": 647 + }, + { + "KL/chosen_KL_mean": -657.1648559570312, + "KL/mean": -943.3175659179688, + "KL/rejected_KL_mean": -1229.47021484375, + "KL/std": 657.721435546875, + "epoch": 0.9515418502202643, + "fcm_dpo/beta": 0.0009720301604829729, + "fcm_dpo/delta": -0.16539156436920166, + "fcm_dpo/margin": 572.3054809570312, + "fcm_dpo/q_t": 0.38105693459510803, + "grad_norm": 28.24399757385254, + "learning_rate": 3.798061746947995e-09, + "logits/chosen": -1.045109510421753, + "logits/rejected": -1.1041361093521118, + "logps/chosen": -711.3363037109375, + "logps/ref_chosen": -54.17146682739258, + "logps/ref_rejected": -98.7127914428711, + "logps/rejected": -1328.18310546875, + "loss": 1.0232, + "margin_dpo/margin_mean": 572.3054809570312, + "margin_dpo/margin_std": 778.92529296875, + "step": 648 + }, + { + "KL/chosen_KL_mean": -681.6015625, + "KL/mean": -813.9896240234375, + "KL/rejected_KL_mean": -946.3775634765625, + "KL/std": 507.69940185546875, + "epoch": 0.9530102790014684, + "fcm_dpo/beta": 0.000985685153864324, + "fcm_dpo/delta": 0.1423780918121338, + "fcm_dpo/margin": 264.7760009765625, + "fcm_dpo/q_t": 0.4443369209766388, + "grad_norm": 29.93989372253418, + "learning_rate": 3.5784585771215235e-09, + "logits/chosen": -1.052908182144165, + "logits/rejected": -1.044716238975525, + "logps/chosen": -744.0819091796875, + "logps/ref_chosen": -62.480350494384766, + "logps/ref_rejected": -80.07717895507812, + "logps/rejected": -1026.454833984375, + "loss": 1.2366, + "margin_dpo/margin_mean": 264.7760009765625, + "margin_dpo/margin_std": 629.158935546875, + "step": 649 + }, + { + "KL/chosen_KL_mean": -741.3469848632812, + "KL/mean": -952.0729370117188, + "KL/rejected_KL_mean": -1162.798828125, + "KL/std": 635.6630859375, + "epoch": 0.9544787077826725, + "fcm_dpo/beta": 0.0009861743310466409, + "fcm_dpo/delta": -0.016445815563201904, + "fcm_dpo/margin": 421.45196533203125, + "fcm_dpo/q_t": 0.40908634662628174, + "grad_norm": 31.88619041442871, + "learning_rate": 3.3653488440851253e-09, + "logits/chosen": -1.0458638668060303, + "logits/rejected": -1.0673601627349854, + "logps/chosen": -797.4398193359375, + "logps/ref_chosen": -56.09281921386719, + "logps/ref_rejected": -98.26483917236328, + "logps/rejected": -1261.063720703125, + "loss": 1.129, + "margin_dpo/margin_mean": 421.45196533203125, + "margin_dpo/margin_std": 725.9293212890625, + "step": 650 + }, + { + "KL/chosen_KL_mean": -497.8349304199219, + "KL/mean": -769.283203125, + "KL/rejected_KL_mean": -1040.7314453125, + "KL/std": 584.7640380859375, + "epoch": 0.9559471365638766, + "fcm_dpo/beta": 0.000967178144492209, + "fcm_dpo/delta": -0.13220591843128204, + "fcm_dpo/margin": 542.8966064453125, + "fcm_dpo/q_t": 0.3810211718082428, + "grad_norm": 43.920989990234375, + "learning_rate": 3.158738163478475e-09, + "logits/chosen": -1.0429898500442505, + "logits/rejected": -1.0994410514831543, + "logps/chosen": -541.2603759765625, + "logps/ref_chosen": -43.42544937133789, + "logps/ref_rejected": -99.95791625976562, + "logps/rejected": -1140.689453125, + "loss": 1.0016, + "margin_dpo/margin_mean": 542.8966064453125, + "margin_dpo/margin_std": 594.7532958984375, + "step": 651 + }, + { + "KL/chosen_KL_mean": -632.5731201171875, + "KL/mean": -839.5629272460938, + "KL/rejected_KL_mean": -1046.552734375, + "KL/std": 598.0744018554688, + "epoch": 0.9574155653450808, + "fcm_dpo/beta": 0.0009628928382880986, + "fcm_dpo/delta": 0.0014214273542165756, + "fcm_dpo/margin": 413.9795837402344, + "fcm_dpo/q_t": 0.41178420186042786, + "grad_norm": 32.01892852783203, + "learning_rate": 2.9586319796851555e-09, + "logits/chosen": -1.0343468189239502, + "logits/rejected": -1.0592379570007324, + "logps/chosen": -695.14990234375, + "logps/ref_chosen": -62.57680892944336, + "logps/ref_rejected": -111.76779174804688, + "logps/rejected": -1158.320556640625, + "loss": 1.1247, + "margin_dpo/margin_mean": 413.9796142578125, + "margin_dpo/margin_std": 687.262939453125, + "step": 652 + }, + { + "KL/chosen_KL_mean": -769.5801391601562, + "KL/mean": -955.7160034179688, + "KL/rejected_KL_mean": -1141.851806640625, + "KL/std": 637.4415283203125, + "epoch": 0.9588839941262849, + "fcm_dpo/beta": 0.0009692892199382186, + "fcm_dpo/delta": 0.04052945226430893, + "fcm_dpo/margin": 372.2716064453125, + "fcm_dpo/q_t": 0.4203724265098572, + "grad_norm": 33.566246032714844, + "learning_rate": 2.7650355656892166e-09, + "logits/chosen": -1.0515234470367432, + "logits/rejected": -1.0738110542297363, + "logps/chosen": -830.693115234375, + "logps/ref_chosen": -61.11295700073242, + "logps/ref_rejected": -103.24960327148438, + "logps/rejected": -1245.101318359375, + "loss": 1.1492, + "margin_dpo/margin_mean": 372.2716369628906, + "margin_dpo/margin_std": 651.8836669921875, + "step": 653 + }, + { + "KL/chosen_KL_mean": -688.090087890625, + "KL/mean": -868.0014038085938, + "KL/rejected_KL_mean": -1047.9127197265625, + "KL/std": 508.9238586425781, + "epoch": 0.960352422907489, + "fcm_dpo/beta": 0.0009763325797393918, + "fcm_dpo/delta": 0.050495948642492294, + "fcm_dpo/margin": 359.82257080078125, + "fcm_dpo/q_t": 0.4218224287033081, + "grad_norm": 31.80510711669922, + "learning_rate": 2.577954022936174e-09, + "logits/chosen": -1.045449137687683, + "logits/rejected": -1.0607552528381348, + "logps/chosen": -749.8182373046875, + "logps/ref_chosen": -61.7281379699707, + "logps/ref_rejected": -98.7738037109375, + "logps/rejected": -1146.6865234375, + "loss": 1.1428, + "margin_dpo/margin_mean": 359.8226013183594, + "margin_dpo/margin_std": 591.5650634765625, + "step": 654 + }, + { + "KL/chosen_KL_mean": -654.7279663085938, + "KL/mean": -843.5281982421875, + "KL/rejected_KL_mean": -1032.328369140625, + "KL/std": 518.3046264648438, + "epoch": 0.9618208516886931, + "fcm_dpo/beta": 0.0009844074957072735, + "fcm_dpo/delta": 0.029366791248321533, + "fcm_dpo/margin": 377.60040283203125, + "fcm_dpo/q_t": 0.41697460412979126, + "grad_norm": 26.179494857788086, + "learning_rate": 2.397392281198729e-09, + "logits/chosen": -1.0613682270050049, + "logits/rejected": -1.1010310649871826, + "logps/chosen": -704.3048095703125, + "logps/ref_chosen": -49.576812744140625, + "logps/ref_rejected": -98.29183197021484, + "logps/rejected": -1130.6201171875, + "loss": 1.1326, + "margin_dpo/margin_mean": 377.6003723144531, + "margin_dpo/margin_std": 612.6904296875, + "step": 655 + }, + { + "KL/chosen_KL_mean": -706.594970703125, + "KL/mean": -1030.3953857421875, + "KL/rejected_KL_mean": -1354.19580078125, + "KL/std": 699.4261474609375, + "epoch": 0.9632892804698973, + "fcm_dpo/beta": 0.0009554917924106121, + "fcm_dpo/delta": -0.23265045881271362, + "fcm_dpo/margin": 647.600830078125, + "fcm_dpo/q_t": 0.364484578371048, + "grad_norm": 79.35772705078125, + "learning_rate": 2.223355098446622e-09, + "logits/chosen": -0.9079943299293518, + "logits/rejected": -0.978103518486023, + "logps/chosen": -759.1444091796875, + "logps/ref_chosen": -52.54943084716797, + "logps/ref_rejected": -113.67464447021484, + "logps/rejected": -1467.870361328125, + "loss": 0.957, + "margin_dpo/margin_mean": 647.600830078125, + "margin_dpo/margin_std": 704.768310546875, + "step": 656 + }, + { + "KL/chosen_KL_mean": -660.0347290039062, + "KL/mean": -919.8479614257812, + "KL/rejected_KL_mean": -1179.6611328125, + "KL/std": 662.580810546875, + "epoch": 0.9647577092511013, + "fcm_dpo/beta": 0.0009273520554415882, + "fcm_dpo/delta": -0.08619820326566696, + "fcm_dpo/margin": 519.6265258789062, + "fcm_dpo/q_t": 0.3917388916015625, + "grad_norm": 33.481868743896484, + "learning_rate": 2.055847060721566e-09, + "logits/chosen": -1.0952857732772827, + "logits/rejected": -1.1410545110702515, + "logps/chosen": -706.7352294921875, + "logps/ref_chosen": -46.700538635253906, + "logps/ref_rejected": -97.91487121582031, + "logps/rejected": -1277.5760498046875, + "loss": 1.051, + "margin_dpo/margin_mean": 519.6265258789062, + "margin_dpo/margin_std": 686.9745483398438, + "step": 657 + }, + { + "KL/chosen_KL_mean": -716.8538818359375, + "KL/mean": -926.1987915039062, + "KL/rejected_KL_mean": -1135.543701171875, + "KL/std": 523.1241455078125, + "epoch": 0.9662261380323054, + "fcm_dpo/beta": 0.0009218085906468332, + "fcm_dpo/delta": 0.014132943004369736, + "fcm_dpo/margin": 418.6898193359375, + "fcm_dpo/q_t": 0.4112616181373596, + "grad_norm": 32.001991271972656, + "learning_rate": 1.8948725820160662e-09, + "logits/chosen": -1.0506086349487305, + "logits/rejected": -1.0856657028198242, + "logps/chosen": -777.8120727539062, + "logps/ref_chosen": -60.95820999145508, + "logps/ref_rejected": -95.93949127197266, + "logps/rejected": -1231.483154296875, + "loss": 1.1103, + "margin_dpo/margin_mean": 418.6898193359375, + "margin_dpo/margin_std": 594.6806640625, + "step": 658 + }, + { + "KL/chosen_KL_mean": -614.8621826171875, + "KL/mean": -816.10400390625, + "KL/rejected_KL_mean": -1017.3458251953125, + "KL/std": 519.0294189453125, + "epoch": 0.9676945668135095, + "fcm_dpo/beta": 0.0009279932710342109, + "fcm_dpo/delta": 0.02739275060594082, + "fcm_dpo/margin": 402.483642578125, + "fcm_dpo/q_t": 0.4154208302497864, + "grad_norm": 38.490875244140625, + "learning_rate": 1.7404359041573723e-09, + "logits/chosen": -0.9568224549293518, + "logits/rejected": -0.9347273111343384, + "logps/chosen": -691.6051635742188, + "logps/ref_chosen": -76.74298095703125, + "logps/ref_rejected": -87.4709701538086, + "logps/rejected": -1104.8167724609375, + "loss": 1.1164, + "margin_dpo/margin_mean": 402.483642578125, + "margin_dpo/margin_std": 586.9424438476562, + "step": 659 + }, + { + "KL/chosen_KL_mean": -670.32568359375, + "KL/mean": -932.9974365234375, + "KL/rejected_KL_mean": -1195.6693115234375, + "KL/std": 613.5252685546875, + "epoch": 0.9691629955947136, + "fcm_dpo/beta": 0.0009214339079335332, + "fcm_dpo/delta": -0.08826512098312378, + "fcm_dpo/margin": 525.3435668945312, + "fcm_dpo/q_t": 0.3903365135192871, + "grad_norm": 45.50758743286133, + "learning_rate": 1.592541096695571e-09, + "logits/chosen": -1.0583207607269287, + "logits/rejected": -1.0816309452056885, + "logps/chosen": -729.3735961914062, + "logps/ref_chosen": -59.04788589477539, + "logps/ref_rejected": -75.96005249023438, + "logps/rejected": -1271.62939453125, + "loss": 1.0413, + "margin_dpo/margin_mean": 525.3435668945312, + "margin_dpo/margin_std": 652.117431640625, + "step": 660 + }, + { + "KL/chosen_KL_mean": -593.3245849609375, + "KL/mean": -826.2509765625, + "KL/rejected_KL_mean": -1059.177490234375, + "KL/std": 661.68408203125, + "epoch": 0.9706314243759178, + "fcm_dpo/beta": 0.0009160168119706213, + "fcm_dpo/delta": -0.028154436498880386, + "fcm_dpo/margin": 465.85284423828125, + "fcm_dpo/q_t": 0.40546107292175293, + "grad_norm": 44.195091247558594, + "learning_rate": 1.4511920567963908e-09, + "logits/chosen": -1.0162544250488281, + "logits/rejected": -1.0294699668884277, + "logps/chosen": -643.99853515625, + "logps/ref_chosen": -50.673973083496094, + "logps/ref_rejected": -86.00569152832031, + "logps/rejected": -1145.18310546875, + "loss": 1.0806, + "margin_dpo/margin_mean": 465.85284423828125, + "margin_dpo/margin_std": 654.1995849609375, + "step": 661 + }, + { + "KL/chosen_KL_mean": -686.9578857421875, + "KL/mean": -869.9920043945312, + "KL/rejected_KL_mean": -1053.026123046875, + "KL/std": 554.1969604492188, + "epoch": 0.9720998531571219, + "fcm_dpo/beta": 0.0009198813932016492, + "fcm_dpo/delta": 0.06546258926391602, + "fcm_dpo/margin": 366.068359375, + "fcm_dpo/q_t": 0.42443883419036865, + "grad_norm": 27.159738540649414, + "learning_rate": 1.3163925091384532e-09, + "logits/chosen": -0.9714980125427246, + "logits/rejected": -0.9738001823425293, + "logps/chosen": -756.2188720703125, + "logps/ref_chosen": -69.26106262207031, + "logps/ref_rejected": -89.05593872070312, + "logps/rejected": -1142.08203125, + "loss": 1.1657, + "margin_dpo/margin_mean": 366.068359375, + "margin_dpo/margin_std": 672.253662109375, + "step": 662 + }, + { + "KL/chosen_KL_mean": -672.4644775390625, + "KL/mean": -889.0924072265625, + "KL/rejected_KL_mean": -1105.72021484375, + "KL/std": 637.3165283203125, + "epoch": 0.973568281938326, + "fcm_dpo/beta": 0.0009213130106218159, + "fcm_dpo/delta": 0.0008019153028726578, + "fcm_dpo/margin": 433.2557373046875, + "fcm_dpo/q_t": 0.4122500717639923, + "grad_norm": 26.556825637817383, + "learning_rate": 1.1881460058152382e-09, + "logits/chosen": -1.0173933506011963, + "logits/rejected": -1.0415921211242676, + "logps/chosen": -737.3433837890625, + "logps/ref_chosen": -64.87890625, + "logps/ref_rejected": -113.92536926269531, + "logps/rejected": -1219.6456298828125, + "loss": 1.1271, + "margin_dpo/margin_mean": 433.25579833984375, + "margin_dpo/margin_std": 735.6624145507812, + "step": 663 + }, + { + "KL/chosen_KL_mean": -696.9286499023438, + "KL/mean": -946.314208984375, + "KL/rejected_KL_mean": -1195.69970703125, + "KL/std": 625.337158203125, + "epoch": 0.9750367107195301, + "fcm_dpo/beta": 0.0009112852858379483, + "fcm_dpo/delta": -0.057732854038476944, + "fcm_dpo/margin": 498.7710266113281, + "fcm_dpo/q_t": 0.39850109815597534, + "grad_norm": 29.249494552612305, + "learning_rate": 1.066455926241383e-09, + "logits/chosen": -1.0290577411651611, + "logits/rejected": -1.0633100271224976, + "logps/chosen": -757.817138671875, + "logps/ref_chosen": -60.88847351074219, + "logps/ref_rejected": -105.521728515625, + "logps/rejected": -1301.221435546875, + "loss": 1.0681, + "margin_dpo/margin_mean": 498.77105712890625, + "margin_dpo/margin_std": 673.9359130859375, + "step": 664 + }, + { + "KL/chosen_KL_mean": -624.91357421875, + "KL/mean": -831.52734375, + "KL/rejected_KL_mean": -1038.14111328125, + "KL/std": 521.3961791992188, + "epoch": 0.9765051395007343, + "fcm_dpo/beta": 0.0009156306041404605, + "fcm_dpo/delta": 0.02248411625623703, + "fcm_dpo/margin": 413.22760009765625, + "fcm_dpo/q_t": 0.4127081632614136, + "grad_norm": 39.16621780395508, + "learning_rate": 9.513254770636137e-10, + "logits/chosen": -1.1201207637786865, + "logits/rejected": -1.1449217796325684, + "logps/chosen": -685.4776611328125, + "logps/ref_chosen": -60.56413269042969, + "logps/ref_rejected": -84.80882263183594, + "logps/rejected": -1122.949951171875, + "loss": 1.0981, + "margin_dpo/margin_mean": 413.22760009765625, + "margin_dpo/margin_std": 524.672607421875, + "step": 665 + }, + { + "KL/chosen_KL_mean": -664.80712890625, + "KL/mean": -876.2521362304688, + "KL/rejected_KL_mean": -1087.697021484375, + "KL/std": 528.6064453125, + "epoch": 0.9779735682819384, + "fcm_dpo/beta": 0.0009172533173114061, + "fcm_dpo/delta": 0.012579199858009815, + "fcm_dpo/margin": 422.88995361328125, + "fcm_dpo/q_t": 0.41190439462661743, + "grad_norm": 25.72242546081543, + "learning_rate": 8.427576920763956e-10, + "logits/chosen": -0.9221373200416565, + "logits/rejected": -0.9332491755485535, + "logps/chosen": -729.2271118164062, + "logps/ref_chosen": -64.41996002197266, + "logps/ref_rejected": -95.8916244506836, + "logps/rejected": -1183.5887451171875, + "loss": 1.106, + "margin_dpo/margin_mean": 422.88995361328125, + "margin_dpo/margin_std": 591.49951171875, + "step": 666 + }, + { + "KL/chosen_KL_mean": -747.1198120117188, + "KL/mean": -997.51904296875, + "KL/rejected_KL_mean": -1247.918212890625, + "KL/std": 593.6838989257812, + "epoch": 0.9794419970631424, + "fcm_dpo/beta": 0.0009089080849662423, + "fcm_dpo/delta": -0.058049269020557404, + "fcm_dpo/margin": 500.79840087890625, + "fcm_dpo/q_t": 0.3967137038707733, + "grad_norm": 35.70097732543945, + "learning_rate": 7.407554321417764e-10, + "logits/chosen": -0.9862961769104004, + "logits/rejected": -0.9898433089256287, + "logps/chosen": -816.3968505859375, + "logps/ref_chosen": -69.27702331542969, + "logps/ref_rejected": -87.83549499511719, + "logps/rejected": -1335.753662109375, + "loss": 1.0614, + "margin_dpo/margin_mean": 500.79840087890625, + "margin_dpo/margin_std": 649.6275634765625, + "step": 667 + }, + { + "KL/chosen_KL_mean": -799.2650756835938, + "KL/mean": -969.44189453125, + "KL/rejected_KL_mean": -1139.61865234375, + "KL/std": 626.5878295898438, + "epoch": 0.9809104258443465, + "fcm_dpo/beta": 0.0009242600062862039, + "fcm_dpo/delta": 0.08742087334394455, + "fcm_dpo/margin": 340.3535461425781, + "fcm_dpo/q_t": 0.43164360523223877, + "grad_norm": 46.50619888305664, + "learning_rate": 6.453213851142225e-10, + "logits/chosen": -1.027015209197998, + "logits/rejected": -1.0318031311035156, + "logps/chosen": -871.8690795898438, + "logps/ref_chosen": -72.60400390625, + "logps/ref_rejected": -103.73905944824219, + "logps/rejected": -1243.357666015625, + "loss": 1.2061, + "margin_dpo/margin_mean": 340.35357666015625, + "margin_dpo/margin_std": 745.7200927734375, + "step": 668 + }, + { + "KL/chosen_KL_mean": -607.0186157226562, + "KL/mean": -848.4479370117188, + "KL/rejected_KL_mean": -1089.877197265625, + "KL/std": 578.434326171875, + "epoch": 0.9823788546255506, + "fcm_dpo/beta": 0.0009196557221002877, + "fcm_dpo/delta": -0.04609519988298416, + "fcm_dpo/margin": 482.8586120605469, + "fcm_dpo/q_t": 0.39873257279396057, + "grad_norm": 24.051504135131836, + "learning_rate": 5.564580657695939e-10, + "logits/chosen": -1.0002648830413818, + "logits/rejected": -1.0088210105895996, + "logps/chosen": -653.135009765625, + "logps/ref_chosen": -46.116416931152344, + "logps/ref_rejected": -77.92434692382812, + "logps/rejected": -1167.801513671875, + "loss": 1.0728, + "margin_dpo/margin_mean": 482.8586120605469, + "margin_dpo/margin_std": 649.423583984375, + "step": 669 + }, + { + "KL/chosen_KL_mean": -589.9717407226562, + "KL/mean": -834.8968505859375, + "KL/rejected_KL_mean": -1079.822021484375, + "KL/std": 548.0260009765625, + "epoch": 0.9838472834067548, + "fcm_dpo/beta": 0.0009132723789662123, + "fcm_dpo/delta": -0.04978980869054794, + "fcm_dpo/margin": 489.85028076171875, + "fcm_dpo/q_t": 0.3985205292701721, + "grad_norm": 27.57679557800293, + "learning_rate": 4.741678157389739e-10, + "logits/chosen": -0.9369679689407349, + "logits/rejected": -0.956099271774292, + "logps/chosen": -652.3175048828125, + "logps/ref_chosen": -62.34575271606445, + "logps/ref_rejected": -96.9405517578125, + "logps/rejected": -1176.7625732421875, + "loss": 1.0764, + "margin_dpo/margin_mean": 489.85028076171875, + "margin_dpo/margin_std": 666.9021606445312, + "step": 670 + }, + { + "KL/chosen_KL_mean": -739.0850219726562, + "KL/mean": -940.6197509765625, + "KL/rejected_KL_mean": -1142.154541015625, + "KL/std": 546.3319091796875, + "epoch": 0.9853157121879589, + "fcm_dpo/beta": 0.0009144209325313568, + "fcm_dpo/delta": 0.03224237263202667, + "fcm_dpo/margin": 403.0694580078125, + "fcm_dpo/q_t": 0.4150552749633789, + "grad_norm": 31.614513397216797, + "learning_rate": 3.9845280344705245e-10, + "logits/chosen": -1.047501564025879, + "logits/rejected": -1.0776722431182861, + "logps/chosen": -787.0851440429688, + "logps/ref_chosen": -48.00010681152344, + "logps/ref_rejected": -83.81932067871094, + "logps/rejected": -1225.973876953125, + "loss": 1.1359, + "margin_dpo/margin_mean": 403.0694885253906, + "margin_dpo/margin_std": 654.9969482421875, + "step": 671 + }, + { + "KL/chosen_KL_mean": -817.5101318359375, + "KL/mean": -1022.951904296875, + "KL/rejected_KL_mean": -1228.3935546875, + "KL/std": 672.7301025390625, + "epoch": 0.986784140969163, + "fcm_dpo/beta": 0.0009142364142462611, + "fcm_dpo/delta": 0.02527567557990551, + "fcm_dpo/margin": 410.8834228515625, + "fcm_dpo/q_t": 0.4172729551792145, + "grad_norm": 50.31674575805664, + "learning_rate": 3.293150240547549e-10, + "logits/chosen": -1.097043514251709, + "logits/rejected": -1.1035444736480713, + "logps/chosen": -876.0934448242188, + "logps/ref_chosen": -58.58328628540039, + "logps/ref_rejected": -93.14015197753906, + "logps/rejected": -1321.53369140625, + "loss": 1.1531, + "margin_dpo/margin_mean": 410.8834228515625, + "margin_dpo/margin_std": 742.9523315429688, + "step": 672 + }, + { + "KL/chosen_KL_mean": -726.111572265625, + "KL/mean": -917.39892578125, + "KL/rejected_KL_mean": -1108.686279296875, + "KL/std": 555.8173828125, + "epoch": 0.9882525697503671, + "fcm_dpo/beta": 0.0009239012142643332, + "fcm_dpo/delta": 0.048241592943668365, + "fcm_dpo/margin": 382.5746765136719, + "fcm_dpo/q_t": 0.41999146342277527, + "grad_norm": 33.52194595336914, + "learning_rate": 2.6675629940689504e-10, + "logits/chosen": -1.051267147064209, + "logits/rejected": -1.0552277565002441, + "logps/chosen": -772.8348388671875, + "logps/ref_chosen": -46.72320556640625, + "logps/ref_rejected": -85.29623413085938, + "logps/rejected": -1193.982421875, + "loss": 1.1356, + "margin_dpo/margin_mean": 382.5746765136719, + "margin_dpo/margin_std": 613.573974609375, + "step": 673 + }, + { + "KL/chosen_KL_mean": -596.3148803710938, + "KL/mean": -841.9620971679688, + "KL/rejected_KL_mean": -1087.6092529296875, + "KL/std": 549.2061157226562, + "epoch": 0.9897209985315712, + "fcm_dpo/beta": 0.0009207893162965775, + "fcm_dpo/delta": -0.05484557896852493, + "fcm_dpo/margin": 491.2944641113281, + "fcm_dpo/q_t": 0.39986640214920044, + "grad_norm": 42.7830810546875, + "learning_rate": 2.1077827798404725e-10, + "logits/chosen": -0.9467175602912903, + "logits/rejected": -0.96770179271698, + "logps/chosen": -641.7603759765625, + "logps/ref_chosen": -45.445526123046875, + "logps/ref_rejected": -70.04593658447266, + "logps/rejected": -1157.6552734375, + "loss": 1.0662, + "margin_dpo/margin_mean": 491.29443359375, + "margin_dpo/margin_std": 659.54931640625, + "step": 674 + }, + { + "KL/chosen_KL_mean": -681.6663818359375, + "KL/mean": -935.557861328125, + "KL/rejected_KL_mean": -1189.44921875, + "KL/std": 612.2513427734375, + "epoch": 0.9911894273127754, + "fcm_dpo/beta": 0.0009013206581585109, + "fcm_dpo/delta": -0.06186992675065994, + "fcm_dpo/margin": 507.7829284667969, + "fcm_dpo/q_t": 0.3982793688774109, + "grad_norm": 28.38162612915039, + "learning_rate": 1.6138243485910863e-10, + "logits/chosen": -1.023393154144287, + "logits/rejected": -1.0347087383270264, + "logps/chosen": -725.8426513671875, + "logps/ref_chosen": -44.17628479003906, + "logps/ref_rejected": -74.09197998046875, + "logps/rejected": -1263.541259765625, + "loss": 1.0636, + "margin_dpo/margin_mean": 507.782958984375, + "margin_dpo/margin_std": 652.8712158203125, + "step": 675 + }, + { + "KL/chosen_KL_mean": -729.031005859375, + "KL/mean": -965.601318359375, + "KL/rejected_KL_mean": -1202.1717529296875, + "KL/std": 577.6213989257812, + "epoch": 0.9926578560939795, + "fcm_dpo/beta": 0.0009026783518493176, + "fcm_dpo/delta": -0.028323372825980186, + "fcm_dpo/margin": 473.1407775878906, + "fcm_dpo/q_t": 0.4024215042591095, + "grad_norm": 25.832805633544922, + "learning_rate": 1.1857007165852472e-10, + "logits/chosen": -0.9483187198638916, + "logits/rejected": -0.9624805450439453, + "logps/chosen": -800.4295043945312, + "logps/ref_chosen": -71.39852905273438, + "logps/ref_rejected": -88.3587646484375, + "logps/rejected": -1290.530517578125, + "loss": 1.0689, + "margin_dpo/margin_mean": 473.14080810546875, + "margin_dpo/margin_std": 579.1337280273438, + "step": 676 + }, + { + "KL/chosen_KL_mean": -734.3248291015625, + "KL/mean": -961.4959716796875, + "KL/rejected_KL_mean": -1188.667236328125, + "KL/std": 600.77294921875, + "epoch": 0.9941262848751835, + "fcm_dpo/beta": 0.0008973192889243364, + "fcm_dpo/delta": -0.008067594841122627, + "fcm_dpo/margin": 454.34246826171875, + "fcm_dpo/q_t": 0.41222789883613586, + "grad_norm": 29.9565372467041, + "learning_rate": 8.23423165278725e-11, + "logits/chosen": -1.0780959129333496, + "logits/rejected": -1.0737848281860352, + "logps/chosen": -790.8522338867188, + "logps/ref_chosen": -56.527435302734375, + "logps/ref_rejected": -78.22654724121094, + "logps/rejected": -1266.893798828125, + "loss": 1.1093, + "margin_dpo/margin_mean": 454.34246826171875, + "margin_dpo/margin_std": 722.945068359375, + "step": 677 + }, + { + "KL/chosen_KL_mean": -620.3502197265625, + "KL/mean": -886.8961181640625, + "KL/rejected_KL_mean": -1153.44189453125, + "KL/std": 635.0799560546875, + "epoch": 0.9955947136563876, + "fcm_dpo/beta": 0.0008914459031075239, + "fcm_dpo/delta": -0.07900315523147583, + "fcm_dpo/margin": 533.0916137695312, + "fcm_dpo/q_t": 0.39344462752342224, + "grad_norm": 33.316654205322266, + "learning_rate": 5.270012410216185e-11, + "logits/chosen": -0.9998750686645508, + "logits/rejected": -1.036036491394043, + "logps/chosen": -666.4847412109375, + "logps/ref_chosen": -46.13447570800781, + "logps/ref_rejected": -80.60462951660156, + "logps/rejected": -1234.0465087890625, + "loss": 1.058, + "margin_dpo/margin_mean": 533.0916137695312, + "margin_dpo/margin_std": 715.0037841796875, + "step": 678 + }, + { + "KL/chosen_KL_mean": -701.3464965820312, + "KL/mean": -892.486328125, + "KL/rejected_KL_mean": -1083.626220703125, + "KL/std": 525.0972900390625, + "epoch": 0.9970631424375918, + "fcm_dpo/beta": 0.0008914553327485919, + "fcm_dpo/delta": 0.06134221330285072, + "fcm_dpo/margin": 382.27972412109375, + "fcm_dpo/q_t": 0.42303356528282166, + "grad_norm": 31.741161346435547, + "learning_rate": 2.9644275480772416e-11, + "logits/chosen": -1.020527720451355, + "logits/rejected": -1.010463833808899, + "logps/chosen": -751.6414184570312, + "logps/ref_chosen": -50.294921875, + "logps/ref_rejected": -76.59813690185547, + "logps/rejected": -1160.224365234375, + "loss": 1.1415, + "margin_dpo/margin_mean": 382.2797546386719, + "margin_dpo/margin_std": 604.84912109375, + "step": 679 + }, + { + "KL/chosen_KL_mean": -712.27880859375, + "KL/mean": -966.59423828125, + "KL/rejected_KL_mean": -1220.90966796875, + "KL/std": 689.5088500976562, + "epoch": 0.9985315712187959, + "fcm_dpo/beta": 0.0008838686626404524, + "fcm_dpo/delta": -0.05259976163506508, + "fcm_dpo/margin": 508.6307678222656, + "fcm_dpo/q_t": 0.3975260853767395, + "grad_norm": 40.12180709838867, + "learning_rate": 1.31753782067201e-11, + "logits/chosen": -1.00642728805542, + "logits/rejected": -1.0368506908416748, + "logps/chosen": -789.1945190429688, + "logps/ref_chosen": -76.91569519042969, + "logps/ref_rejected": -112.384765625, + "logps/rejected": -1333.29443359375, + "loss": 1.0861, + "margin_dpo/margin_mean": 508.6307678222656, + "margin_dpo/margin_std": 746.896484375, + "step": 680 + }, + { + "KL/chosen_KL_mean": -710.1919555664062, + "KL/mean": -909.7540283203125, + "KL/rejected_KL_mean": -1109.316162109375, + "KL/std": 567.23486328125, + "epoch": 1.0, + "fcm_dpo/beta": 0.0008977074176073074, + "fcm_dpo/delta": 0.04187752678990364, + "fcm_dpo/margin": 399.12420654296875, + "fcm_dpo/q_t": 0.4188354015350342, + "grad_norm": 33.870479583740234, + "learning_rate": 3.2938662507808745e-12, + "logits/chosen": -1.0663830041885376, + "logits/rejected": -1.0852856636047363, + "logps/chosen": -771.1492309570312, + "logps/ref_chosen": -60.957279205322266, + "logps/ref_rejected": -88.55797576904297, + "logps/rejected": -1197.8740234375, + "loss": 1.1392, + "margin_dpo/margin_mean": 399.1241760253906, + "margin_dpo/margin_std": 622.114501953125, + "step": 681 + }, + { + "epoch": 1.0, + "step": 681, + "total_flos": 0.0, + "train_loss": 1.0911195537242244, + "train_runtime": 1739.0324, + "train_samples_per_second": 25.07, + "train_steps_per_second": 0.392 + } + ], + "logging_steps": 1, + "max_steps": 681, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}