From 3958945219e597218b49798f3c149ce00d266365 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Fri, 22 May 2026 21:14:21 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: W-61/llama3-hh-helpful-qt045-b0p5-20260429-085449 Source: Original Platform --- .gitattributes | 36 + README.md | 62 + all_results.json | 9 + config.json | 29 + generation_config.json | 9 + margin_logs/margins.jsonl | 681 ++ margin_logs/step_0000001.npy | 3 + margin_logs/step_0000002.npy | 3 + margin_logs/step_0000003.npy | 3 + margin_logs/step_0000004.npy | 3 + margin_logs/step_0000005.npy | 3 + margin_logs/step_0000006.npy | 3 + margin_logs/step_0000007.npy | 3 + margin_logs/step_0000008.npy | 3 + margin_logs/step_0000009.npy | 3 + margin_logs/step_0000010.npy | 3 + margin_logs/step_0000011.npy | 3 + margin_logs/step_0000012.npy | 3 + margin_logs/step_0000013.npy | 3 + margin_logs/step_0000014.npy | 3 + margin_logs/step_0000015.npy | 3 + margin_logs/step_0000016.npy | 3 + margin_logs/step_0000017.npy | 3 + margin_logs/step_0000018.npy | 3 + margin_logs/step_0000019.npy | 3 + margin_logs/step_0000020.npy | 3 + margin_logs/step_0000021.npy | 3 + margin_logs/step_0000022.npy | 3 + margin_logs/step_0000023.npy | 3 + margin_logs/step_0000024.npy | 3 + margin_logs/step_0000025.npy | 3 + margin_logs/step_0000026.npy | 3 + margin_logs/step_0000027.npy | 3 + margin_logs/step_0000028.npy | 3 + margin_logs/step_0000029.npy | 3 + margin_logs/step_0000030.npy | 3 + margin_logs/step_0000031.npy | 3 + margin_logs/step_0000032.npy | 3 + margin_logs/step_0000033.npy | 3 + margin_logs/step_0000034.npy | 3 + margin_logs/step_0000035.npy | 3 + margin_logs/step_0000036.npy | 3 + margin_logs/step_0000037.npy | 3 + margin_logs/step_0000038.npy | 3 + margin_logs/step_0000039.npy | 3 + margin_logs/step_0000040.npy | 3 + margin_logs/step_0000041.npy | 3 + margin_logs/step_0000042.npy | 3 + margin_logs/step_0000043.npy | 3 + margin_logs/step_0000044.npy | 3 + margin_logs/step_0000045.npy | 3 + margin_logs/step_0000046.npy | 3 + margin_logs/step_0000047.npy | 3 + margin_logs/step_0000048.npy | 3 + margin_logs/step_0000049.npy | 3 + margin_logs/step_0000050.npy | 3 + margin_logs/step_0000051.npy | 3 + margin_logs/step_0000052.npy | 3 + margin_logs/step_0000053.npy | 3 + margin_logs/step_0000054.npy | 3 + margin_logs/step_0000055.npy | 3 + margin_logs/step_0000056.npy | 3 + margin_logs/step_0000057.npy | 3 + margin_logs/step_0000058.npy | 3 + margin_logs/step_0000059.npy | 3 + margin_logs/step_0000060.npy | 3 + margin_logs/step_0000061.npy | 3 + margin_logs/step_0000062.npy | 3 + margin_logs/step_0000063.npy | 3 + margin_logs/step_0000064.npy | 3 + margin_logs/step_0000065.npy | 3 + margin_logs/step_0000066.npy | 3 + margin_logs/step_0000067.npy | 3 + margin_logs/step_0000068.npy | 3 + margin_logs/step_0000069.npy | 3 + margin_logs/step_0000070.npy | 3 + margin_logs/step_0000071.npy | 3 + margin_logs/step_0000072.npy | 3 + margin_logs/step_0000073.npy | 3 + margin_logs/step_0000074.npy | 3 + margin_logs/step_0000075.npy | 3 + margin_logs/step_0000076.npy | 3 + margin_logs/step_0000077.npy | 3 + margin_logs/step_0000078.npy | 3 + margin_logs/step_0000079.npy | 3 + margin_logs/step_0000080.npy | 3 + margin_logs/step_0000081.npy | 3 + margin_logs/step_0000082.npy | 3 + margin_logs/step_0000083.npy | 3 + margin_logs/step_0000084.npy | 3 + margin_logs/step_0000085.npy | 3 + margin_logs/step_0000086.npy | 3 + margin_logs/step_0000087.npy | 3 + margin_logs/step_0000088.npy | 3 + margin_logs/step_0000089.npy | 3 + margin_logs/step_0000090.npy | 3 + margin_logs/step_0000091.npy | 3 + margin_logs/step_0000092.npy | 3 + margin_logs/step_0000093.npy | 3 + margin_logs/step_0000094.npy | 3 + margin_logs/step_0000095.npy | 3 + margin_logs/step_0000096.npy | 3 + margin_logs/step_0000097.npy | 3 + margin_logs/step_0000098.npy | 3 + margin_logs/step_0000099.npy | 3 + margin_logs/step_0000100.npy | 3 + margin_logs/step_0000101.npy | 3 + margin_logs/step_0000102.npy | 3 + margin_logs/step_0000103.npy | 3 + margin_logs/step_0000104.npy | 3 + margin_logs/step_0000105.npy | 3 + margin_logs/step_0000106.npy | 3 + margin_logs/step_0000107.npy | 3 + margin_logs/step_0000108.npy | 3 + margin_logs/step_0000109.npy | 3 + margin_logs/step_0000110.npy | 3 + margin_logs/step_0000111.npy | 3 + margin_logs/step_0000112.npy | 3 + margin_logs/step_0000113.npy | 3 + margin_logs/step_0000114.npy | 3 + margin_logs/step_0000115.npy | 3 + margin_logs/step_0000116.npy | 3 + margin_logs/step_0000117.npy | 3 + margin_logs/step_0000118.npy | 3 + margin_logs/step_0000119.npy | 3 + margin_logs/step_0000120.npy | 3 + margin_logs/step_0000121.npy | 3 + margin_logs/step_0000122.npy | 3 + margin_logs/step_0000123.npy | 3 + margin_logs/step_0000124.npy | 3 + margin_logs/step_0000125.npy | 3 + margin_logs/step_0000126.npy | 3 + margin_logs/step_0000127.npy | 3 + margin_logs/step_0000128.npy | 3 + margin_logs/step_0000129.npy | 3 + margin_logs/step_0000130.npy | 3 + margin_logs/step_0000131.npy | 3 + margin_logs/step_0000132.npy | 3 + margin_logs/step_0000133.npy | 3 + margin_logs/step_0000134.npy | 3 + margin_logs/step_0000135.npy | 3 + margin_logs/step_0000136.npy | 3 + margin_logs/step_0000137.npy | 3 + margin_logs/step_0000138.npy | 3 + margin_logs/step_0000139.npy | 3 + margin_logs/step_0000140.npy | 3 + margin_logs/step_0000141.npy | 3 + margin_logs/step_0000142.npy | 3 + margin_logs/step_0000143.npy | 3 + margin_logs/step_0000144.npy | 3 + margin_logs/step_0000145.npy | 3 + margin_logs/step_0000146.npy | 3 + margin_logs/step_0000147.npy | 3 + margin_logs/step_0000148.npy | 3 + margin_logs/step_0000149.npy | 3 + margin_logs/step_0000150.npy | 3 + margin_logs/step_0000151.npy | 3 + margin_logs/step_0000152.npy | 3 + margin_logs/step_0000153.npy | 3 + margin_logs/step_0000154.npy | 3 + margin_logs/step_0000155.npy | 3 + margin_logs/step_0000156.npy | 3 + margin_logs/step_0000157.npy | 3 + margin_logs/step_0000158.npy | 3 + margin_logs/step_0000159.npy | 3 + margin_logs/step_0000160.npy | 3 + margin_logs/step_0000161.npy | 3 + margin_logs/step_0000162.npy | 3 + margin_logs/step_0000163.npy | 3 + margin_logs/step_0000164.npy | 3 + margin_logs/step_0000165.npy | 3 + margin_logs/step_0000166.npy | 3 + margin_logs/step_0000167.npy | 3 + margin_logs/step_0000168.npy | 3 + margin_logs/step_0000169.npy | 3 + margin_logs/step_0000170.npy | 3 + margin_logs/step_0000171.npy | 3 + margin_logs/step_0000172.npy | 3 + margin_logs/step_0000173.npy | 3 + margin_logs/step_0000174.npy | 3 + margin_logs/step_0000175.npy | 3 + margin_logs/step_0000176.npy | 3 + margin_logs/step_0000177.npy | 3 + margin_logs/step_0000178.npy | 3 + margin_logs/step_0000179.npy | 3 + margin_logs/step_0000180.npy | 3 + margin_logs/step_0000181.npy | 3 + margin_logs/step_0000182.npy | 3 + margin_logs/step_0000183.npy | 3 + margin_logs/step_0000184.npy | 3 + margin_logs/step_0000185.npy | 3 + margin_logs/step_0000186.npy | 3 + margin_logs/step_0000187.npy | 3 + margin_logs/step_0000188.npy | 3 + margin_logs/step_0000189.npy | 3 + margin_logs/step_0000190.npy | 3 + margin_logs/step_0000191.npy | 3 + margin_logs/step_0000192.npy | 3 + margin_logs/step_0000193.npy | 3 + margin_logs/step_0000194.npy | 3 + margin_logs/step_0000195.npy | 3 + margin_logs/step_0000196.npy | 3 + margin_logs/step_0000197.npy | 3 + margin_logs/step_0000198.npy | 3 + margin_logs/step_0000199.npy | 3 + margin_logs/step_0000200.npy | 3 + margin_logs/step_0000201.npy | 3 + margin_logs/step_0000202.npy | 3 + margin_logs/step_0000203.npy | 3 + margin_logs/step_0000204.npy | 3 + margin_logs/step_0000205.npy | 3 + margin_logs/step_0000206.npy | 3 + margin_logs/step_0000207.npy | 3 + margin_logs/step_0000208.npy | 3 + margin_logs/step_0000209.npy | 3 + margin_logs/step_0000210.npy | 3 + margin_logs/step_0000211.npy | 3 + margin_logs/step_0000212.npy | 3 + margin_logs/step_0000213.npy | 3 + margin_logs/step_0000214.npy | 3 + margin_logs/step_0000215.npy | 3 + margin_logs/step_0000216.npy | 3 + margin_logs/step_0000217.npy | 3 + margin_logs/step_0000218.npy | 3 + margin_logs/step_0000219.npy | 3 + margin_logs/step_0000220.npy | 3 + margin_logs/step_0000221.npy | 3 + margin_logs/step_0000222.npy | 3 + margin_logs/step_0000223.npy | 3 + margin_logs/step_0000224.npy | 3 + margin_logs/step_0000225.npy | 3 + margin_logs/step_0000226.npy | 3 + margin_logs/step_0000227.npy | 3 + margin_logs/step_0000228.npy | 3 + margin_logs/step_0000229.npy | 3 + margin_logs/step_0000230.npy | 3 + margin_logs/step_0000231.npy | 3 + margin_logs/step_0000232.npy | 3 + margin_logs/step_0000233.npy | 3 + margin_logs/step_0000234.npy | 3 + margin_logs/step_0000235.npy | 3 + margin_logs/step_0000236.npy | 3 + margin_logs/step_0000237.npy | 3 + margin_logs/step_0000238.npy | 3 + margin_logs/step_0000239.npy | 3 + margin_logs/step_0000240.npy | 3 + margin_logs/step_0000241.npy | 3 + margin_logs/step_0000242.npy | 3 + margin_logs/step_0000243.npy | 3 + margin_logs/step_0000244.npy | 3 + margin_logs/step_0000245.npy | 3 + margin_logs/step_0000246.npy | 3 + margin_logs/step_0000247.npy | 3 + margin_logs/step_0000248.npy | 3 + margin_logs/step_0000249.npy | 3 + margin_logs/step_0000250.npy | 3 + margin_logs/step_0000251.npy | 3 + margin_logs/step_0000252.npy | 3 + margin_logs/step_0000253.npy | 3 + margin_logs/step_0000254.npy | 3 + margin_logs/step_0000255.npy | 3 + margin_logs/step_0000256.npy | 3 + margin_logs/step_0000257.npy | 3 + margin_logs/step_0000258.npy | 3 + margin_logs/step_0000259.npy | 3 + margin_logs/step_0000260.npy | 3 + margin_logs/step_0000261.npy | 3 + margin_logs/step_0000262.npy | 3 + margin_logs/step_0000263.npy | 3 + margin_logs/step_0000264.npy | 3 + margin_logs/step_0000265.npy | 3 + margin_logs/step_0000266.npy | 3 + margin_logs/step_0000267.npy | 3 + margin_logs/step_0000268.npy | 3 + margin_logs/step_0000269.npy | 3 + margin_logs/step_0000270.npy | 3 + margin_logs/step_0000271.npy | 3 + margin_logs/step_0000272.npy | 3 + margin_logs/step_0000273.npy | 3 + margin_logs/step_0000274.npy | 3 + margin_logs/step_0000275.npy | 3 + margin_logs/step_0000276.npy | 3 + margin_logs/step_0000277.npy | 3 + margin_logs/step_0000278.npy | 3 + margin_logs/step_0000279.npy | 3 + margin_logs/step_0000280.npy | 3 + margin_logs/step_0000281.npy | 3 + margin_logs/step_0000282.npy | 3 + margin_logs/step_0000283.npy | 3 + margin_logs/step_0000284.npy | 3 + margin_logs/step_0000285.npy | 3 + margin_logs/step_0000286.npy | 3 + margin_logs/step_0000287.npy | 3 + margin_logs/step_0000288.npy | 3 + margin_logs/step_0000289.npy | 3 + margin_logs/step_0000290.npy | 3 + margin_logs/step_0000291.npy | 3 + margin_logs/step_0000292.npy | 3 + margin_logs/step_0000293.npy | 3 + margin_logs/step_0000294.npy | 3 + margin_logs/step_0000295.npy | 3 + margin_logs/step_0000296.npy | 3 + margin_logs/step_0000297.npy | 3 + margin_logs/step_0000298.npy | 3 + margin_logs/step_0000299.npy | 3 + margin_logs/step_0000300.npy | 3 + margin_logs/step_0000301.npy | 3 + margin_logs/step_0000302.npy | 3 + margin_logs/step_0000303.npy | 3 + margin_logs/step_0000304.npy | 3 + margin_logs/step_0000305.npy | 3 + margin_logs/step_0000306.npy | 3 + margin_logs/step_0000307.npy | 3 + margin_logs/step_0000308.npy | 3 + margin_logs/step_0000309.npy | 3 + margin_logs/step_0000310.npy | 3 + margin_logs/step_0000311.npy | 3 + margin_logs/step_0000312.npy | 3 + margin_logs/step_0000313.npy | 3 + margin_logs/step_0000314.npy | 3 + margin_logs/step_0000315.npy | 3 + margin_logs/step_0000316.npy | 3 + margin_logs/step_0000317.npy | 3 + margin_logs/step_0000318.npy | 3 + margin_logs/step_0000319.npy | 3 + margin_logs/step_0000320.npy | 3 + margin_logs/step_0000321.npy | 3 + margin_logs/step_0000322.npy | 3 + margin_logs/step_0000323.npy | 3 + margin_logs/step_0000324.npy | 3 + margin_logs/step_0000325.npy | 3 + margin_logs/step_0000326.npy | 3 + margin_logs/step_0000327.npy | 3 + margin_logs/step_0000328.npy | 3 + margin_logs/step_0000329.npy | 3 + margin_logs/step_0000330.npy | 3 + margin_logs/step_0000331.npy | 3 + margin_logs/step_0000332.npy | 3 + margin_logs/step_0000333.npy | 3 + margin_logs/step_0000334.npy | 3 + margin_logs/step_0000335.npy | 3 + margin_logs/step_0000336.npy | 3 + margin_logs/step_0000337.npy | 3 + margin_logs/step_0000338.npy | 3 + margin_logs/step_0000339.npy | 3 + margin_logs/step_0000340.npy | 3 + margin_logs/step_0000341.npy | 3 + margin_logs/step_0000342.npy | 3 + margin_logs/step_0000343.npy | 3 + margin_logs/step_0000344.npy | 3 + margin_logs/step_0000345.npy | 3 + margin_logs/step_0000346.npy | 3 + margin_logs/step_0000347.npy | 3 + margin_logs/step_0000348.npy | 3 + margin_logs/step_0000349.npy | 3 + margin_logs/step_0000350.npy | 3 + margin_logs/step_0000351.npy | 3 + margin_logs/step_0000352.npy | 3 + margin_logs/step_0000353.npy | 3 + margin_logs/step_0000354.npy | 3 + margin_logs/step_0000355.npy | 3 + margin_logs/step_0000356.npy | 3 + margin_logs/step_0000357.npy | 3 + margin_logs/step_0000358.npy | 3 + margin_logs/step_0000359.npy | 3 + margin_logs/step_0000360.npy | 3 + margin_logs/step_0000361.npy | 3 + margin_logs/step_0000362.npy | 3 + margin_logs/step_0000363.npy | 3 + margin_logs/step_0000364.npy | 3 + margin_logs/step_0000365.npy | 3 + margin_logs/step_0000366.npy | 3 + margin_logs/step_0000367.npy | 3 + margin_logs/step_0000368.npy | 3 + margin_logs/step_0000369.npy | 3 + margin_logs/step_0000370.npy | 3 + margin_logs/step_0000371.npy | 3 + margin_logs/step_0000372.npy | 3 + margin_logs/step_0000373.npy | 3 + margin_logs/step_0000374.npy | 3 + margin_logs/step_0000375.npy | 3 + margin_logs/step_0000376.npy | 3 + margin_logs/step_0000377.npy | 3 + margin_logs/step_0000378.npy | 3 + margin_logs/step_0000379.npy | 3 + margin_logs/step_0000380.npy | 3 + margin_logs/step_0000381.npy | 3 + margin_logs/step_0000382.npy | 3 + margin_logs/step_0000383.npy | 3 + margin_logs/step_0000384.npy | 3 + margin_logs/step_0000385.npy | 3 + margin_logs/step_0000386.npy | 3 + margin_logs/step_0000387.npy | 3 + margin_logs/step_0000388.npy | 3 + margin_logs/step_0000389.npy | 3 + margin_logs/step_0000390.npy | 3 + margin_logs/step_0000391.npy | 3 + margin_logs/step_0000392.npy | 3 + margin_logs/step_0000393.npy | 3 + margin_logs/step_0000394.npy | 3 + margin_logs/step_0000395.npy | 3 + margin_logs/step_0000396.npy | 3 + margin_logs/step_0000397.npy | 3 + margin_logs/step_0000398.npy | 3 + margin_logs/step_0000399.npy | 3 + margin_logs/step_0000400.npy | 3 + margin_logs/step_0000401.npy | 3 + margin_logs/step_0000402.npy | 3 + margin_logs/step_0000403.npy | 3 + margin_logs/step_0000404.npy | 3 + margin_logs/step_0000405.npy | 3 + margin_logs/step_0000406.npy | 3 + margin_logs/step_0000407.npy | 3 + margin_logs/step_0000408.npy | 3 + margin_logs/step_0000409.npy | 3 + margin_logs/step_0000410.npy | 3 + margin_logs/step_0000411.npy | 3 + margin_logs/step_0000412.npy | 3 + margin_logs/step_0000413.npy | 3 + margin_logs/step_0000414.npy | 3 + margin_logs/step_0000415.npy | 3 + margin_logs/step_0000416.npy | 3 + margin_logs/step_0000417.npy | 3 + margin_logs/step_0000418.npy | 3 + margin_logs/step_0000419.npy | 3 + margin_logs/step_0000420.npy | 3 + margin_logs/step_0000421.npy | 3 + margin_logs/step_0000422.npy | 3 + margin_logs/step_0000423.npy | 3 + margin_logs/step_0000424.npy | 3 + margin_logs/step_0000425.npy | 3 + margin_logs/step_0000426.npy | 3 + margin_logs/step_0000427.npy | 3 + margin_logs/step_0000428.npy | 3 + margin_logs/step_0000429.npy | 3 + margin_logs/step_0000430.npy | 3 + margin_logs/step_0000431.npy | 3 + margin_logs/step_0000432.npy | 3 + margin_logs/step_0000433.npy | 3 + margin_logs/step_0000434.npy | 3 + margin_logs/step_0000435.npy | 3 + margin_logs/step_0000436.npy | 3 + margin_logs/step_0000437.npy | 3 + margin_logs/step_0000438.npy | 3 + margin_logs/step_0000439.npy | 3 + margin_logs/step_0000440.npy | 3 + margin_logs/step_0000441.npy | 3 + margin_logs/step_0000442.npy | 3 + margin_logs/step_0000443.npy | 3 + margin_logs/step_0000444.npy | 3 + margin_logs/step_0000445.npy | 3 + margin_logs/step_0000446.npy | 3 + margin_logs/step_0000447.npy | 3 + margin_logs/step_0000448.npy | 3 + margin_logs/step_0000449.npy | 3 + margin_logs/step_0000450.npy | 3 + margin_logs/step_0000451.npy | 3 + margin_logs/step_0000452.npy | 3 + margin_logs/step_0000453.npy | 3 + margin_logs/step_0000454.npy | 3 + margin_logs/step_0000455.npy | 3 + margin_logs/step_0000456.npy | 3 + margin_logs/step_0000457.npy | 3 + margin_logs/step_0000458.npy | 3 + margin_logs/step_0000459.npy | 3 + margin_logs/step_0000460.npy | 3 + margin_logs/step_0000461.npy | 3 + margin_logs/step_0000462.npy | 3 + margin_logs/step_0000463.npy | 3 + margin_logs/step_0000464.npy | 3 + margin_logs/step_0000465.npy | 3 + margin_logs/step_0000466.npy | 3 + margin_logs/step_0000467.npy | 3 + margin_logs/step_0000468.npy | 3 + margin_logs/step_0000469.npy | 3 + margin_logs/step_0000470.npy | 3 + margin_logs/step_0000471.npy | 3 + margin_logs/step_0000472.npy | 3 + margin_logs/step_0000473.npy | 3 + margin_logs/step_0000474.npy | 3 + margin_logs/step_0000475.npy | 3 + margin_logs/step_0000476.npy | 3 + margin_logs/step_0000477.npy | 3 + margin_logs/step_0000478.npy | 3 + margin_logs/step_0000479.npy | 3 + margin_logs/step_0000480.npy | 3 + margin_logs/step_0000481.npy | 3 + margin_logs/step_0000482.npy | 3 + margin_logs/step_0000483.npy | 3 + margin_logs/step_0000484.npy | 3 + margin_logs/step_0000485.npy | 3 + margin_logs/step_0000486.npy | 3 + margin_logs/step_0000487.npy | 3 + margin_logs/step_0000488.npy | 3 + margin_logs/step_0000489.npy | 3 + margin_logs/step_0000490.npy | 3 + margin_logs/step_0000491.npy | 3 + margin_logs/step_0000492.npy | 3 + margin_logs/step_0000493.npy | 3 + margin_logs/step_0000494.npy | 3 + margin_logs/step_0000495.npy | 3 + margin_logs/step_0000496.npy | 3 + margin_logs/step_0000497.npy | 3 + margin_logs/step_0000498.npy | 3 + margin_logs/step_0000499.npy | 3 + margin_logs/step_0000500.npy | 3 + margin_logs/step_0000501.npy | 3 + margin_logs/step_0000502.npy | 3 + margin_logs/step_0000503.npy | 3 + margin_logs/step_0000504.npy | 3 + margin_logs/step_0000505.npy | 3 + margin_logs/step_0000506.npy | 3 + margin_logs/step_0000507.npy | 3 + margin_logs/step_0000508.npy | 3 + margin_logs/step_0000509.npy | 3 + margin_logs/step_0000510.npy | 3 + margin_logs/step_0000511.npy | 3 + margin_logs/step_0000512.npy | 3 + margin_logs/step_0000513.npy | 3 + margin_logs/step_0000514.npy | 3 + margin_logs/step_0000515.npy | 3 + margin_logs/step_0000516.npy | 3 + margin_logs/step_0000517.npy | 3 + margin_logs/step_0000518.npy | 3 + margin_logs/step_0000519.npy | 3 + margin_logs/step_0000520.npy | 3 + margin_logs/step_0000521.npy | 3 + margin_logs/step_0000522.npy | 3 + margin_logs/step_0000523.npy | 3 + margin_logs/step_0000524.npy | 3 + margin_logs/step_0000525.npy | 3 + margin_logs/step_0000526.npy | 3 + margin_logs/step_0000527.npy | 3 + margin_logs/step_0000528.npy | 3 + margin_logs/step_0000529.npy | 3 + margin_logs/step_0000530.npy | 3 + margin_logs/step_0000531.npy | 3 + margin_logs/step_0000532.npy | 3 + margin_logs/step_0000533.npy | 3 + margin_logs/step_0000534.npy | 3 + margin_logs/step_0000535.npy | 3 + margin_logs/step_0000536.npy | 3 + margin_logs/step_0000537.npy | 3 + margin_logs/step_0000538.npy | 3 + margin_logs/step_0000539.npy | 3 + margin_logs/step_0000540.npy | 3 + margin_logs/step_0000541.npy | 3 + margin_logs/step_0000542.npy | 3 + margin_logs/step_0000543.npy | 3 + margin_logs/step_0000544.npy | 3 + margin_logs/step_0000545.npy | 3 + margin_logs/step_0000546.npy | 3 + margin_logs/step_0000547.npy | 3 + margin_logs/step_0000548.npy | 3 + margin_logs/step_0000549.npy | 3 + margin_logs/step_0000550.npy | 3 + margin_logs/step_0000551.npy | 3 + margin_logs/step_0000552.npy | 3 + margin_logs/step_0000553.npy | 3 + margin_logs/step_0000554.npy | 3 + margin_logs/step_0000555.npy | 3 + margin_logs/step_0000556.npy | 3 + margin_logs/step_0000557.npy | 3 + margin_logs/step_0000558.npy | 3 + margin_logs/step_0000559.npy | 3 + margin_logs/step_0000560.npy | 3 + margin_logs/step_0000561.npy | 3 + margin_logs/step_0000562.npy | 3 + margin_logs/step_0000563.npy | 3 + margin_logs/step_0000564.npy | 3 + margin_logs/step_0000565.npy | 3 + margin_logs/step_0000566.npy | 3 + margin_logs/step_0000567.npy | 3 + margin_logs/step_0000568.npy | 3 + margin_logs/step_0000569.npy | 3 + margin_logs/step_0000570.npy | 3 + margin_logs/step_0000571.npy | 3 + margin_logs/step_0000572.npy | 3 + margin_logs/step_0000573.npy | 3 + margin_logs/step_0000574.npy | 3 + margin_logs/step_0000575.npy | 3 + margin_logs/step_0000576.npy | 3 + margin_logs/step_0000577.npy | 3 + margin_logs/step_0000578.npy | 3 + margin_logs/step_0000579.npy | 3 + margin_logs/step_0000580.npy | 3 + margin_logs/step_0000581.npy | 3 + margin_logs/step_0000582.npy | 3 + margin_logs/step_0000583.npy | 3 + margin_logs/step_0000584.npy | 3 + margin_logs/step_0000585.npy | 3 + margin_logs/step_0000586.npy | 3 + margin_logs/step_0000587.npy | 3 + margin_logs/step_0000588.npy | 3 + margin_logs/step_0000589.npy | 3 + margin_logs/step_0000590.npy | 3 + margin_logs/step_0000591.npy | 3 + margin_logs/step_0000592.npy | 3 + margin_logs/step_0000593.npy | 3 + margin_logs/step_0000594.npy | 3 + margin_logs/step_0000595.npy | 3 + margin_logs/step_0000596.npy | 3 + margin_logs/step_0000597.npy | 3 + margin_logs/step_0000598.npy | 3 + margin_logs/step_0000599.npy | 3 + margin_logs/step_0000600.npy | 3 + margin_logs/step_0000601.npy | 3 + margin_logs/step_0000602.npy | 3 + margin_logs/step_0000603.npy | 3 + margin_logs/step_0000604.npy | 3 + margin_logs/step_0000605.npy | 3 + margin_logs/step_0000606.npy | 3 + margin_logs/step_0000607.npy | 3 + margin_logs/step_0000608.npy | 3 + margin_logs/step_0000609.npy | 3 + margin_logs/step_0000610.npy | 3 + margin_logs/step_0000611.npy | 3 + margin_logs/step_0000612.npy | 3 + margin_logs/step_0000613.npy | 3 + margin_logs/step_0000614.npy | 3 + margin_logs/step_0000615.npy | 3 + margin_logs/step_0000616.npy | 3 + margin_logs/step_0000617.npy | 3 + margin_logs/step_0000618.npy | 3 + margin_logs/step_0000619.npy | 3 + margin_logs/step_0000620.npy | 3 + margin_logs/step_0000621.npy | 3 + margin_logs/step_0000622.npy | 3 + margin_logs/step_0000623.npy | 3 + margin_logs/step_0000624.npy | 3 + margin_logs/step_0000625.npy | 3 + margin_logs/step_0000626.npy | 3 + margin_logs/step_0000627.npy | 3 + margin_logs/step_0000628.npy | 3 + margin_logs/step_0000629.npy | 3 + margin_logs/step_0000630.npy | 3 + margin_logs/step_0000631.npy | 3 + margin_logs/step_0000632.npy | 3 + margin_logs/step_0000633.npy | 3 + margin_logs/step_0000634.npy | 3 + margin_logs/step_0000635.npy | 3 + margin_logs/step_0000636.npy | 3 + margin_logs/step_0000637.npy | 3 + margin_logs/step_0000638.npy | 3 + margin_logs/step_0000639.npy | 3 + margin_logs/step_0000640.npy | 3 + margin_logs/step_0000641.npy | 3 + margin_logs/step_0000642.npy | 3 + margin_logs/step_0000643.npy | 3 + margin_logs/step_0000644.npy | 3 + margin_logs/step_0000645.npy | 3 + margin_logs/step_0000646.npy | 3 + margin_logs/step_0000647.npy | 3 + margin_logs/step_0000648.npy | 3 + margin_logs/step_0000649.npy | 3 + margin_logs/step_0000650.npy | 3 + margin_logs/step_0000651.npy | 3 + margin_logs/step_0000652.npy | 3 + margin_logs/step_0000653.npy | 3 + margin_logs/step_0000654.npy | 3 + margin_logs/step_0000655.npy | 3 + margin_logs/step_0000656.npy | 3 + margin_logs/step_0000657.npy | 3 + margin_logs/step_0000658.npy | 3 + margin_logs/step_0000659.npy | 3 + margin_logs/step_0000660.npy | 3 + margin_logs/step_0000661.npy | 3 + margin_logs/step_0000662.npy | 3 + margin_logs/step_0000663.npy | 3 + margin_logs/step_0000664.npy | 3 + margin_logs/step_0000665.npy | 3 + margin_logs/step_0000666.npy | 3 + margin_logs/step_0000667.npy | 3 + margin_logs/step_0000668.npy | 3 + margin_logs/step_0000669.npy | 3 + margin_logs/step_0000670.npy | 3 + margin_logs/step_0000671.npy | 3 + margin_logs/step_0000672.npy | 3 + margin_logs/step_0000673.npy | 3 + margin_logs/step_0000674.npy | 3 + margin_logs/step_0000675.npy | 3 + margin_logs/step_0000676.npy | 3 + margin_logs/step_0000677.npy | 3 + margin_logs/step_0000678.npy | 3 + margin_logs/step_0000679.npy | 3 + margin_logs/step_0000680.npy | 3 + margin_logs/step_0000681.npy | 3 + model-00001-of-00007.safetensors | 3 + model-00002-of-00007.safetensors | 3 + model-00003-of-00007.safetensors | 3 + model-00004-of-00007.safetensors | 3 + model-00005-of-00007.safetensors | 3 + model-00006-of-00007.safetensors | 3 + model-00007-of-00007.safetensors | 3 + model.safetensors.index.json | 298 + special_tokens_map.json | 23 + tokenizer.json | 3 + tokenizer_config.json | 2064 ++++ train.log | 1160 +++ train_results.json | 9 + trainer_state.json | 15706 +++++++++++++++++++++++++++++ 701 files changed, 22153 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 margin_logs/margins.jsonl create mode 100644 margin_logs/step_0000001.npy create mode 100644 margin_logs/step_0000002.npy create mode 100644 margin_logs/step_0000003.npy create mode 100644 margin_logs/step_0000004.npy create mode 100644 margin_logs/step_0000005.npy create mode 100644 margin_logs/step_0000006.npy create mode 100644 margin_logs/step_0000007.npy create mode 100644 margin_logs/step_0000008.npy create mode 100644 margin_logs/step_0000009.npy create mode 100644 margin_logs/step_0000010.npy create mode 100644 margin_logs/step_0000011.npy create mode 100644 margin_logs/step_0000012.npy create mode 100644 margin_logs/step_0000013.npy create mode 100644 margin_logs/step_0000014.npy create mode 100644 margin_logs/step_0000015.npy create mode 100644 margin_logs/step_0000016.npy create mode 100644 margin_logs/step_0000017.npy create mode 100644 margin_logs/step_0000018.npy create mode 100644 margin_logs/step_0000019.npy create mode 100644 margin_logs/step_0000020.npy create mode 100644 margin_logs/step_0000021.npy create mode 100644 margin_logs/step_0000022.npy create mode 100644 margin_logs/step_0000023.npy create mode 100644 margin_logs/step_0000024.npy create mode 100644 margin_logs/step_0000025.npy create mode 100644 margin_logs/step_0000026.npy create mode 100644 margin_logs/step_0000027.npy create mode 100644 margin_logs/step_0000028.npy create mode 100644 margin_logs/step_0000029.npy create mode 100644 margin_logs/step_0000030.npy create mode 100644 margin_logs/step_0000031.npy create mode 100644 margin_logs/step_0000032.npy create mode 100644 margin_logs/step_0000033.npy create mode 100644 margin_logs/step_0000034.npy create mode 100644 margin_logs/step_0000035.npy create mode 100644 margin_logs/step_0000036.npy create mode 100644 margin_logs/step_0000037.npy create mode 100644 margin_logs/step_0000038.npy create mode 100644 margin_logs/step_0000039.npy create mode 100644 margin_logs/step_0000040.npy create mode 100644 margin_logs/step_0000041.npy create mode 100644 margin_logs/step_0000042.npy create mode 100644 margin_logs/step_0000043.npy create mode 100644 margin_logs/step_0000044.npy create mode 100644 margin_logs/step_0000045.npy create mode 100644 margin_logs/step_0000046.npy create mode 100644 margin_logs/step_0000047.npy create mode 100644 margin_logs/step_0000048.npy create mode 100644 margin_logs/step_0000049.npy create mode 100644 margin_logs/step_0000050.npy create mode 100644 margin_logs/step_0000051.npy create mode 100644 margin_logs/step_0000052.npy create mode 100644 margin_logs/step_0000053.npy create mode 100644 margin_logs/step_0000054.npy create mode 100644 margin_logs/step_0000055.npy create mode 100644 margin_logs/step_0000056.npy create mode 100644 margin_logs/step_0000057.npy create mode 100644 margin_logs/step_0000058.npy create mode 100644 margin_logs/step_0000059.npy create mode 100644 margin_logs/step_0000060.npy create mode 100644 margin_logs/step_0000061.npy create mode 100644 margin_logs/step_0000062.npy create mode 100644 margin_logs/step_0000063.npy create mode 100644 margin_logs/step_0000064.npy create mode 100644 margin_logs/step_0000065.npy create mode 100644 margin_logs/step_0000066.npy create mode 100644 margin_logs/step_0000067.npy create mode 100644 margin_logs/step_0000068.npy create mode 100644 margin_logs/step_0000069.npy create mode 100644 margin_logs/step_0000070.npy create mode 100644 margin_logs/step_0000071.npy create mode 100644 margin_logs/step_0000072.npy create mode 100644 margin_logs/step_0000073.npy create mode 100644 margin_logs/step_0000074.npy create mode 100644 margin_logs/step_0000075.npy create mode 100644 margin_logs/step_0000076.npy create mode 100644 margin_logs/step_0000077.npy create mode 100644 margin_logs/step_0000078.npy create mode 100644 margin_logs/step_0000079.npy create mode 100644 margin_logs/step_0000080.npy create mode 100644 margin_logs/step_0000081.npy create mode 100644 margin_logs/step_0000082.npy create mode 100644 margin_logs/step_0000083.npy create mode 100644 margin_logs/step_0000084.npy create mode 100644 margin_logs/step_0000085.npy create mode 100644 margin_logs/step_0000086.npy create mode 100644 margin_logs/step_0000087.npy create mode 100644 margin_logs/step_0000088.npy create mode 100644 margin_logs/step_0000089.npy create mode 100644 margin_logs/step_0000090.npy create mode 100644 margin_logs/step_0000091.npy create mode 100644 margin_logs/step_0000092.npy create mode 100644 margin_logs/step_0000093.npy create mode 100644 margin_logs/step_0000094.npy create mode 100644 margin_logs/step_0000095.npy create mode 100644 margin_logs/step_0000096.npy create mode 100644 margin_logs/step_0000097.npy create mode 100644 margin_logs/step_0000098.npy create mode 100644 margin_logs/step_0000099.npy create mode 100644 margin_logs/step_0000100.npy create mode 100644 margin_logs/step_0000101.npy create mode 100644 margin_logs/step_0000102.npy create mode 100644 margin_logs/step_0000103.npy create mode 100644 margin_logs/step_0000104.npy create mode 100644 margin_logs/step_0000105.npy create mode 100644 margin_logs/step_0000106.npy create mode 100644 margin_logs/step_0000107.npy create mode 100644 margin_logs/step_0000108.npy create mode 100644 margin_logs/step_0000109.npy create mode 100644 margin_logs/step_0000110.npy create mode 100644 margin_logs/step_0000111.npy create mode 100644 margin_logs/step_0000112.npy create mode 100644 margin_logs/step_0000113.npy create mode 100644 margin_logs/step_0000114.npy create mode 100644 margin_logs/step_0000115.npy create mode 100644 margin_logs/step_0000116.npy create mode 100644 margin_logs/step_0000117.npy create mode 100644 margin_logs/step_0000118.npy create mode 100644 margin_logs/step_0000119.npy create mode 100644 margin_logs/step_0000120.npy create mode 100644 margin_logs/step_0000121.npy create mode 100644 margin_logs/step_0000122.npy create mode 100644 margin_logs/step_0000123.npy create mode 100644 margin_logs/step_0000124.npy create mode 100644 margin_logs/step_0000125.npy create mode 100644 margin_logs/step_0000126.npy create mode 100644 margin_logs/step_0000127.npy create mode 100644 margin_logs/step_0000128.npy create mode 100644 margin_logs/step_0000129.npy create mode 100644 margin_logs/step_0000130.npy create mode 100644 margin_logs/step_0000131.npy create mode 100644 margin_logs/step_0000132.npy create mode 100644 margin_logs/step_0000133.npy create mode 100644 margin_logs/step_0000134.npy create mode 100644 margin_logs/step_0000135.npy create mode 100644 margin_logs/step_0000136.npy create mode 100644 margin_logs/step_0000137.npy create mode 100644 margin_logs/step_0000138.npy create mode 100644 margin_logs/step_0000139.npy create mode 100644 margin_logs/step_0000140.npy create mode 100644 margin_logs/step_0000141.npy create mode 100644 margin_logs/step_0000142.npy create mode 100644 margin_logs/step_0000143.npy create mode 100644 margin_logs/step_0000144.npy create mode 100644 margin_logs/step_0000145.npy create mode 100644 margin_logs/step_0000146.npy create mode 100644 margin_logs/step_0000147.npy create mode 100644 margin_logs/step_0000148.npy create mode 100644 margin_logs/step_0000149.npy create mode 100644 margin_logs/step_0000150.npy create mode 100644 margin_logs/step_0000151.npy create mode 100644 margin_logs/step_0000152.npy create mode 100644 margin_logs/step_0000153.npy create mode 100644 margin_logs/step_0000154.npy create mode 100644 margin_logs/step_0000155.npy create mode 100644 margin_logs/step_0000156.npy create mode 100644 margin_logs/step_0000157.npy create mode 100644 margin_logs/step_0000158.npy create mode 100644 margin_logs/step_0000159.npy create mode 100644 margin_logs/step_0000160.npy create mode 100644 margin_logs/step_0000161.npy create mode 100644 margin_logs/step_0000162.npy create mode 100644 margin_logs/step_0000163.npy create mode 100644 margin_logs/step_0000164.npy create mode 100644 margin_logs/step_0000165.npy create mode 100644 margin_logs/step_0000166.npy create mode 100644 margin_logs/step_0000167.npy create mode 100644 margin_logs/step_0000168.npy create mode 100644 margin_logs/step_0000169.npy create mode 100644 margin_logs/step_0000170.npy create mode 100644 margin_logs/step_0000171.npy create mode 100644 margin_logs/step_0000172.npy create mode 100644 margin_logs/step_0000173.npy create mode 100644 margin_logs/step_0000174.npy create mode 100644 margin_logs/step_0000175.npy create mode 100644 margin_logs/step_0000176.npy create mode 100644 margin_logs/step_0000177.npy create mode 100644 margin_logs/step_0000178.npy create mode 100644 margin_logs/step_0000179.npy create mode 100644 margin_logs/step_0000180.npy create mode 100644 margin_logs/step_0000181.npy create mode 100644 margin_logs/step_0000182.npy create mode 100644 margin_logs/step_0000183.npy create mode 100644 margin_logs/step_0000184.npy create mode 100644 margin_logs/step_0000185.npy create mode 100644 margin_logs/step_0000186.npy create mode 100644 margin_logs/step_0000187.npy create mode 100644 margin_logs/step_0000188.npy create mode 100644 margin_logs/step_0000189.npy create mode 100644 margin_logs/step_0000190.npy create mode 100644 margin_logs/step_0000191.npy create mode 100644 margin_logs/step_0000192.npy create mode 100644 margin_logs/step_0000193.npy create mode 100644 margin_logs/step_0000194.npy create mode 100644 margin_logs/step_0000195.npy create mode 100644 margin_logs/step_0000196.npy create mode 100644 margin_logs/step_0000197.npy create mode 100644 margin_logs/step_0000198.npy create mode 100644 margin_logs/step_0000199.npy create mode 100644 margin_logs/step_0000200.npy create mode 100644 margin_logs/step_0000201.npy create mode 100644 margin_logs/step_0000202.npy create mode 100644 margin_logs/step_0000203.npy create mode 100644 margin_logs/step_0000204.npy create mode 100644 margin_logs/step_0000205.npy create mode 100644 margin_logs/step_0000206.npy create mode 100644 margin_logs/step_0000207.npy create mode 100644 margin_logs/step_0000208.npy create mode 100644 margin_logs/step_0000209.npy create mode 100644 margin_logs/step_0000210.npy create mode 100644 margin_logs/step_0000211.npy create mode 100644 margin_logs/step_0000212.npy create mode 100644 margin_logs/step_0000213.npy create mode 100644 margin_logs/step_0000214.npy create mode 100644 margin_logs/step_0000215.npy create mode 100644 margin_logs/step_0000216.npy create mode 100644 margin_logs/step_0000217.npy create mode 100644 margin_logs/step_0000218.npy create mode 100644 margin_logs/step_0000219.npy create mode 100644 margin_logs/step_0000220.npy create mode 100644 margin_logs/step_0000221.npy create mode 100644 margin_logs/step_0000222.npy create mode 100644 margin_logs/step_0000223.npy create mode 100644 margin_logs/step_0000224.npy create mode 100644 margin_logs/step_0000225.npy create mode 100644 margin_logs/step_0000226.npy create mode 100644 margin_logs/step_0000227.npy create mode 100644 margin_logs/step_0000228.npy create mode 100644 margin_logs/step_0000229.npy create mode 100644 margin_logs/step_0000230.npy create mode 100644 margin_logs/step_0000231.npy create mode 100644 margin_logs/step_0000232.npy create mode 100644 margin_logs/step_0000233.npy create mode 100644 margin_logs/step_0000234.npy create mode 100644 margin_logs/step_0000235.npy create mode 100644 margin_logs/step_0000236.npy create mode 100644 margin_logs/step_0000237.npy create mode 100644 margin_logs/step_0000238.npy create mode 100644 margin_logs/step_0000239.npy create mode 100644 margin_logs/step_0000240.npy create mode 100644 margin_logs/step_0000241.npy create mode 100644 margin_logs/step_0000242.npy create mode 100644 margin_logs/step_0000243.npy create mode 100644 margin_logs/step_0000244.npy create mode 100644 margin_logs/step_0000245.npy create mode 100644 margin_logs/step_0000246.npy create mode 100644 margin_logs/step_0000247.npy create mode 100644 margin_logs/step_0000248.npy create mode 100644 margin_logs/step_0000249.npy create mode 100644 margin_logs/step_0000250.npy create mode 100644 margin_logs/step_0000251.npy create mode 100644 margin_logs/step_0000252.npy create mode 100644 margin_logs/step_0000253.npy create mode 100644 margin_logs/step_0000254.npy create mode 100644 margin_logs/step_0000255.npy create mode 100644 margin_logs/step_0000256.npy create mode 100644 margin_logs/step_0000257.npy create mode 100644 margin_logs/step_0000258.npy create mode 100644 margin_logs/step_0000259.npy create mode 100644 margin_logs/step_0000260.npy create mode 100644 margin_logs/step_0000261.npy create mode 100644 margin_logs/step_0000262.npy create mode 100644 margin_logs/step_0000263.npy create mode 100644 margin_logs/step_0000264.npy create mode 100644 margin_logs/step_0000265.npy create mode 100644 margin_logs/step_0000266.npy create mode 100644 margin_logs/step_0000267.npy create mode 100644 margin_logs/step_0000268.npy create mode 100644 margin_logs/step_0000269.npy create mode 100644 margin_logs/step_0000270.npy create mode 100644 margin_logs/step_0000271.npy create mode 100644 margin_logs/step_0000272.npy create mode 100644 margin_logs/step_0000273.npy create mode 100644 margin_logs/step_0000274.npy create mode 100644 margin_logs/step_0000275.npy create mode 100644 margin_logs/step_0000276.npy create mode 100644 margin_logs/step_0000277.npy create mode 100644 margin_logs/step_0000278.npy create mode 100644 margin_logs/step_0000279.npy create mode 100644 margin_logs/step_0000280.npy create mode 100644 margin_logs/step_0000281.npy create mode 100644 margin_logs/step_0000282.npy create mode 100644 margin_logs/step_0000283.npy create mode 100644 margin_logs/step_0000284.npy create mode 100644 margin_logs/step_0000285.npy create mode 100644 margin_logs/step_0000286.npy create mode 100644 margin_logs/step_0000287.npy create mode 100644 margin_logs/step_0000288.npy create mode 100644 margin_logs/step_0000289.npy create mode 100644 margin_logs/step_0000290.npy create mode 100644 margin_logs/step_0000291.npy create mode 100644 margin_logs/step_0000292.npy create mode 100644 margin_logs/step_0000293.npy create mode 100644 margin_logs/step_0000294.npy create mode 100644 margin_logs/step_0000295.npy create mode 100644 margin_logs/step_0000296.npy create mode 100644 margin_logs/step_0000297.npy create mode 100644 margin_logs/step_0000298.npy create mode 100644 margin_logs/step_0000299.npy create mode 100644 margin_logs/step_0000300.npy create mode 100644 margin_logs/step_0000301.npy create mode 100644 margin_logs/step_0000302.npy create mode 100644 margin_logs/step_0000303.npy create mode 100644 margin_logs/step_0000304.npy create mode 100644 margin_logs/step_0000305.npy create mode 100644 margin_logs/step_0000306.npy create mode 100644 margin_logs/step_0000307.npy create mode 100644 margin_logs/step_0000308.npy create mode 100644 margin_logs/step_0000309.npy create mode 100644 margin_logs/step_0000310.npy create mode 100644 margin_logs/step_0000311.npy create mode 100644 margin_logs/step_0000312.npy create mode 100644 margin_logs/step_0000313.npy create mode 100644 margin_logs/step_0000314.npy create mode 100644 margin_logs/step_0000315.npy create mode 100644 margin_logs/step_0000316.npy create mode 100644 margin_logs/step_0000317.npy create mode 100644 margin_logs/step_0000318.npy create mode 100644 margin_logs/step_0000319.npy create mode 100644 margin_logs/step_0000320.npy create mode 100644 margin_logs/step_0000321.npy create mode 100644 margin_logs/step_0000322.npy create mode 100644 margin_logs/step_0000323.npy create mode 100644 margin_logs/step_0000324.npy create mode 100644 margin_logs/step_0000325.npy create mode 100644 margin_logs/step_0000326.npy create mode 100644 margin_logs/step_0000327.npy create mode 100644 margin_logs/step_0000328.npy create mode 100644 margin_logs/step_0000329.npy create mode 100644 margin_logs/step_0000330.npy create mode 100644 margin_logs/step_0000331.npy create mode 100644 margin_logs/step_0000332.npy create mode 100644 margin_logs/step_0000333.npy create mode 100644 margin_logs/step_0000334.npy create mode 100644 margin_logs/step_0000335.npy create mode 100644 margin_logs/step_0000336.npy create mode 100644 margin_logs/step_0000337.npy create mode 100644 margin_logs/step_0000338.npy create mode 100644 margin_logs/step_0000339.npy create mode 100644 margin_logs/step_0000340.npy create mode 100644 margin_logs/step_0000341.npy create mode 100644 margin_logs/step_0000342.npy create mode 100644 margin_logs/step_0000343.npy create mode 100644 margin_logs/step_0000344.npy create mode 100644 margin_logs/step_0000345.npy create mode 100644 margin_logs/step_0000346.npy create mode 100644 margin_logs/step_0000347.npy create mode 100644 margin_logs/step_0000348.npy create mode 100644 margin_logs/step_0000349.npy create mode 100644 margin_logs/step_0000350.npy create mode 100644 margin_logs/step_0000351.npy create mode 100644 margin_logs/step_0000352.npy create mode 100644 margin_logs/step_0000353.npy create mode 100644 margin_logs/step_0000354.npy create mode 100644 margin_logs/step_0000355.npy create mode 100644 margin_logs/step_0000356.npy create mode 100644 margin_logs/step_0000357.npy create mode 100644 margin_logs/step_0000358.npy create mode 100644 margin_logs/step_0000359.npy create mode 100644 margin_logs/step_0000360.npy create mode 100644 margin_logs/step_0000361.npy create mode 100644 margin_logs/step_0000362.npy create mode 100644 margin_logs/step_0000363.npy create mode 100644 margin_logs/step_0000364.npy create mode 100644 margin_logs/step_0000365.npy create mode 100644 margin_logs/step_0000366.npy create mode 100644 margin_logs/step_0000367.npy create mode 100644 margin_logs/step_0000368.npy create mode 100644 margin_logs/step_0000369.npy create mode 100644 margin_logs/step_0000370.npy create mode 100644 margin_logs/step_0000371.npy create mode 100644 margin_logs/step_0000372.npy create mode 100644 margin_logs/step_0000373.npy create mode 100644 margin_logs/step_0000374.npy create mode 100644 margin_logs/step_0000375.npy create mode 100644 margin_logs/step_0000376.npy create mode 100644 margin_logs/step_0000377.npy create mode 100644 margin_logs/step_0000378.npy create mode 100644 margin_logs/step_0000379.npy create mode 100644 margin_logs/step_0000380.npy create mode 100644 margin_logs/step_0000381.npy create mode 100644 margin_logs/step_0000382.npy create mode 100644 margin_logs/step_0000383.npy create mode 100644 margin_logs/step_0000384.npy create mode 100644 margin_logs/step_0000385.npy create mode 100644 margin_logs/step_0000386.npy create mode 100644 margin_logs/step_0000387.npy create mode 100644 margin_logs/step_0000388.npy create mode 100644 margin_logs/step_0000389.npy create mode 100644 margin_logs/step_0000390.npy create mode 100644 margin_logs/step_0000391.npy create mode 100644 margin_logs/step_0000392.npy create mode 100644 margin_logs/step_0000393.npy create mode 100644 margin_logs/step_0000394.npy create mode 100644 margin_logs/step_0000395.npy create mode 100644 margin_logs/step_0000396.npy create mode 100644 margin_logs/step_0000397.npy create mode 100644 margin_logs/step_0000398.npy create mode 100644 margin_logs/step_0000399.npy create mode 100644 margin_logs/step_0000400.npy create mode 100644 margin_logs/step_0000401.npy create mode 100644 margin_logs/step_0000402.npy create mode 100644 margin_logs/step_0000403.npy create mode 100644 margin_logs/step_0000404.npy create mode 100644 margin_logs/step_0000405.npy create mode 100644 margin_logs/step_0000406.npy create mode 100644 margin_logs/step_0000407.npy create mode 100644 margin_logs/step_0000408.npy create mode 100644 margin_logs/step_0000409.npy create mode 100644 margin_logs/step_0000410.npy create mode 100644 margin_logs/step_0000411.npy create mode 100644 margin_logs/step_0000412.npy create mode 100644 margin_logs/step_0000413.npy create mode 100644 margin_logs/step_0000414.npy create mode 100644 margin_logs/step_0000415.npy create mode 100644 margin_logs/step_0000416.npy create mode 100644 margin_logs/step_0000417.npy create mode 100644 margin_logs/step_0000418.npy create mode 100644 margin_logs/step_0000419.npy create mode 100644 margin_logs/step_0000420.npy create mode 100644 margin_logs/step_0000421.npy create mode 100644 margin_logs/step_0000422.npy create mode 100644 margin_logs/step_0000423.npy create mode 100644 margin_logs/step_0000424.npy create mode 100644 margin_logs/step_0000425.npy create mode 100644 margin_logs/step_0000426.npy create mode 100644 margin_logs/step_0000427.npy create mode 100644 margin_logs/step_0000428.npy create mode 100644 margin_logs/step_0000429.npy create mode 100644 margin_logs/step_0000430.npy create mode 100644 margin_logs/step_0000431.npy create mode 100644 margin_logs/step_0000432.npy create mode 100644 margin_logs/step_0000433.npy create mode 100644 margin_logs/step_0000434.npy create mode 100644 margin_logs/step_0000435.npy create mode 100644 margin_logs/step_0000436.npy create mode 100644 margin_logs/step_0000437.npy create mode 100644 margin_logs/step_0000438.npy create mode 100644 margin_logs/step_0000439.npy create mode 100644 margin_logs/step_0000440.npy create mode 100644 margin_logs/step_0000441.npy create mode 100644 margin_logs/step_0000442.npy create mode 100644 margin_logs/step_0000443.npy create mode 100644 margin_logs/step_0000444.npy create mode 100644 margin_logs/step_0000445.npy create mode 100644 margin_logs/step_0000446.npy create mode 100644 margin_logs/step_0000447.npy create mode 100644 margin_logs/step_0000448.npy create mode 100644 margin_logs/step_0000449.npy create mode 100644 margin_logs/step_0000450.npy create mode 100644 margin_logs/step_0000451.npy create mode 100644 margin_logs/step_0000452.npy create mode 100644 margin_logs/step_0000453.npy create mode 100644 margin_logs/step_0000454.npy create mode 100644 margin_logs/step_0000455.npy create mode 100644 margin_logs/step_0000456.npy create mode 100644 margin_logs/step_0000457.npy create mode 100644 margin_logs/step_0000458.npy create mode 100644 margin_logs/step_0000459.npy create mode 100644 margin_logs/step_0000460.npy create mode 100644 margin_logs/step_0000461.npy create mode 100644 margin_logs/step_0000462.npy create mode 100644 margin_logs/step_0000463.npy create mode 100644 margin_logs/step_0000464.npy create mode 100644 margin_logs/step_0000465.npy create mode 100644 margin_logs/step_0000466.npy create mode 100644 margin_logs/step_0000467.npy create mode 100644 margin_logs/step_0000468.npy create mode 100644 margin_logs/step_0000469.npy create mode 100644 margin_logs/step_0000470.npy create mode 100644 margin_logs/step_0000471.npy create mode 100644 margin_logs/step_0000472.npy create mode 100644 margin_logs/step_0000473.npy create mode 100644 margin_logs/step_0000474.npy create mode 100644 margin_logs/step_0000475.npy create mode 100644 margin_logs/step_0000476.npy create mode 100644 margin_logs/step_0000477.npy create mode 100644 margin_logs/step_0000478.npy create mode 100644 margin_logs/step_0000479.npy create mode 100644 margin_logs/step_0000480.npy create mode 100644 margin_logs/step_0000481.npy create mode 100644 margin_logs/step_0000482.npy create mode 100644 margin_logs/step_0000483.npy create mode 100644 margin_logs/step_0000484.npy create mode 100644 margin_logs/step_0000485.npy create mode 100644 margin_logs/step_0000486.npy create mode 100644 margin_logs/step_0000487.npy create mode 100644 margin_logs/step_0000488.npy create mode 100644 margin_logs/step_0000489.npy create mode 100644 margin_logs/step_0000490.npy create mode 100644 margin_logs/step_0000491.npy create mode 100644 margin_logs/step_0000492.npy create mode 100644 margin_logs/step_0000493.npy create mode 100644 margin_logs/step_0000494.npy create mode 100644 margin_logs/step_0000495.npy create mode 100644 margin_logs/step_0000496.npy create mode 100644 margin_logs/step_0000497.npy create mode 100644 margin_logs/step_0000498.npy create mode 100644 margin_logs/step_0000499.npy create mode 100644 margin_logs/step_0000500.npy create mode 100644 margin_logs/step_0000501.npy create mode 100644 margin_logs/step_0000502.npy create mode 100644 margin_logs/step_0000503.npy create mode 100644 margin_logs/step_0000504.npy create mode 100644 margin_logs/step_0000505.npy create mode 100644 margin_logs/step_0000506.npy create mode 100644 margin_logs/step_0000507.npy create mode 100644 margin_logs/step_0000508.npy create mode 100644 margin_logs/step_0000509.npy create mode 100644 margin_logs/step_0000510.npy create mode 100644 margin_logs/step_0000511.npy create mode 100644 margin_logs/step_0000512.npy create mode 100644 margin_logs/step_0000513.npy create mode 100644 margin_logs/step_0000514.npy create mode 100644 margin_logs/step_0000515.npy create mode 100644 margin_logs/step_0000516.npy create mode 100644 margin_logs/step_0000517.npy create mode 100644 margin_logs/step_0000518.npy create mode 100644 margin_logs/step_0000519.npy create mode 100644 margin_logs/step_0000520.npy create mode 100644 margin_logs/step_0000521.npy create mode 100644 margin_logs/step_0000522.npy create mode 100644 margin_logs/step_0000523.npy create mode 100644 margin_logs/step_0000524.npy create mode 100644 margin_logs/step_0000525.npy create mode 100644 margin_logs/step_0000526.npy create mode 100644 margin_logs/step_0000527.npy create mode 100644 margin_logs/step_0000528.npy create mode 100644 margin_logs/step_0000529.npy create mode 100644 margin_logs/step_0000530.npy create mode 100644 margin_logs/step_0000531.npy create mode 100644 margin_logs/step_0000532.npy create mode 100644 margin_logs/step_0000533.npy create mode 100644 margin_logs/step_0000534.npy create mode 100644 margin_logs/step_0000535.npy create mode 100644 margin_logs/step_0000536.npy create mode 100644 margin_logs/step_0000537.npy create mode 100644 margin_logs/step_0000538.npy create mode 100644 margin_logs/step_0000539.npy create mode 100644 margin_logs/step_0000540.npy create mode 100644 margin_logs/step_0000541.npy create mode 100644 margin_logs/step_0000542.npy create mode 100644 margin_logs/step_0000543.npy create mode 100644 margin_logs/step_0000544.npy create mode 100644 margin_logs/step_0000545.npy create mode 100644 margin_logs/step_0000546.npy create mode 100644 margin_logs/step_0000547.npy create mode 100644 margin_logs/step_0000548.npy create mode 100644 margin_logs/step_0000549.npy create mode 100644 margin_logs/step_0000550.npy create mode 100644 margin_logs/step_0000551.npy create mode 100644 margin_logs/step_0000552.npy create mode 100644 margin_logs/step_0000553.npy create mode 100644 margin_logs/step_0000554.npy create mode 100644 margin_logs/step_0000555.npy create mode 100644 margin_logs/step_0000556.npy create mode 100644 margin_logs/step_0000557.npy create mode 100644 margin_logs/step_0000558.npy create mode 100644 margin_logs/step_0000559.npy create mode 100644 margin_logs/step_0000560.npy create mode 100644 margin_logs/step_0000561.npy create mode 100644 margin_logs/step_0000562.npy create mode 100644 margin_logs/step_0000563.npy create mode 100644 margin_logs/step_0000564.npy create mode 100644 margin_logs/step_0000565.npy create mode 100644 margin_logs/step_0000566.npy create mode 100644 margin_logs/step_0000567.npy create mode 100644 margin_logs/step_0000568.npy create mode 100644 margin_logs/step_0000569.npy create mode 100644 margin_logs/step_0000570.npy create mode 100644 margin_logs/step_0000571.npy create mode 100644 margin_logs/step_0000572.npy create mode 100644 margin_logs/step_0000573.npy create mode 100644 margin_logs/step_0000574.npy create mode 100644 margin_logs/step_0000575.npy create mode 100644 margin_logs/step_0000576.npy create mode 100644 margin_logs/step_0000577.npy create mode 100644 margin_logs/step_0000578.npy create mode 100644 margin_logs/step_0000579.npy create mode 100644 margin_logs/step_0000580.npy create mode 100644 margin_logs/step_0000581.npy create mode 100644 margin_logs/step_0000582.npy create mode 100644 margin_logs/step_0000583.npy create mode 100644 margin_logs/step_0000584.npy create mode 100644 margin_logs/step_0000585.npy create mode 100644 margin_logs/step_0000586.npy create mode 100644 margin_logs/step_0000587.npy create mode 100644 margin_logs/step_0000588.npy create mode 100644 margin_logs/step_0000589.npy create mode 100644 margin_logs/step_0000590.npy create mode 100644 margin_logs/step_0000591.npy create mode 100644 margin_logs/step_0000592.npy create mode 100644 margin_logs/step_0000593.npy create mode 100644 margin_logs/step_0000594.npy create mode 100644 margin_logs/step_0000595.npy create mode 100644 margin_logs/step_0000596.npy create mode 100644 margin_logs/step_0000597.npy create mode 100644 margin_logs/step_0000598.npy create mode 100644 margin_logs/step_0000599.npy create mode 100644 margin_logs/step_0000600.npy create mode 100644 margin_logs/step_0000601.npy create mode 100644 margin_logs/step_0000602.npy create mode 100644 margin_logs/step_0000603.npy create mode 100644 margin_logs/step_0000604.npy create mode 100644 margin_logs/step_0000605.npy create mode 100644 margin_logs/step_0000606.npy create mode 100644 margin_logs/step_0000607.npy create mode 100644 margin_logs/step_0000608.npy create mode 100644 margin_logs/step_0000609.npy create mode 100644 margin_logs/step_0000610.npy create mode 100644 margin_logs/step_0000611.npy create mode 100644 margin_logs/step_0000612.npy create mode 100644 margin_logs/step_0000613.npy create mode 100644 margin_logs/step_0000614.npy create mode 100644 margin_logs/step_0000615.npy create mode 100644 margin_logs/step_0000616.npy create mode 100644 margin_logs/step_0000617.npy create mode 100644 margin_logs/step_0000618.npy create mode 100644 margin_logs/step_0000619.npy create mode 100644 margin_logs/step_0000620.npy create mode 100644 margin_logs/step_0000621.npy create mode 100644 margin_logs/step_0000622.npy create mode 100644 margin_logs/step_0000623.npy create mode 100644 margin_logs/step_0000624.npy create mode 100644 margin_logs/step_0000625.npy create mode 100644 margin_logs/step_0000626.npy create mode 100644 margin_logs/step_0000627.npy create mode 100644 margin_logs/step_0000628.npy create mode 100644 margin_logs/step_0000629.npy create mode 100644 margin_logs/step_0000630.npy create mode 100644 margin_logs/step_0000631.npy create mode 100644 margin_logs/step_0000632.npy create mode 100644 margin_logs/step_0000633.npy create mode 100644 margin_logs/step_0000634.npy create mode 100644 margin_logs/step_0000635.npy create mode 100644 margin_logs/step_0000636.npy create mode 100644 margin_logs/step_0000637.npy create mode 100644 margin_logs/step_0000638.npy create mode 100644 margin_logs/step_0000639.npy create mode 100644 margin_logs/step_0000640.npy create mode 100644 margin_logs/step_0000641.npy create mode 100644 margin_logs/step_0000642.npy create mode 100644 margin_logs/step_0000643.npy create mode 100644 margin_logs/step_0000644.npy create mode 100644 margin_logs/step_0000645.npy create mode 100644 margin_logs/step_0000646.npy create mode 100644 margin_logs/step_0000647.npy create mode 100644 margin_logs/step_0000648.npy create mode 100644 margin_logs/step_0000649.npy create mode 100644 margin_logs/step_0000650.npy create mode 100644 margin_logs/step_0000651.npy create mode 100644 margin_logs/step_0000652.npy create mode 100644 margin_logs/step_0000653.npy create mode 100644 margin_logs/step_0000654.npy create mode 100644 margin_logs/step_0000655.npy create mode 100644 margin_logs/step_0000656.npy create mode 100644 margin_logs/step_0000657.npy create mode 100644 margin_logs/step_0000658.npy create mode 100644 margin_logs/step_0000659.npy create mode 100644 margin_logs/step_0000660.npy create mode 100644 margin_logs/step_0000661.npy create mode 100644 margin_logs/step_0000662.npy create mode 100644 margin_logs/step_0000663.npy create mode 100644 margin_logs/step_0000664.npy create mode 100644 margin_logs/step_0000665.npy create mode 100644 margin_logs/step_0000666.npy create mode 100644 margin_logs/step_0000667.npy create mode 100644 margin_logs/step_0000668.npy create mode 100644 margin_logs/step_0000669.npy create mode 100644 margin_logs/step_0000670.npy create mode 100644 margin_logs/step_0000671.npy create mode 100644 margin_logs/step_0000672.npy create mode 100644 margin_logs/step_0000673.npy create mode 100644 margin_logs/step_0000674.npy create mode 100644 margin_logs/step_0000675.npy create mode 100644 margin_logs/step_0000676.npy create mode 100644 margin_logs/step_0000677.npy create mode 100644 margin_logs/step_0000678.npy create mode 100644 margin_logs/step_0000679.npy create mode 100644 margin_logs/step_0000680.npy create mode 100644 margin_logs/step_0000681.npy create mode 100644 model-00001-of-00007.safetensors create mode 100644 model-00002-of-00007.safetensors create mode 100644 model-00003-of-00007.safetensors create mode 100644 model-00004-of-00007.safetensors create mode 100644 model-00005-of-00007.safetensors create mode 100644 model-00006-of-00007.safetensors create mode 100644 model-00007-of-00007.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train.log create mode 100644 train_results.json create mode 100644 trainer_state.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..d2c5073 --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +--- +library_name: transformers +base_model: W-61/llama-3-8b-base-sft-hh-helpful-4xh200 +tags: +- alignment-handbook +- new-dpo +- generated_from_trainer +datasets: +- Anthropic/hh-rlhf +model-index: +- name: llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449 + results: [] +--- + + + +# llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449 + +This model is a fine-tuned version of [W-61/llama-3-8b-base-sft-hh-helpful-4xh200](https://huggingface.co/W-61/llama-3-8b-base-sft-hh-helpful-4xh200) on the Anthropic/hh-rlhf dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-07 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 64 +- total_eval_batch_size: 32 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1 + +### Training results + + + +### Framework versions + +- Transformers 4.51.0 +- Pytorch 2.3.1+cu121 +- Datasets 2.21.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..4fd2a77 --- /dev/null +++ b/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 0.0, + "train_loss": 1.093637848565582, + "train_runtime": 1736.9515, + "train_samples": 43598, + "train_samples_per_second": 25.1, + "train_steps_per_second": 0.392 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5092b09 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.51.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..76247c9 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.0" +} diff --git a/margin_logs/margins.jsonl b/margin_logs/margins.jsonl new file mode 100644 index 0000000..073ce45 --- /dev/null +++ b/margin_logs/margins.jsonl @@ -0,0 +1,681 @@ +{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.02287048101425171, "std": 0.42023447155952454, "min": -1.4034271240234375, "p10": -0.46674575805664065, "median": 0.04234886169433594, "p90": 0.4323463439941407, "max": 0.89263916015625, "pos_frac": 0.53125, "sample": [-0.06523895263671875, 0.436798095703125, 0.27811431884765625, -0.9194221496582031, 0.018890380859375, 0.20587158203125, 0.18878173828125, -0.3968696594238281, 0.26206207275390625, 0.2470550537109375, -0.040912628173828125, 0.4394989013671875, -0.44133758544921875, -0.39148712158203125, 0.2764854431152344, 0.89263916015625, -0.42584991455078125, -0.46125030517578125, -0.8638992309570312, -0.3508758544921875, 0.371368408203125, 0.887847900390625, -0.382904052734375, 0.36145782470703125, -0.4890003204345703, 0.052455902099609375, -0.036136627197265625, 0.23079299926757812, 0.2469482421875, 0.1643218994140625, -0.07129669189453125, 0.2790794372558594, 0.3637123107910156, -0.8916168212890625, 0.03298759460449219, -0.2790107727050781, -0.17860984802246094, 0.23892593383789062, 0.05171012878417969, -0.2564239501953125, -0.14655303955078125, 0.27777862548828125, 0.0810394287109375, -1.4034271240234375, -0.28739166259765625, -0.1489429473876953, 0.44918060302734375, 0.1693286895751953, 0.10933303833007812, -0.14766693115234375, -0.40944671630859375, -0.18532562255859375, 0.6261310577392578, -0.20856857299804688, 0.602569580078125, 0.05538177490234375, 0.1505279541015625, 0.1313800811767578, -0.006317138671875, 0.42195892333984375, -0.29936981201171875, -0.4691009521484375, 0.16705322265625, -0.5789260864257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000001.npy"} +{"epoch": 0.0014684287812041115, "step": 2, "batch_size": 64, "mean": -0.06572240591049194, "std": 0.3523969054222107, "min": -0.9291305541992188, "p10": -0.46334152221679686, "median": -0.05502510070800781, "p90": 0.3672500610351563, "max": 1.0444793701171875, "pos_frac": 0.4375, "sample": [-0.2829437255859375, 0.3027191162109375, -0.19867706298828125, -0.3062286376953125, 0.10318756103515625, 0.20131683349609375, -0.34906005859375, 0.2802886962890625, 0.1914520263671875, -0.31072998046875, 0.08922195434570312, 0.10284614562988281, -0.03655242919921875, -0.0604095458984375, -0.06208038330078125, 0.32562255859375, -0.37982177734375, 0.2746162414550781, -0.049640655517578125, 0.3752174377441406, -0.103973388671875, 0.0699462890625, 0.36417388916015625, -0.033428192138671875, 0.37265777587890625, -0.3787078857421875, -0.6610565185546875, 0.4720420837402344, 0.47701263427734375, -0.27928924560546875, -0.44719696044921875, -0.0965118408203125, -0.7628555297851562, 0.046764373779296875, 0.06670379638671875, -0.9291305541992188, -0.7122802734375, -0.16554832458496094, 0.1485595703125, -0.07539939880371094, 0.2588920593261719, 0.039890289306640625, 0.201690673828125, 0.0623016357421875, 1.0444793701171875, -0.37696075439453125, -0.02794647216796875, -0.223297119140625, -0.35730743408203125, -0.1309051513671875, -0.3106689453125, -0.11409187316894531, -0.1669769287109375, 0.131317138671875, -0.2361297607421875, 0.4093780517578125, -0.6485977172851562, 0.36856842041015625, -0.1951904296875, -0.4702606201171875, -0.7624168395996094, 0.008928298950195312, -0.31630706787109375, 0.022550582885742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000002.npy"} +{"epoch": 0.002936857562408223, "step": 3, "batch_size": 64, "mean": 0.06905469298362732, "std": 0.40016984939575195, "min": -0.804901123046875, "p10": -0.35855560302734374, "median": 0.051817893981933594, "p90": 0.5267044067382813, "max": 1.18548583984375, "pos_frac": 0.5625, "sample": [-0.21624755859375, -0.10370254516601562, 0.391082763671875, -0.05510711669921875, -0.3452949523925781, 0.17584228515625, 0.257598876953125, 0.22792816162109375, 0.1298370361328125, 0.09908294677734375, 0.3015594482421875, 0.16221237182617188, 0.5388946533203125, -0.3406982421875, -0.06643486022949219, 0.5764846801757812, -0.1379241943359375, -0.2915077209472656, 0.29923248291015625, -0.1660175323486328, -0.5831222534179688, 0.331298828125, -0.804901123046875, -0.170440673828125, -0.02797698974609375, -0.03792381286621094, 0.02301025390625, 0.06266212463378906, 0.10911941528320312, -0.4740791320800781, 0.972381591796875, 1.0604934692382812, 0.019342422485351562, 1.18548583984375, 0.1018524169921875, -0.2600250244140625, -0.26861572265625, -0.2392101287841797, 0.2829437255859375, 0.29195594787597656, -0.45505523681640625, 0.064788818359375, 0.1895751953125, 0.14105224609375, -0.07660675048828125, -0.1813812255859375, 0.30052947998046875, 0.7047462463378906, -0.3642387390136719, -0.11333465576171875, 0.26354026794433594, -0.1661357879638672, 0.1231689453125, 0.3535308837890625, 0.0026111602783203125, 1.1845283508300781, 0.2089996337890625, -0.28641510009765625, 0.2151031494140625, 0.498260498046875, -0.1604595184326172, -0.674163818359375, -0.40518951416015625, 0.040973663330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000003.npy"} +{"epoch": 0.004405286343612335, "step": 4, "batch_size": 64, "mean": -0.02125033736228943, "std": 0.3466644883155823, "min": -0.6687850952148438, "p10": -0.47548522949218747, "median": -0.07352828979492188, "p90": 0.4227813720703127, "max": 1.2737350463867188, "pos_frac": 0.421875, "sample": [0.15477752685546875, -0.5248489379882812, -0.1560821533203125, 0.10869598388671875, -0.10616302490234375, 0.44034576416015625, 0.57501220703125, 0.009521484375, -0.4143562316894531, -0.0028533935546875, -0.3737373352050781, 0.3240699768066406, -0.19890594482421875, -0.11342620849609375, -0.2062225341796875, -0.10839462280273438, 0.126861572265625, -0.2784576416015625, -0.21390151977539062, -0.08125877380371094, -0.4974212646484375, -0.5638961791992188, -0.2659454345703125, -0.383026123046875, -0.096343994140625, -0.0595855712890625, 0.2916107177734375, 0.21562957763671875, -0.21507644653320312, -0.611724853515625, 0.014041900634765625, -0.023233413696289062, -0.167205810546875, 0.04286956787109375, -0.5092754364013672, 0.18294334411621094, -0.6687850952148438, -0.071929931640625, -0.07512664794921875, 0.1259002685546875, -0.14809417724609375, -0.19214820861816406, 0.38179779052734375, 0.59210205078125, 0.15331268310546875, 0.10776901245117188, 0.329803466796875, 0.5581207275390625, -0.12898826599121094, -0.49985504150390625, -0.12280654907226562, 0.24433517456054688, 0.2951202392578125, -0.4243011474609375, 0.18082618713378906, 0.46535301208496094, 1.2737350463867188, -0.16167449951171875, -0.05783843994140625, 0.096649169921875, 0.4747161865234375, -0.22119712829589844, -0.41827392578125, 0.236419677734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000004.npy"} +{"epoch": 0.005873715124816446, "step": 5, "batch_size": 64, "mean": 0.03655460476875305, "std": 0.36775702238082886, "min": -1.0894775390625, "p10": -0.42081298828124997, "median": 0.04675102233886719, "p90": 0.49896697998046885, "max": 0.9008712768554688, "pos_frac": 0.546875, "sample": [0.0397491455078125, 0.1982421875, 0.132476806640625, -0.13463592529296875, 0.3111724853515625, 0.395782470703125, 0.7168006896972656, -0.2298736572265625, -0.06490516662597656, 0.3406524658203125, 0.14237213134765625, 0.541900634765625, 0.07331275939941406, 0.09097671508789062, -0.06625747680664062, -0.0108489990234375, 0.06243133544921875, 0.04116249084472656, -0.062099456787109375, -0.1951141357421875, 0.04395294189453125, 0.5097198486328125, 0.16313552856445312, -0.14658355712890625, 0.157073974609375, -0.29199981689453125, 0.34751129150390625, -0.370697021484375, -1.0894775390625, 0.550872802734375, -0.2722434997558594, 0.049549102783203125, 0.7149658203125, -0.11156272888183594, -0.18854904174804688, 0.20159912109375, 0.304229736328125, 0.46976470947265625, 0.3984260559082031, 0.524871826171875, 0.2529945373535156, 0.148681640625, -0.00342559814453125, -0.4779205322265625, 0.9008712768554688, -0.8629150390625, -0.01839447021484375, -0.5131072998046875, -0.38372802734375, -0.43670654296875, 0.24180030822753906, -0.04369354248046875, -0.16539764404296875, -0.2628440856933594, -0.09454345703125, 0.16192626953125, 0.473876953125, -0.29352569580078125, -0.43929290771484375, 0.19496917724609375, -0.6793594360351562, 0.23276519775390625, -0.0495758056640625, 0.168182373046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000005.npy"} +{"epoch": 0.007342143906020558, "step": 6, "batch_size": 64, "mean": -0.04002311825752258, "std": 0.4169080853462219, "min": -1.043914794921875, "p10": -0.5135873794555664, "median": -0.0203094482421875, "p90": 0.5216491699218752, "max": 0.8243865966796875, "pos_frac": 0.46875, "sample": [-0.9399490356445312, -0.08995246887207031, -0.06512641906738281, 0.0873565673828125, -0.31976318359375, 0.674652099609375, -0.4846916198730469, -0.4577369689941406, -0.4047698974609375, -1.043914794921875, 0.2895622253417969, 0.8243865966796875, -0.9969482421875, 0.3756847381591797, -0.3685417175292969, -0.1401214599609375, -0.00951385498046875, -0.12459754943847656, 0.013851165771484375, 0.184173583984375, -0.7579193115234375, -0.49339866638183594, 0.237030029296875, 0.003902435302734375, -0.2505836486816406, 0.08646392822265625, 0.2991828918457031, 0.8018569946289062, -0.03110504150390625, -0.2967720031738281, -0.20084381103515625, 0.33360862731933594, 0.6303482055664062, -0.12396240234375, -0.04131317138671875, 0.3746795654296875, -0.763885498046875, -0.00054168701171875, -0.21443939208984375, -0.37354278564453125, -0.200042724609375, 0.0389862060546875, -0.5222396850585938, 0.11528968811035156, 0.4557628631591797, 0.04302787780761719, 0.2043914794921875, 0.11734580993652344, 0.5416259765625, -0.41457366943359375, 0.00936126708984375, -0.12725067138671875, 0.18966293334960938, 0.10530662536621094, 0.5838775634765625, 0.1183013916015625, -0.10812759399414062, -0.6804656982421875, 0.47503662109375, -0.27386474609375, -0.1077880859375, -0.10727691650390625, 0.662139892578125, 0.09722900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000006.npy"} +{"epoch": 0.00881057268722467, "step": 7, "batch_size": 64, "mean": 0.03685298562049866, "std": 0.39603114128112793, "min": -1.09197998046875, "p10": -0.3415199279785156, "median": 0.04515838623046875, "p90": 0.43000030517578125, "max": 1.148834228515625, "pos_frac": 0.578125, "sample": [0.17153549194335938, -0.7578048706054688, -0.15599632263183594, -0.22876739501953125, -1.0454483032226562, 0.13387298583984375, -0.45716094970703125, -0.01141357421875, -0.35170745849609375, 0.8918838500976562, -1.09197998046875, 0.3156280517578125, -0.215667724609375, 0.27007293701171875, 0.0590667724609375, 0.1145172119140625, 0.030172348022460938, -0.23369216918945312, -0.03732109069824219, 0.019306182861328125, 0.13045501708984375, 0.757476806640625, 0.43082427978515625, 0.27620697021484375, 0.1888427734375, 0.034912109375, -0.1998271942138672, 0.08728790283203125, -0.20958328247070312, -0.1233673095703125, 0.13110733032226562, -0.07454681396484375, 0.22499465942382812, -0.210205078125, 0.08056640625, 0.42807769775390625, -0.012958526611328125, 0.0554046630859375, -0.158355712890625, -0.2365875244140625, 0.407806396484375, -0.5453033447265625, 0.3098602294921875, -0.03264617919921875, 0.015655517578125, 0.022918701171875, 0.4537506103515625, -0.3177490234375, 0.2466583251953125, 0.06764984130859375, 1.148834228515625, -0.6785507202148438, -0.0209503173828125, 0.34632110595703125, -0.06939697265625, 0.535247802734375, -0.25147247314453125, 0.11383056640625, 0.9949932098388672, 0.22313308715820312, 0.2706298828125, 0.0997772216796875, -0.06049346923828125, 0.05826568603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000007.npy"} +{"epoch": 0.010279001468428781, "step": 8, "batch_size": 64, "mean": -0.05733811855316162, "std": 0.3448963165283203, "min": -1.4411468505859375, "p10": -0.48493270874023436, "median": -0.011095046997070312, "p90": 0.3349519729614259, "max": 0.62738037109375, "pos_frac": 0.5, "sample": [0.3493194580078125, 0.3563079833984375, -0.4286041259765625, -0.10817718505859375, -0.48711395263671875, -0.051074981689453125, 0.07484626770019531, -0.06769561767578125, -0.04156494140625, -0.023731231689453125, -0.10165023803710938, 0.16172027587890625, -0.20542144775390625, 0.12717247009277344, 0.38275146484375, -0.36318206787109375, 0.14105606079101562, -0.1526947021484375, 0.166900634765625, -0.7697677612304688, 0.39801788330078125, -0.599365234375, 0.2248687744140625, 0.047149658203125, -0.024181365966796875, -0.55621337890625, -0.35065460205078125, 0.2282428741455078, 0.067230224609375, -0.4704551696777344, -0.36162567138671875, -0.3374481201171875, -0.023956298828125, 0.62738037109375, -0.03778839111328125, 0.108551025390625, 0.07010650634765625, 0.11507225036621094, 0.0478057861328125, -0.2046966552734375, 0.16254425048828125, -0.40816497802734375, 0.13079833984375, -0.619659423828125, 0.22029495239257812, -0.493988037109375, -1.4411468505859375, 0.2999725341796875, 0.09362220764160156, 0.24395751953125, 0.0015411376953125, -0.3343658447265625, -0.09130859375, -0.10546112060546875, 0.4631805419921875, 0.34806060791015625, -0.4798431396484375, 0.11028289794921875, 0.0623016357421875, -0.2927398681640625, -0.06087303161621094, 0.3043651580810547, 0.012285232543945312, 0.2772674560546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000008.npy"} +{"epoch": 0.011747430249632892, "step": 9, "batch_size": 64, "mean": 0.010491013526916504, "std": 0.4288538098335266, "min": -1.1603164672851562, "p10": -0.5315208435058593, "median": 0.0066967010498046875, "p90": 0.5592741012573242, "max": 1.184783935546875, "pos_frac": 0.5, "sample": [-0.002349853515625, 0.0631256103515625, 0.5856990814208984, 0.5285568237304688, 0.05889129638671875, -0.3079071044921875, -0.3601837158203125, 0.42855072021484375, 0.717315673828125, 0.4852943420410156, -0.5639266967773438, -0.15277099609375, -0.5597763061523438, -1.1603164672851562, -0.4655914306640625, -0.70703125, -0.1242523193359375, -0.004062652587890625, -0.1411590576171875, -0.3009529113769531, -1.0293121337890625, 0.018331527709960938, -0.038055419921875, 0.1282196044921875, -0.46068572998046875, 0.06626129150390625, 0.17092514038085938, 0.2177734375, 0.015743255615234375, 0.5599288940429688, 0.06674957275390625, 0.09561538696289062, 0.2195281982421875, 0.6021881103515625, -0.09443092346191406, -0.34069061279296875, -0.20111083984375, 0.146759033203125, 0.5581512451171875, 0.3494110107421875, -0.00347900390625, 0.0662994384765625, -0.116973876953125, -0.09171676635742188, -0.07898139953613281, -0.258819580078125, -0.133026123046875, 0.279693603515625, -0.23926544189453125, 0.03704261779785156, -0.1736602783203125, -0.16157913208007812, 0.24445343017578125, 0.5579509735107422, 0.5597553253173828, 0.2486114501953125, 0.4446563720703125, 0.6092376708984375, -0.29039955139160156, 1.184783935546875, -0.5751876831054688, 0.40840911865234375, -0.8969268798828125, -0.01790618896484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000009.npy"} +{"epoch": 0.013215859030837005, "step": 10, "batch_size": 64, "mean": -0.015163183212280273, "std": 0.3706546127796173, "min": -0.944732666015625, "p10": -0.4676921844482422, "median": -0.04345703125, "p90": 0.4301151275634766, "max": 0.955169677734375, "pos_frac": 0.453125, "sample": [-0.0544281005859375, -0.5283889770507812, 0.06861686706542969, 0.01398468017578125, -0.42329978942871094, 0.4275970458984375, -0.06527519226074219, -0.0302581787109375, 0.27394866943359375, -0.12749481201171875, 0.15576553344726562, 0.4225921630859375, -0.872283935546875, 0.271759033203125, -0.6533966064453125, 0.2159423828125, -0.30381011962890625, -0.17956161499023438, 0.023847579956054688, 0.35688018798828125, 0.274444580078125, -0.06953048706054688, 0.21259117126464844, 0.25125885009765625, -0.141021728515625, -0.22128677368164062, -0.28675079345703125, -0.0324859619140625, 0.4450874328613281, 0.20842361450195312, 0.601043701171875, -0.1962432861328125, -0.0034770965576171875, -0.10068511962890625, -0.4756927490234375, -0.5381202697753906, -0.944732666015625, 0.4311943054199219, 0.16281509399414062, -0.13874244689941406, -0.4810619354248047, -0.08788299560546875, -0.16117477416992188, 0.83538818359375, 0.01239013671875, 0.31293487548828125, 0.41623687744140625, -0.29511260986328125, -0.07083511352539062, 0.1755523681640625, -0.315032958984375, -0.16355514526367188, -0.23836517333984375, 0.004550933837890625, -0.44341278076171875, -0.4490242004394531, -0.1312255859375, 0.11463165283203125, 0.46099090576171875, 0.955169677734375, 0.5910263061523438, -0.43738555908203125, 0.12314224243164062, -0.12921524047851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000010.npy"} +{"epoch": 0.014684287812041116, "step": 11, "batch_size": 64, "mean": 0.024578243494033813, "std": 0.36094382405281067, "min": -1.34063720703125, "p10": -0.37055854797363275, "median": 0.00807952880859375, "p90": 0.42927398681640627, "max": 1.1585922241210938, "pos_frac": 0.515625, "sample": [-0.046848297119140625, 0.3947906494140625, 0.43877410888671875, -0.07598876953125, -0.2633056640625, -0.19207000732421875, 0.18349456787109375, 0.16388320922851562, -0.06622886657714844, 0.3453369140625, 0.21259117126464844, -0.0736541748046875, 0.13243484497070312, -0.02309417724609375, -0.3908843994140625, -0.26111602783203125, 0.2804107666015625, -0.3231315612792969, 0.35040283203125, 0.16260528564453125, 0.4237518310546875, 0.00084686279296875, -0.42234039306640625, 0.30315399169921875, -0.21710205078125, 0.431640625, -0.2574005126953125, -0.14827728271484375, -0.0072021484375, -0.21186447143554688, 0.0864715576171875, -0.0588836669921875, 0.2151031494140625, 0.4415283203125, 0.4045372009277344, 0.327789306640625, 0.077117919921875, 1.1585922241210938, 0.16107177734375, -0.03583717346191406, 0.08475112915039062, 0.2829856872558594, 0.01531219482421875, -0.0082855224609375, 0.12360382080078125, 0.2314910888671875, 0.0336456298828125, -0.14336013793945312, -1.34063720703125, 0.4790496826171875, -0.13177490234375, -0.6788177490234375, 0.5181732177734375, -0.4339790344238281, 0.4646339416503906, -0.5093116760253906, -0.14192962646484375, -0.5736961364746094, -0.31841278076171875, -0.2347259521484375, 0.21511077880859375, 0.39038848876953125, -0.14275360107421875, -0.22955322265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000011.npy"} +{"epoch": 0.016152716593245228, "step": 12, "batch_size": 64, "mean": 0.005901157855987549, "std": 0.4333747327327728, "min": -1.20166015625, "p10": -0.5622680664062499, "median": 0.029298782348632812, "p90": 0.5754165649414062, "max": 0.8854827880859375, "pos_frac": 0.53125, "sample": [0.492279052734375, 0.26385498046875, 0.1423187255859375, -1.20166015625, -0.12641143798828125, -0.3177642822265625, 0.07976531982421875, -0.14760208129882812, 0.6872711181640625, 0.7110366821289062, 0.08893585205078125, -0.244903564453125, -0.6677169799804688, 0.474365234375, -0.27910614013671875, 0.8854827880859375, -0.15653228759765625, 0.61834716796875, -0.02398681640625, 0.16069412231445312, -1.120697021484375, -0.33226776123046875, 0.70654296875, -0.0976715087890625, 0.022769927978515625, -0.26482391357421875, -0.288360595703125, 0.5640411376953125, -0.2644004821777344, -0.571624755859375, -0.540435791015625, 0.745849609375, 0.1737518310546875, -0.1004486083984375, -0.8997268676757812, 0.2420024871826172, -0.04316139221191406, -0.226898193359375, -0.3194580078125, -0.09997749328613281, -0.19348907470703125, 0.16186904907226562, -0.6494178771972656, 0.17780685424804688, -0.8262786865234375, 0.11347198486328125, 0.48638916015625, -0.08006668090820312, 0.3527851104736328, 0.45659637451171875, 0.14620208740234375, 0.13780784606933594, 0.03582763671875, 0.1669788360595703, 0.2756805419921875, -0.19403076171875, 0.580291748046875, 0.1027679443359375, -0.03559112548828125, 0.010402679443359375, 0.12530136108398438, -0.06374359130859375, 0.272613525390625, 0.0938262939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000012.npy"} +{"epoch": 0.01762114537444934, "step": 13, "batch_size": 64, "mean": 0.009646564722061157, "std": 0.4301586151123047, "min": -1.637359619140625, "p10": -0.3507537841796875, "median": 0.00809478759765625, "p90": 0.4263500213623048, "max": 1.6751632690429688, "pos_frac": 0.515625, "sample": [-0.2741241455078125, -0.052059173583984375, 0.11940765380859375, -0.44556427001953125, 0.28227996826171875, 0.2332763671875, 0.10430145263671875, 0.21355438232421875, 0.8230056762695312, -0.3305206298828125, 0.11138153076171875, 0.6058120727539062, 0.4803009033203125, -0.08669281005859375, 0.013095855712890625, 1.6751632690429688, -0.33232879638671875, -0.083282470703125, -0.16844558715820312, -0.22289276123046875, 0.018259048461914062, -0.03186798095703125, 0.10190582275390625, 0.08144760131835938, -0.10750198364257812, -0.133544921875, 0.09861373901367188, -0.08642387390136719, -0.3276214599609375, -0.32843017578125, -0.45703887939453125, 0.4378204345703125, 0.11523056030273438, 0.3995857238769531, -1.637359619140625, -0.8367767333984375, -0.20478057861328125, -0.20548057556152344, -0.08886528015136719, -0.297576904296875, 0.2574653625488281, 0.30125999450683594, 0.004119873046875, 0.5090866088867188, 0.0120697021484375, 0.08625030517578125, -0.17089462280273438, -0.08344650268554688, 0.2252044677734375, 0.0472259521484375, -0.5036849975585938, -0.05437469482421875, -0.241363525390625, 0.08892822265625, -0.35865020751953125, 0.16922760009765625, 0.30454254150390625, 0.0879058837890625, 1.00634765625, -0.524810791015625, -0.0253143310546875, 0.12322235107421875, 0.34747314453125, -0.16567230224609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000013.npy"} +{"epoch": 0.01908957415565345, "step": 14, "batch_size": 64, "mean": 0.0010128915309906006, "std": 0.4283331632614136, "min": -1.479461669921875, "p10": -0.4663551330566406, "median": -0.012849807739257812, "p90": 0.4951553344726564, "max": 1.1710586547851562, "pos_frac": 0.46875, "sample": [-0.47235870361328125, 0.28043365478515625, 0.06099700927734375, 0.024158477783203125, 0.668853759765625, 0.5144500732421875, -0.625152587890625, -0.2615966796875, 0.4221954345703125, 0.04989433288574219, -0.13574981689453125, -0.027278900146484375, 0.9397125244140625, 1.1710586547851562, -0.3539009094238281, -0.108642578125, -0.3332405090332031, -0.00858306884765625, -0.3509101867675781, 0.07254981994628906, 0.21436309814453125, 0.036319732666015625, -0.123931884765625, 0.45013427734375, -0.030029296875, -0.5933799743652344, -0.6619873046875, 0.2104644775390625, 0.067474365234375, 0.36624908447265625, -0.26128387451171875, -0.023212432861328125, -0.285675048828125, 0.9023399353027344, -0.1327037811279297, 0.25653839111328125, -0.017116546630859375, 0.721954345703125, 0.24257850646972656, 0.03504371643066406, -0.4678955078125, 0.17490386962890625, -0.7355499267578125, -0.02863311767578125, -0.12773513793945312, 0.08661651611328125, 0.4225883483886719, 0.12268829345703125, 0.060211181640625, -0.0175323486328125, -1.479461669921875, -0.23123931884765625, 0.05445098876953125, -0.2818717956542969, -0.21527862548828125, 0.6582717895507812, -0.005706787109375, 0.33038330078125, -0.46276092529296875, -0.42305755615234375, -0.4161567687988281, 0.2556037902832031, -0.03408050537109375, -0.07496261596679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000014.npy"} +{"epoch": 0.020558002936857563, "step": 15, "batch_size": 64, "mean": 0.029320329427719116, "std": 0.3721500337123871, "min": -1.3077239990234375, "p10": -0.3649717330932617, "median": 0.045134544372558594, "p90": 0.4791122436523438, "max": 1.141876220703125, "pos_frac": 0.578125, "sample": [-0.07243156433105469, 0.901947021484375, 0.024091720581054688, -0.029165267944335938, 0.22891998291015625, 0.14734268188476562, 0.001201629638671875, 0.19197845458984375, -0.2300567626953125, 0.25641632080078125, -0.13161468505859375, -0.09997749328613281, 0.31307220458984375, -0.6687393188476562, 0.4200096130371094, -0.0316619873046875, 0.030094146728515625, -0.10471343994140625, 0.549285888671875, 0.245391845703125, 0.0937042236328125, -1.3077239990234375, -0.3080902099609375, -0.09282684326171875, -0.46903228759765625, 0.1169281005859375, 0.11470794677734375, -0.32163238525390625, 0.16933441162109375, 0.0630645751953125, -0.168212890625, -0.434539794921875, 0.03924560546875, -0.0020046234130859375, 0.6167449951171875, 0.4800872802734375, -0.5722808837890625, 0.6367950439453125, 0.1488189697265625, -0.3723011016845703, -0.19353675842285156, 0.027833938598632812, 0.05102348327636719, 0.07137680053710938, 0.1047210693359375, -0.13193130493164062, 0.1528644561767578, -0.1832561492919922, 0.48014068603515625, 0.17539596557617188, 0.476837158203125, -0.1295928955078125, 0.05683135986328125, 0.11561965942382812, -0.28636932373046875, 0.25127410888671875, 0.18218994140625, -0.347869873046875, 0.083892822265625, -0.181854248046875, -0.22945404052734375, -0.56634521484375, 0.3826560974121094, 1.141876220703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000015.npy"} +{"epoch": 0.022026431718061675, "step": 16, "batch_size": 64, "mean": 0.06042572855949402, "std": 0.3054898679256439, "min": -0.633697509765625, "p10": -0.32482070922851564, "median": 0.022003173828125, "p90": 0.4334480285644533, "max": 0.7891464233398438, "pos_frac": 0.5625, "sample": [-0.026041030883789062, 0.3798828125, -0.17888641357421875, 0.02234649658203125, 0.14044952392578125, 0.22310638427734375, -0.01763916015625, -0.04574775695800781, 0.13165283203125, 0.5761489868164062, -0.486480712890625, 0.117095947265625, 0.7262191772460938, 0.5210952758789062, 0.016569137573242188, -0.157440185546875, -0.044078826904296875, -0.41613006591796875, -0.3293914794921875, 0.02165985107421875, -0.579986572265625, 0.21640968322753906, 0.18822860717773438, -0.33319091796875, 0.2689208984375, -0.20293617248535156, -0.633697509765625, 0.11115455627441406, -0.031154632568359375, -0.06329154968261719, -0.2885246276855469, 0.5588951110839844, 0.007022857666015625, -0.2329254150390625, -0.21125030517578125, 0.2392425537109375, 0.2328033447265625, 0.3532257080078125, -0.07256317138671875, -0.05965423583984375, 0.36260223388671875, -0.18358612060546875, 0.17421340942382812, 0.171875, 0.7891464233398438, 0.38692474365234375, -0.37490272521972656, -0.2892436981201172, 0.10639190673828125, -0.112548828125, 0.2019329071044922, -0.11588287353515625, 0.0155487060546875, 0.19721031188964844, -0.08812713623046875, 0.0269775390625, 0.23016738891601562, 0.3284912109375, 0.5762901306152344, 0.3927154541015625, -0.085662841796875, 0.45090484619140625, 0.37884521484375, -0.31415557861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000016.npy"} +{"epoch": 0.023494860499265784, "step": 17, "batch_size": 64, "mean": 0.10754308104515076, "std": 0.3486311137676239, "min": -0.734466552734375, "p10": -0.2747003555297851, "median": 0.11061859130859375, "p90": 0.5492942810058594, "max": 1.1995849609375, "pos_frac": 0.59375, "sample": [-0.5289115905761719, 0.09411048889160156, 0.33838653564453125, -0.08798980712890625, -0.352752685546875, -0.08141326904296875, -0.09307861328125, -0.734466552734375, 0.083953857421875, -0.154327392578125, 0.17893218994140625, 0.5125579833984375, 0.426422119140625, -0.1830596923828125, -0.7002544403076172, 0.0629119873046875, -0.14076995849609375, 0.13175582885742188, -0.00936126708984375, 0.7049026489257812, 0.26924896240234375, 0.18049240112304688, 0.21676254272460938, 0.103729248046875, -0.03455352783203125, 0.333099365234375, 0.17018890380859375, 0.195556640625, 0.4893798828125, 0.2562408447265625, 0.16187286376953125, 0.7891159057617188, 0.1175079345703125, 0.332733154296875, -0.03486061096191406, 0.22969818115234375, -0.062175750732421875, -0.1985034942626953, -0.2923717498779297, 0.25238037109375, 0.5595779418945312, 0.06863021850585938, 0.4790077209472656, 0.24691200256347656, -0.43544769287109375, -0.025604248046875, -0.19278335571289062, 0.6278438568115234, 0.2541961669921875, -0.1522064208984375, -0.089111328125, -0.049968719482421875, 0.1182098388671875, -0.13085174560546875, 0.29383087158203125, 0.525299072265625, 1.1995849609375, 0.6122627258300781, 0.61431884765625, -0.39846038818359375, -0.17499160766601562, 0.0375518798828125, 0.185333251953125, -0.23346710205078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000017.npy"} +{"epoch": 0.024963289280469897, "step": 18, "batch_size": 64, "mean": 0.0317995548248291, "std": 0.36544162034988403, "min": -0.9456634521484375, "p10": -0.3904548645019531, "median": 0.035816192626953125, "p90": 0.49973163604736337, "max": 0.872283935546875, "pos_frac": 0.53125, "sample": [0.15006256103515625, -0.9212875366210938, 0.08345794677734375, 0.01496124267578125, 0.2623252868652344, 0.03221893310546875, 0.13051414489746094, -0.648834228515625, -0.0827178955078125, 0.47240257263183594, 0.4317474365234375, -0.1723003387451172, 0.29035186767578125, 0.7525177001953125, -0.2287750244140625, -0.08542633056640625, 0.19806480407714844, 0.24301719665527344, -0.020061492919921875, -0.3933868408203125, -0.00543975830078125, -0.001445770263671875, 0.511444091796875, -0.081634521484375, 0.5651359558105469, 0.3323020935058594, 0.5580062866210938, 0.3588714599609375, -0.2858409881591797, 0.33957672119140625, 0.3498420715332031, 0.1511821746826172, 0.26827239990234375, -0.34039306640625, 0.52069091796875, -0.4714508056640625, -0.07143402099609375, 0.0394134521484375, -0.38361358642578125, -0.0751190185546875, 0.5347709655761719, 0.872283935546875, -0.890716552734375, 0.14020538330078125, 0.4238433837890625, 0.10968780517578125, 0.10106658935546875, -0.3210334777832031, -0.2531280517578125, 0.07212448120117188, 0.10580825805664062, -0.093994140625, -0.41595458984375, -0.05971527099609375, 0.11084747314453125, -0.155242919921875, 0.2698822021484375, -0.07549285888671875, -0.9456634521484375, 0.09850502014160156, -0.014814376831054688, -0.123504638671875, -0.12859344482421875, -0.11321830749511719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000018.npy"} +{"epoch": 0.02643171806167401, "step": 19, "batch_size": 64, "mean": 0.11113619804382324, "std": 0.3500584363937378, "min": -1.1013031005859375, "p10": -0.2136514663696289, "median": 0.06773662567138672, "p90": 0.5630462646484378, "max": 1.4053955078125, "pos_frac": 0.609375, "sample": [0.1874675750732422, -0.1949901580810547, -0.32861328125, -0.17777633666992188, 0.37270355224609375, -0.221649169921875, -0.1204071044921875, 0.15423202514648438, 0.04536247253417969, 0.6990737915039062, 0.589569091796875, -0.02425384521484375, 0.659027099609375, 0.40985870361328125, 1.4053955078125, -1.1013031005859375, 0.1618499755859375, 0.06277847290039062, -0.0384979248046875, 0.20379638671875, -0.4046630859375, 0.0354766845703125, -0.03929710388183594, 0.13861083984375, -0.038028717041015625, 0.063873291015625, 0.274688720703125, 0.1753692626953125, 0.11197662353515625, -0.2338714599609375, -0.182098388671875, 0.07159996032714844, 0.2809162139892578, 0.03300666809082031, -0.194732666015625, 0.11509895324707031, -0.22167205810546875, -0.284637451171875, 0.50115966796875, 0.712493896484375, 0.16522979736328125, 0.4188690185546875, -0.15973663330078125, -0.0269012451171875, -0.00225067138671875, 0.7743301391601562, 0.0973968505859375, 0.449127197265625, -0.1306915283203125, 0.2668647766113281, 0.31606292724609375, 0.20215225219726562, -0.008544921875, -0.026628494262695312, 0.19464111328125, 0.16596031188964844, -0.17488861083984375, 0.2631950378417969, 0.0322418212890625, 0.01702880859375, 0.7530670166015625, 0.14934158325195312, -0.17995452880859375, -0.10208892822265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000019.npy"} +{"epoch": 0.027900146842878122, "step": 20, "batch_size": 64, "mean": 0.07542020082473755, "std": 0.31793731451034546, "min": -0.8017425537109375, "p10": -0.23335704803466795, "median": 0.028299331665039062, "p90": 0.4594173431396486, "max": 0.9977035522460938, "pos_frac": 0.609375, "sample": [-0.013490676879882812, 0.17618942260742188, -0.042087554931640625, 0.4130859375, -0.165740966796875, 0.415771484375, 0.0328216552734375, 0.3463630676269531, 0.3111114501953125, 0.0747222900390625, -0.8017425537109375, 0.4811859130859375, 0.2582206726074219, 0.01532745361328125, 0.01204681396484375, -0.04012298583984375, -0.14083480834960938, 0.2744407653808594, -0.30699920654296875, 0.529083251953125, 0.2016448974609375, -0.7068023681640625, 0.13829803466796875, 0.03916168212890625, -0.083648681640625, -0.15311622619628906, -0.6802444458007812, -0.036113739013671875, 0.19498062133789062, 0.01218414306640625, -0.059047698974609375, -0.2969169616699219, -0.48925018310546875, 0.0029087066650390625, 0.022006988525390625, -0.1164093017578125, 0.219268798828125, -0.19548797607421875, -0.08725738525390625, -0.20418548583984375, -0.11554718017578125, 0.496673583984375, 0.33452606201171875, 0.34918975830078125, 0.4781227111816406, -0.048503875732421875, 0.1889801025390625, 0.2194061279296875, 0.9977035522460938, -0.07479476928710938, -0.0116729736328125, 0.1735992431640625, 0.16977882385253906, 0.39810943603515625, 0.742095947265625, -0.24585914611816406, 0.011322021484375, 0.2678070068359375, 0.09984016418457031, 0.06274032592773438, -0.09959793090820312, 0.6706771850585938, 0.18719482421875, 0.023777008056640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000020.npy"} +{"epoch": 0.02936857562408223, "step": 21, "batch_size": 64, "mean": 0.06414888799190521, "std": 0.3082134425640106, "min": -0.5070648193359375, "p10": -0.3398246765136719, "median": 0.06994056701660156, "p90": 0.3462173461914063, "max": 1.2077178955078125, "pos_frac": 0.625, "sample": [0.00807952880859375, 0.012805938720703125, -0.08246612548828125, 0.1129302978515625, -0.12456512451171875, 0.004872322082519531, -0.12888717651367188, 0.18958473205566406, 0.262725830078125, 0.3550567626953125, 0.41510009765625, 0.09514617919921875, -0.06507682800292969, 0.3600196838378906, 0.2075653076171875, 0.73577880859375, 0.1009063720703125, 0.08041954040527344, -0.14782142639160156, 0.06701278686523438, -0.3309326171875, 0.1608734130859375, -0.34363555908203125, -0.06576156616210938, 0.07088851928710938, 0.8012161254882812, 0.137176513671875, -0.398193359375, -0.15203857421875, 0.6372604370117188, 0.1448516845703125, 0.06899261474609375, -0.2605400085449219, 0.24495887756347656, -0.3999481201171875, -0.12329864501953125, -0.5000495910644531, 0.2352466583251953, 1.2077178955078125, -0.5070648193359375, 0.137786865234375, 0.17507362365722656, -0.009063720703125, -0.03182792663574219, 0.17751693725585938, -0.1423358917236328, -0.014347076416015625, -0.11165618896484375, 0.30391693115234375, -0.26503753662109375, 0.2410125732421875, 0.19426727294921875, 0.31543731689453125, 0.046417236328125, 0.03778266906738281, -0.20795059204101562, 0.1862945556640625, -0.4647674560546875, 0.17533111572265625, 0.31396484375, -0.4893798828125, 0.10086441040039062, 0.325592041015625, 0.023729324340820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000021.npy"} +{"epoch": 0.030837004405286344, "step": 22, "batch_size": 64, "mean": 0.206741064786911, "std": 0.4434663951396942, "min": -0.6882171630859375, "p10": -0.3468109130859375, "median": 0.21599674224853516, "p90": 0.8212585449218751, "max": 1.3379440307617188, "pos_frac": 0.6875, "sample": [0.13161087036132812, 0.872528076171875, -0.32906341552734375, 0.21917152404785156, 0.430389404296875, 0.2746429443359375, -0.092071533203125, 0.011600494384765625, 0.32904624938964844, -0.10809326171875, 1.3379440307617188, 0.609344482421875, -0.35318756103515625, 0.9714279174804688, -0.15939712524414062, 0.6170578002929688, 0.21282196044921875, 0.0484771728515625, -0.240570068359375, -0.48065185546875, 0.09432220458984375, 0.5374603271484375, -0.6182861328125, 0.7970123291015625, -0.19370269775390625, 0.1927967071533203, 0.9144973754882812, 0.39037322998046875, 0.30341339111328125, 0.058319091796875, 0.024629592895507812, 0.714599609375, -0.23854827880859375, -0.21891021728515625, 0.4943809509277344, 0.6719818115234375, 0.37210845947265625, -0.33193206787109375, -0.04540252685546875, 0.3915863037109375, 0.23773193359375, 0.39939117431640625, 0.20778656005859375, 0.019016265869140625, 0.018550872802734375, 0.7652206420898438, -0.1911163330078125, -0.6882171630859375, 1.056610107421875, 0.2322845458984375, 0.31436920166015625, 0.02530670166015625, -0.2707328796386719, -0.4319419860839844, -0.4151611328125, 0.242706298828125, 0.5021743774414062, -0.19402122497558594, 0.3004302978515625, 0.96002197265625, 0.36627197265625, 0.8316497802734375, 0.682647705078125, -0.35327911376953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000022.npy"} +{"epoch": 0.032305433186490456, "step": 23, "batch_size": 64, "mean": 0.2705477774143219, "std": 0.4138149321079254, "min": -0.85565185546875, "p10": -0.11949157714843749, "median": 0.20740795135498047, "p90": 0.7874473571777345, "max": 1.4178848266601562, "pos_frac": 0.78125, "sample": [-0.0405731201171875, -0.048610687255859375, 0.8022079467773438, 0.040477752685546875, 0.18039703369140625, 0.3389625549316406, 0.39803314208984375, 0.0228118896484375, -0.3590526580810547, -0.85565185546875, 0.1730194091796875, 1.4178848266601562, 0.306793212890625, 0.09051513671875, 0.36936187744140625, 0.48082733154296875, 0.49271583557128906, 0.04036712646484375, 0.2262725830078125, -0.0640869140625, 0.7095413208007812, 0.24301528930664062, 1.3955841064453125, 0.31696319580078125, -0.02065277099609375, 0.1766815185546875, -0.47829437255859375, -0.5174369812011719, 0.7173614501953125, 1.1053009033203125, 0.028949737548828125, 0.9972991943359375, 0.06673431396484375, 0.6245994567871094, 0.03386497497558594, 0.1037139892578125, -0.104400634765625, 0.2798614501953125, 0.18854331970214844, 0.4280834197998047, 0.27816009521484375, 0.17059707641601562, 0.25727081298828125, 0.6846046447753906, 0.13358306884765625, 0.93463134765625, 0.07613754272460938, 0.166351318359375, 0.086395263671875, -0.1259002685546875, 0.4784507751464844, 0.5290470123291016, 0.15288543701171875, -0.17963218688964844, -0.013311386108398438, 0.6668853759765625, 0.460723876953125, 0.8756103515625, -0.1452465057373047, -0.1045379638671875, 0.25107574462890625, 0.31915283203125, 0.3011322021484375, 0.7530059814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000023.npy"} +{"epoch": 0.033773861967694566, "step": 24, "batch_size": 64, "mean": 0.24319320917129517, "std": 0.3789023756980896, "min": -0.5788421630859375, "p10": -0.21066436767578126, "median": 0.25905323028564453, "p90": 0.7156667709350588, "max": 1.468292236328125, "pos_frac": 0.734375, "sample": [0.5647048950195312, 0.22069549560546875, -0.163970947265625, -0.15216064453125, 0.7410526275634766, -0.2100982666015625, 0.28823089599609375, 0.115936279296875, 0.9733428955078125, 0.07723236083984375, 0.2868614196777344, -0.09291458129882812, -0.05219841003417969, 0.34140777587890625, 0.6547966003417969, -0.14048004150390625, -0.00745391845703125, 0.03632545471191406, 0.40465545654296875, -0.25708770751953125, 0.7495765686035156, -0.3822479248046875, -0.1859111785888672, 0.44640350341796875, 0.8257904052734375, 0.4961509704589844, 0.2424793243408203, -0.5788421630859375, 0.422393798828125, 0.27562713623046875, 1.468292236328125, 0.458587646484375, 0.4291191101074219, -0.19873809814453125, 0.6384429931640625, -0.12103271484375, 0.1874847412109375, 0.548095703125, -0.4881744384765625, 0.1422882080078125, 0.76904296875, 0.12451362609863281, -0.4048347473144531, 0.49160003662109375, 0.20836639404296875, 0.5399856567382812, -0.210906982421875, 0.12822914123535156, 0.65643310546875, 0.2815074920654297, 0.327484130859375, 0.18404006958007812, 0.17240524291992188, 0.24210166931152344, 0.4122161865234375, 0.3589935302734375, 0.5300865173339844, 0.0412445068359375, 0.35106849670410156, -0.32421112060546875, 0.30876922607421875, 0.1848602294921875, 0.36224365234375, 0.824462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000024.npy"} +{"epoch": 0.03524229074889868, "step": 25, "batch_size": 64, "mean": 0.2395840287208557, "std": 0.493831992149353, "min": -0.912506103515625, "p10": -0.3890365600585937, "median": 0.21311187744140625, "p90": 0.8702682495117188, "max": 1.4177093505859375, "pos_frac": 0.671875, "sample": [0.4282035827636719, 0.1508636474609375, 0.27056884765625, 0.32250213623046875, -0.166473388671875, -0.017641067504882812, -0.912506103515625, 0.786651611328125, -0.40380859375, 0.04109954833984375, 0.1556549072265625, 0.903656005859375, -0.045574188232421875, 0.57855224609375, 1.4177093505859375, -0.3545684814453125, -0.46321678161621094, 1.0084228515625, 0.5165786743164062, 0.449951171875, -0.2298431396484375, -0.231658935546875, 0.5328960418701172, 0.760711669921875, 0.2902984619140625, 0.37060546875, -0.2667083740234375, 0.9017181396484375, -0.7278594970703125, -0.022212982177734375, 0.4969329833984375, -0.01554107666015625, 0.2938346862792969, 0.8257904052734375, 0.41091156005859375, 0.13558197021484375, 0.155609130859375, 0.8778076171875, -0.8358917236328125, 0.8526763916015625, 0.30106544494628906, 0.7777862548828125, 0.942169189453125, 0.3220558166503906, -0.13441085815429688, -0.19231414794921875, -0.02127838134765625, 0.1370391845703125, 0.7448196411132812, 0.3130226135253906, 1.0805816650390625, 0.032642364501953125, 0.3144569396972656, -0.7196044921875, 0.5967330932617188, 0.06573104858398438, -0.4397735595703125, 0.8284988403320312, -0.11540603637695312, 0.14252090454101562, 0.14324569702148438, 0.8358001708984375, -0.00139617919921875, 0.13710784912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000025.npy"} +{"epoch": 0.03671071953010279, "step": 26, "batch_size": 64, "mean": 0.37870919704437256, "std": 0.5413480997085571, "min": -0.4564056396484375, "p10": -0.181884765625, "median": 0.30806541442871094, "p90": 1.0313491821289062, "max": 1.99169921875, "pos_frac": 0.765625, "sample": [0.7156524658203125, 0.08474540710449219, 0.3180961608886719, 0.04680824279785156, 1.4901123046875, 0.09639739990234375, 0.5727310180664062, 1.151947021484375, 0.137176513671875, -0.06720161437988281, -0.29937744140625, 0.08972930908203125, 1.81317138671875, 0.38822174072265625, 0.092681884765625, 0.754974365234375, 0.6745529174804688, -0.163543701171875, 0.420867919921875, -0.21583938598632812, 0.21643829345703125, 0.011690139770507812, 0.43784332275390625, 0.4549827575683594, 0.29803466796875, 0.9150161743164062, -0.18547821044921875, -0.3322601318359375, 0.40361785888671875, 0.5408096313476562, -0.04695892333984375, -0.4564056396484375, 0.25511741638183594, 0.4009723663330078, -0.11382675170898438, 0.4036865234375, 1.667724609375, -0.147369384765625, 0.25574493408203125, 0.8127288818359375, 0.19910812377929688, 0.3804779052734375, 1.99169921875, 0.5758819580078125, 0.7429313659667969, 1.0417633056640625, -0.3347587585449219, 0.41339111328125, 0.10787773132324219, 0.06578254699707031, -0.4409294128417969, -0.10100936889648438, -0.17350006103515625, 0.40538787841796875, 0.136138916015625, 0.515411376953125, 0.8547134399414062, 1.7485733032226562, 1.007049560546875, 0.08044815063476562, 0.2202911376953125, -0.09522628784179688, 0.4923248291015625, 0.5095481872558594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000026.npy"} +{"epoch": 0.0381791483113069, "step": 27, "batch_size": 64, "mean": 0.5219472646713257, "std": 0.5141395330429077, "min": -0.261474609375, "p10": -0.05240001678466792, "median": 0.41822052001953125, "p90": 1.1893386840820315, "max": 1.9866104125976562, "pos_frac": 0.875, "sample": [0.18301963806152344, 0.5727996826171875, 0.502960205078125, 0.44484710693359375, 0.6270980834960938, 1.1485366821289062, 0.23211669921875, 0.688323974609375, 1.7204818725585938, 0.2648200988769531, 0.6349105834960938, 0.0105743408203125, 1.1391754150390625, -0.07242202758789062, 0.396697998046875, 0.4168701171875, 1.9866104125976562, 0.21929931640625, 0.3165740966796875, 1.0002059936523438, 0.4692535400390625, 0.0068817138671875, 0.18283843994140625, -0.0056819915771484375, 0.25646209716796875, 0.3414459228515625, 0.04593849182128906, 0.08858299255371094, 0.5111427307128906, 1.8701324462890625, -0.16314697265625, -0.1167144775390625, -0.10153961181640625, 1.88555908203125, 0.8954086303710938, 0.22186279296875, 0.6445770263671875, 0.7666473388671875, 0.32796478271484375, 0.3065948486328125, 0.057262420654296875, 0.4559478759765625, 0.141632080078125, 1.0400772094726562, 0.47699737548828125, 0.900390625, -0.11808013916015625, 0.3351116180419922, 0.9842758178710938, 0.4867401123046875, 1.23193359375, 0.15697860717773438, 0.675537109375, 0.26186370849609375, 0.4195709228515625, 1.2068252563476562, 1.3327178955078125, -0.11540794372558594, 0.24570083618164062, -0.261474609375, 0.5312728881835938, 0.9332504272460938, 0.307830810546875, 0.8499603271484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000027.npy"} +{"epoch": 0.039647577092511016, "step": 28, "batch_size": 64, "mean": 0.36013028025627136, "std": 0.6014600396156311, "min": -1.4525146484375, "p10": -0.292889404296875, "median": 0.35738372802734375, "p90": 1.028032684326172, "max": 2.2101898193359375, "pos_frac": 0.75, "sample": [1.1276779174804688, 0.1740570068359375, -0.14539337158203125, 0.13084983825683594, 0.4761180877685547, -0.1380462646484375, -0.273834228515625, 0.3813934326171875, -0.18882369995117188, 0.20379257202148438, 0.36760711669921875, 0.662445068359375, 1.0475311279296875, 0.657684326171875, 0.36078643798828125, -0.12270355224609375, 0.5693016052246094, 0.2404327392578125, 0.3319854736328125, 0.600921630859375, 0.4873695373535156, -0.9485855102539062, 0.03804779052734375, 1.473724365234375, 0.08940887451171875, 0.6173324584960938, 0.08278656005859375, 0.8543167114257812, 1.0550079345703125, 0.9044952392578125, 0.7363128662109375, 0.03229522705078125, -0.09845352172851562, -0.301055908203125, 0.8462104797363281, 0.3365764617919922, 0.4647216796875, 0.8416366577148438, 0.3279914855957031, 0.467315673828125, 0.52099609375, 0.5603160858154297, 0.11356544494628906, 0.84283447265625, 1.782806396484375, 0.2657623291015625, 0.9825363159179688, -0.12276649475097656, 0.943084716796875, -0.314727783203125, 0.9324588775634766, -0.7049636840820312, -0.3025779724121094, 0.52734375, -0.17371749877929688, 0.40218353271484375, 0.35398101806640625, 1.2928924560546875, 2.2101898193359375, -0.10295486450195312, 0.04470062255859375, -1.4525146484375, -0.34185791015625, 0.017528533935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000028.npy"} +{"epoch": 0.041116005873715125, "step": 29, "batch_size": 64, "mean": 0.5567758679389954, "std": 0.5686749815940857, "min": -0.3724250793457031, "p10": -0.09239253997802732, "median": 0.5233612060546875, "p90": 1.1527282714843752, "max": 2.991455078125, "pos_frac": 0.84375, "sample": [0.7795867919921875, 0.03495025634765625, 0.3055877685546875, 0.6639289855957031, -0.07665252685546875, 0.32082176208496094, 0.1028594970703125, 0.6999282836914062, 0.6386260986328125, 0.17673492431640625, 0.4615516662597656, -0.028141021728515625, 0.9139480590820312, 0.6131820678710938, 0.1831817626953125, 0.8812255859375, 0.9775238037109375, -0.13516998291015625, 0.10093116760253906, 1.3250503540039062, -0.1638641357421875, 0.355926513671875, 0.614501953125, 0.4335155487060547, 0.8181228637695312, 0.7964401245117188, -0.3724250793457031, 0.2644920349121094, 0.560516357421875, 0.752288818359375, -0.11136436462402344, 1.17559814453125, 2.991455078125, -0.2067718505859375, 0.493316650390625, 0.17224502563476562, 0.1274566650390625, 0.12610626220703125, 1.7411651611328125, 1.099365234375, 0.884429931640625, 1.038604736328125, 0.3768119812011719, 0.55340576171875, 0.5759372711181641, -0.09913825988769531, 0.8807373046875, 0.2929840087890625, 0.7023582458496094, 0.12822723388671875, 0.379150390625, 0.13332366943359375, 0.709808349609375, 0.37822723388671875, 0.8341865539550781, 1.2412872314453125, -0.0535125732421875, 1.6932754516601562, 0.5987815856933594, 0.4742164611816406, 1.7395858764648438, 0.93817138671875, -0.281341552734375, 0.9063949584960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000029.npy"} +{"epoch": 0.042584434654919234, "step": 30, "batch_size": 64, "mean": 0.7079058289527893, "std": 0.6552006006240845, "min": -0.41600799560546875, "p10": -0.08536376953125, "median": 0.6320343017578125, "p90": 1.5072158813476564, "max": 3.364990234375, "pos_frac": 0.875, "sample": [0.6613540649414062, 0.5860214233398438, -0.09296226501464844, 0.9230728149414062, 1.7312469482421875, -0.29601287841796875, 0.09174346923828125, 0.4889068603515625, 0.12222862243652344, 0.7850494384765625, 0.9357376098632812, 1.1751327514648438, 0.7069473266601562, 0.9803199768066406, 1.479156494140625, 1.5400238037109375, 0.7650909423828125, 0.8964157104492188, -0.08731842041015625, 1.5192413330078125, 0.9484481811523438, 0.2890625, 3.364990234375, 0.6027145385742188, 0.9537506103515625, 0.5631027221679688, 0.545379638671875, 0.47145843505859375, -0.27263641357421875, 0.19425201416015625, 0.5032119750976562, 0.07378005981445312, 0.884429931640625, 0.21383094787597656, 0.15954971313476562, 0.4063873291015625, 1.414520263671875, 1.658233642578125, 1.2607574462890625, 0.7636394500732422, 0.1987152099609375, 1.37628173828125, 0.8248882293701172, 0.34645843505859375, -0.3920936584472656, 1.2135086059570312, 1.1814727783203125, 0.2867774963378906, 0.8070449829101562, 0.957550048828125, 1.4445037841796875, 1.7040023803710938, 0.4357337951660156, -0.08080291748046875, 0.026742935180664062, 1.2188262939453125, 0.5783882141113281, 0.015819549560546875, 0.5782890319824219, -0.09870338439941406, -0.41600799560546875, 1.8297119140625, 0.80023193359375, 0.5583744049072266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000030.npy"} +{"epoch": 0.04405286343612335, "step": 31, "batch_size": 64, "mean": 0.492476224899292, "std": 0.6538965106010437, "min": -0.8985137939453125, "p10": -0.19391326904296874, "median": 0.3832082748413086, "p90": 1.2969055175781252, "max": 2.550933837890625, "pos_frac": 0.796875, "sample": [0.03210639953613281, 1.246917724609375, -0.049072265625, 0.765594482421875, 1.0937576293945312, 0.424591064453125, -0.198486328125, -0.31035614013671875, 0.4679985046386719, -0.5715179443359375, 2.550933837890625, -0.06760406494140625, 0.18831634521484375, 0.5584335327148438, 0.2774810791015625, 0.020040512084960938, 1.3154220581054688, -0.11706161499023438, 1.6423873901367188, 0.6742305755615234, 0.3929901123046875, 0.0730743408203125, 0.26781463623046875, 0.090911865234375, 0.1865997314453125, 0.2905254364013672, 0.9868888854980469, 0.46042633056640625, 0.6913032531738281, 0.099212646484375, 2.016357421875, 0.5092544555664062, 1.90411376953125, -0.362060546875, 0.3687477111816406, -0.3096923828125, 0.221923828125, 0.5246047973632812, -0.15528106689453125, 0.9146499633789062, 1.0303573608398438, 1.2537002563476562, -0.29877471923828125, 0.3734264373779297, -0.1832427978515625, -0.8985137939453125, 0.3673858642578125, 0.819976806640625, 0.1413421630859375, -0.03934669494628906, 0.491302490234375, 1.3676223754882812, 0.8547515869140625, 0.010101318359375, 0.05279541015625, 0.24622344970703125, 0.18230056762695312, 0.53582763671875, 0.5784988403320312, 0.5501174926757812, 0.8727874755859375, 1.9662246704101562, 1.196380615234375, 0.930755615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000031.npy"} +{"epoch": 0.04552129221732746, "step": 32, "batch_size": 64, "mean": 0.7637800574302673, "std": 0.9055874943733215, "min": -1.0298004150390625, "p10": -0.16612014770507813, "median": 0.5863561630249023, "p90": 1.9382125854492191, "max": 4.1165771484375, "pos_frac": 0.84375, "sample": [0.571624755859375, 0.2105865478515625, 1.3085556030273438, 1.0286941528320312, 0.7469482421875, 1.018280029296875, -0.1695098876953125, 0.71099853515625, 0.4255790710449219, 1.0358123779296875, 1.3278961181640625, -0.45655059814453125, 0.0524749755859375, 0.11553573608398438, -1.0298004150390625, 1.4357376098632812, -0.42998504638671875, 1.0793037414550781, -0.3780670166015625, 2.661224365234375, 0.42897796630859375, 1.4805374145507812, 1.9758453369140625, 0.4795379638671875, 0.7602386474609375, 0.9092254638671875, 0.17176055908203125, 0.5377273559570312, -0.0128936767578125, 0.5675582885742188, 0.31037139892578125, 0.09965896606445312, 0.6010875701904297, 2.6692352294921875, 1.5270233154296875, 0.2898712158203125, 0.956756591796875, 0.7275428771972656, -0.12489700317382812, 0.4359245300292969, 2.4787750244140625, 0.99957275390625, -0.15821075439453125, 0.1635894775390625, 4.1165771484375, 1.218536376953125, 0.31813812255859375, 1.85040283203125, 2.790374755859375, -0.28269386291503906, 0.03200531005859375, 0.10419464111328125, 2.2996902465820312, 1.3449211120605469, -0.2529792785644531, 0.12990379333496094, 0.8092803955078125, 1.2014007568359375, 0.2222614288330078, 0.96807861328125, 0.19114303588867188, 0.7467422485351562, 1.1757659912109375, 0.35802459716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000032.npy"} +{"epoch": 0.04698972099853157, "step": 33, "batch_size": 64, "mean": 0.5975916385650635, "std": 0.6344035267829895, "min": -0.6681900024414062, "p10": -0.17468910217285144, "median": 0.46619606018066406, "p90": 1.3964073181152343, "max": 2.4553565979003906, "pos_frac": 0.828125, "sample": [-0.3941001892089844, 0.4609565734863281, 0.6709785461425781, 0.368560791015625, 1.6577911376953125, -0.01065826416015625, -0.252716064453125, 0.2698974609375, 0.471435546875, 1.0429840087890625, 0.362884521484375, 1.38787841796875, 1.132598876953125, 0.9759368896484375, 1.064056396484375, 0.33119964599609375, 0.5120010375976562, -0.6681900024414062, 0.3195343017578125, -0.609954833984375, 0.25414276123046875, 1.245941162109375, 0.8640975952148438, 2.0506973266601562, 0.8238601684570312, 0.4588165283203125, 1.6281089782714844, 0.02960205078125, 0.1194305419921875, 1.2283935546875, -0.037403106689453125, -0.36666107177734375, 2.4553565979003906, 0.011121749877929688, -0.04669952392578125, 0.7975387573242188, 0.46063995361328125, 0.5451812744140625, 1.2674369812011719, 0.8716354370117188, 0.2017974853515625, 1.0537796020507812, 0.37824249267578125, 0.36540985107421875, 0.37165069580078125, 1.4000625610351562, 0.43708038330078125, 1.4958877563476562, 0.8171863555908203, -0.009380340576171875, 0.3582611083984375, -0.22954177856445312, 1.5256156921386719, 0.7006340026855469, 0.08355712890625, 0.7782440185546875, 1.191650390625, -0.6423416137695312, 0.42957305908203125, 0.611846923828125, 0.7012939453125, 0.393585205078125, 0.9938926696777344, 1.0835647583007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000033.npy"} +{"epoch": 0.048458149779735685, "step": 34, "batch_size": 64, "mean": 0.8158169984817505, "std": 0.8475502133369446, "min": -0.570037841796875, "p10": -0.10389842987060544, "median": 0.5883846282958984, "p90": 2.1682266235351566, "max": 3.139617919921875, "pos_frac": 0.84375, "sample": [2.2894744873046875, -0.34139251708984375, 0.5287857055664062, 0.13504409790039062, 2.24273681640625, 0.3976917266845703, 1.7216033935546875, 1.5234832763671875, 0.2183685302734375, -0.29949951171875, 0.380126953125, -0.07632637023925781, 1.111114501953125, 0.4144287109375, 2.2198715209960938, 2.1872100830078125, 0.13805770874023438, -0.22551345825195312, 1.840972900390625, 0.46686553955078125, 1.0957412719726562, 0.06060028076171875, 0.9476871490478516, 1.8420753479003906, 1.16778564453125, 1.638214111328125, 1.018218994140625, -0.11571502685546875, 0.500335693359375, 0.9079666137695312, -0.4199066162109375, 0.604766845703125, 0.7183952331542969, 0.8590660095214844, 2.123931884765625, 0.8700942993164062, 0.8598556518554688, 3.139617919921875, 0.30150604248046875, 0.17729568481445312, 0.4556312561035156, 0.23285484313964844, 1.244110107421875, 1.3095932006835938, 0.8804550170898438, 1.1653518676757812, 0.11310195922851562, 0.013111114501953125, 2.2249298095703125, -0.035121917724609375, 0.3112144470214844, -0.570037841796875, 0.8693466186523438, 0.7063674926757812, 0.4048919677734375, 0.2136096954345703, 2.0475387573242188, -0.043941497802734375, -0.28910064697265625, 0.34104156494140625, 2.0918731689453125, 2.49334716796875, 0.28948211669921875, 0.5720024108886719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000034.npy"} +{"epoch": 0.049926578560939794, "step": 35, "batch_size": 64, "mean": 1.1769659519195557, "std": 1.1176694631576538, "min": -0.42818450927734375, "p10": 0.15845642089843756, "median": 0.9615554809570312, "p90": 2.318426513671875, "max": 6.7027587890625, "pos_frac": 0.921875, "sample": [1.1796035766601562, 0.4361305236816406, 2.03363037109375, 1.6453895568847656, 0.2536773681640625, 0.7318611145019531, 0.7703857421875, 0.1362457275390625, -0.125091552734375, 1.9095611572265625, 2.3313446044921875, 0.2102813720703125, 1.0933609008789062, 0.6141853332519531, 0.23268890380859375, 0.7049560546875, 2.0897369384765625, 3.4026107788085938, 0.5116806030273438, 0.2807121276855469, 0.7914371490478516, 1.8934783935546875, 2.9918365478515625, 0.309051513671875, -0.15047454833984375, 0.9729080200195312, -0.14479637145996094, 1.3530654907226562, 0.5731582641601562, 0.9502029418945312, 6.7027587890625, 1.3162994384765625, 0.4862060546875, 1.56744384765625, 0.45010948181152344, 2.93072509765625, 0.3933563232421875, 1.4089202880859375, 2.0173492431640625, 2.6710205078125, 0.24666213989257812, 2.83648681640625, 1.1510848999023438, 0.9969558715820312, 0.24260711669921875, -0.05158805847167969, 2.07550048828125, 2.2882843017578125, 1.7300872802734375, 0.57928466796875, 1.094614028930664, 0.26233673095703125, 0.7071113586425781, 1.9844512939453125, 1.2466278076171875, 0.8244571685791016, 1.246612548828125, 0.03357124328613281, 1.53485107421875, -0.42818450927734375, 0.7237300872802734, 1.8063201904296875, 1.7924652099609375, 0.4744873046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000035.npy"} +{"epoch": 0.0513950073421439, "step": 36, "batch_size": 64, "mean": 1.094736099243164, "std": 1.2846930027008057, "min": -2.7100677490234375, "p10": -0.19489250183105458, "median": 0.8944025039672852, "p90": 2.8227348327636723, "max": 4.15069580078125, "pos_frac": 0.796875, "sample": [1.4486923217773438, -0.07552337646484375, 0.8630847930908203, 0.8051300048828125, 1.3740463256835938, 0.7127742767333984, 1.54144287109375, -0.5506439208984375, -0.0069141387939453125, 2.852752685546875, 2.3326950073242188, -0.06914710998535156, -0.4393196105957031, 0.4741344451904297, 0.0074863433837890625, 1.33428955078125, 0.07972145080566406, 1.3417205810546875, 1.24639892578125, 0.04227447509765625, -0.2370147705078125, -0.6694736480712891, -0.05329132080078125, 0.1721954345703125, 3.644439697265625, 4.047554016113281, 1.4942741394042969, -0.30400848388671875, 3.08416748046875, 0.7273330688476562, 0.7232646942138672, 1.1945724487304688, -0.09660720825195312, 3.4634170532226562, 0.29773712158203125, -0.06018829345703125, 2.20465087890625, 0.34783172607421875, 1.9979171752929688, 0.7754173278808594, 2.3607254028320312, 1.2041816711425781, 2.259246826171875, 0.9783840179443359, 2.352874755859375, 1.1391677856445312, 0.5956916809082031, 4.15069580078125, 0.7542209625244141, -2.7100677490234375, 2.7246551513671875, 0.061248779296875, 1.5126609802246094, 0.92572021484375, 2.7526931762695312, 1.3528518676757812, -0.2884674072265625, 0.4950828552246094, 2.1005859375, 0.4364356994628906, 1.6637420654296875, 3.7472991943359375, 1.0804290771484375, 0.3437690734863281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000036.npy"} +{"epoch": 0.05286343612334802, "step": 37, "batch_size": 64, "mean": 1.0146114826202393, "std": 1.3858215808868408, "min": -2.61865234375, "p10": -0.2255735397338866, "median": 0.7293777465820312, "p90": 2.7518539428710946, "max": 6.3740234375, "pos_frac": 0.796875, "sample": [1.6860122680664062, -0.013946533203125, 1.3075485229492188, 0.6056442260742188, 0.15875244140625, -0.0401611328125, -0.40863037109375, 0.6845779418945312, 1.3842544555664062, -0.076873779296875, 0.9497528076171875, 3.047760009765625, 0.24517822265625, 0.272491455078125, 0.208587646484375, 0.38120269775390625, 0.8408775329589844, 0.11485862731933594, 1.868703842163086, 6.3740234375, 0.7741775512695312, 4.427337646484375, 1.4219131469726562, 0.9518394470214844, 1.4612350463867188, 1.2358169555664062, 0.9848709106445312, 0.17833709716796875, 1.5712738037109375, 1.0789012908935547, 0.4520683288574219, -0.06163787841796875, 0.10485267639160156, -1.0080337524414062, 0.5343704223632812, 0.2805442810058594, 0.27812957763671875, 2.190765380859375, 2.5041885375976562, 2.8578033447265625, -0.12848854064941406, 3.0472564697265625, 0.42543792724609375, 1.498260498046875, -0.5338211059570312, 0.8113918304443359, 0.016271591186523438, -0.27880859375, 0.79241943359375, 1.7046737670898438, 2.3525466918945312, -0.267181396484375, 2.4035110473632812, 2.504638671875, 0.3589973449707031, 3.3080902099609375, 1.6216964721679688, -2.61865234375, 0.4853973388671875, -0.431121826171875, 2.3571510314941406, 3.6165771484375, -0.09886360168457031, 0.17838668823242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000037.npy"} +{"epoch": 0.05433186490455213, "step": 38, "batch_size": 64, "mean": 1.344200849533081, "std": 1.6346410512924194, "min": -1.7084808349609375, "p10": -0.054677581787109325, "median": 0.8669166564941406, "p90": 3.2927429199218756, "max": 7.29541015625, "pos_frac": 0.875, "sample": [0.09067535400390625, 7.29541015625, 4.1405029296875, 1.4813518524169922, 0.23117446899414062, 0.04170799255371094, 1.4351882934570312, -0.40483856201171875, 1.2051239013671875, 2.57257080078125, 1.471771240234375, 0.8087615966796875, 0.1119384765625, 2.7542724609375, 1.174346923828125, -0.3412132263183594, 0.5314788818359375, 0.27643585205078125, -0.00142669677734375, 2.2769393920898438, 0.8171844482421875, 2.0788421630859375, 2.5112152099609375, 0.1966400146484375, 3.133209228515625, 0.11271286010742188, 1.5365371704101562, 2.7201385498046875, 1.7891502380371094, 0.3577079772949219, 0.562774658203125, 3.361114501953125, -1.7084808349609375, 0.9166488647460938, 0.6838169097900391, 0.5190811157226562, 1.22900390625, 5.6529693603515625, 0.6211433410644531, 0.39383697509765625, 3.3665695190429688, 5.878448486328125, 1.7239990234375, 0.6677398681640625, 0.491790771484375, -0.875701904296875, 0.18028640747070312, 0.19033432006835938, 0.8056221008300781, 1.350677490234375, -0.0774993896484375, 0.46661376953125, -0.3891735076904297, 1.8696937561035156, 1.3590259552001953, 2.24560546875, 1.30908203125, 5.151222229003906, 1.241546630859375, 0.19795989990234375, 1.8208541870117188, -0.4308032989501953, 0.7528228759765625, 2.0947227478027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000038.npy"} +{"epoch": 0.055800293685756244, "step": 39, "batch_size": 64, "mean": 1.5921775102615356, "std": 1.3956255912780762, "min": -2.6716461181640625, "p10": 0.23551158905029304, "median": 1.2817916870117188, "p90": 3.6658027648925784, "max": 4.9209747314453125, "pos_frac": 0.921875, "sample": [2.2532119750976562, 2.0909271240234375, 4.9209747314453125, 2.2091140747070312, 1.242462158203125, -0.6747360229492188, 2.9158706665039062, 2.438648223876953, 1.393239974975586, 2.814115524291992, 0.787445068359375, 3.508026123046875, 1.6200714111328125, 1.169149398803711, 3.873882293701172, 1.0582046508789062, 0.505889892578125, 0.3256072998046875, -0.0616455078125, 0.782073974609375, 0.20563507080078125, 1.8240509033203125, 2.3686141967773438, 0.9167976379394531, 1.0498847961425781, 2.5280838012695312, -0.07294464111328125, 1.513803482055664, 4.27587890625, 0.4831695556640625, 0.5949859619140625, 0.3052234649658203, 2.9181671142578125, 1.9554061889648438, -2.6716461181640625, 1.9434432983398438, 0.3068504333496094, 1.5413227081298828, 3.744874954223633, 4.1374664306640625, 1.0471763610839844, 2.4693603515625, 4.187339782714844, 0.5455474853515625, 1.3211212158203125, 0.7453231811523438, -0.21112060546875, 0.5110740661621094, 1.23193359375, 0.9262008666992188, 0.9416732788085938, 3.6057052612304688, 2.6007766723632812, 2.8060302734375, 0.4246559143066406, 3.691558837890625, 2.9618988037109375, 0.9895992279052734, 2.0414810180664062, 0.35285377502441406, 0.7514209747314453, 0.1039581298828125, 0.4931468963623047, 2.3190441131591797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000039.npy"} +{"epoch": 0.05726872246696035, "step": 40, "batch_size": 64, "mean": 1.5351676940917969, "std": 1.656908631324768, "min": -1.090057373046875, "p10": -0.21006011962890622, "median": 1.1917686462402344, "p90": 3.84886932373047, "max": 7.1263427734375, "pos_frac": 0.859375, "sample": [-0.68304443359375, 0.233123779296875, 0.6062393188476562, 0.8682975769042969, 1.5935821533203125, 0.7240753173828125, 0.4557647705078125, 0.44641876220703125, 0.23168182373046875, 2.001401901245117, 6.260101318359375, 3.567474365234375, 0.9337997436523438, 3.384918212890625, 1.879974365234375, -0.6501274108886719, 2.1688079833984375, 2.687530517578125, 0.1897430419921875, -1.090057373046875, 2.9409332275390625, -0.2541484832763672, 1.25225830078125, -0.220245361328125, 2.1590347290039062, 1.88836669921875, 0.039707183837890625, 1.1799850463867188, 0.2627410888671875, -0.1862945556640625, 4.100311279296875, -0.24281692504882812, 3.3308029174804688, 0.1363677978515625, 0.3930339813232422, 4.3996734619140625, 0.1065826416015625, 3.9694671630859375, 0.9090347290039062, 1.2267341613769531, 4.762664794921875, 1.0427932739257812, 0.8771591186523438, 7.1263427734375, -0.376251220703125, 1.9193954467773438, -0.09644317626953125, 2.03680419921875, 2.84393310546875, 2.087696075439453, 1.429229736328125, 2.3803024291992188, 3.3391342163085938, 1.3389148712158203, 1.1044464111328125, 1.20355224609375, 4.4152374267578125, 0.43686676025390625, 2.0682144165039062, 0.4227714538574219, 0.35817718505859375, 1.3747482299804688, 2.0938720703125, 0.8599357604980469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000040.npy"} +{"epoch": 0.05873715124816446, "step": 41, "batch_size": 64, "mean": 1.8554792404174805, "std": 2.001645088195801, "min": -3.066680908203125, "p10": -0.02576808929443339, "median": 1.3504142761230469, "p90": 4.417221069335938, "max": 8.768112182617188, "pos_frac": 0.890625, "sample": [0.865814208984375, 0.8433380126953125, 3.5187530517578125, 1.6478958129882812, 0.571807861328125, 2.2460098266601562, 3.8543853759765625, 2.128826141357422, -0.959075927734375, 3.1564865112304688, 5.6116485595703125, 1.2997589111328125, 2.4298858642578125, 8.768112182617188, 0.1785717010498047, 0.428131103515625, 0.8908233642578125, 1.2938156127929688, 0.3207893371582031, -1.64703369140625, 1.3644485473632812, 3.122180938720703, 4.8681793212890625, 1.0261554718017578, 6.58343505859375, 1.6158447265625, 4.4842376708984375, 4.8438720703125, 0.6357841491699219, 0.26424407958984375, 2.531524658203125, 0.653228759765625, 6.764434814453125, 0.6491641998291016, 1.2193470001220703, 3.7293701171875, 2.1214675903320312, 2.1517791748046875, -0.3547821044921875, 2.5264053344726562, 4.028953552246094, 1.5901165008544922, -3.066680908203125, 2.6518707275390625, 0.5278701782226562, 1.20977783203125, 1.6205596923828125, 1.7341651916503906, 2.4970550537109375, 0.8030643463134766, 1.409423828125, 0.874755859375, 1.11724853515625, 1.3363800048828125, 0.8642444610595703, 0.9762172698974609, -0.11334228515625, 3.4408721923828125, 1.0046920776367188, 0.45551109313964844, -0.5531463623046875, -0.21187591552734375, 2.0430259704589844, 4.2608489990234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000041.npy"} +{"epoch": 0.06020558002936858, "step": 42, "batch_size": 64, "mean": 2.4679267406463623, "std": 2.316767692565918, "min": -1.4112701416015625, "p10": 0.06012077331542984, "median": 1.8595638275146484, "p90": 5.852120208740236, "max": 10.449981689453125, "pos_frac": 0.890625, "sample": [1.7782058715820312, -0.2077484130859375, 2.4191226959228516, 8.935623168945312, 10.449981689453125, 2.4389419555664062, 0.284881591796875, 6.110107421875, 1.8187103271484375, 2.465068817138672, 1.8514480590820312, 5.978858947753906, 2.4932861328125, 7.089263916015625, 0.9021492004394531, 1.6646194458007812, 2.56158447265625, -0.00433349609375, 2.2998428344726562, 3.2196578979492188, 1.6710739135742188, 1.404672622680664, 3.8101577758789062, 0.59735107421875, 1.1770057678222656, 3.1149635314941406, -0.8181610107421875, 3.009033203125, 1.6869735717773438, -0.5399169921875, 2.273967742919922, 5.35772705078125, 0.38789939880371094, 5.556396484375, 2.2232666015625, 1.8650360107421875, 0.9240322113037109, 1.4047679901123047, 1.946533203125, -0.8748016357421875, 3.56719970703125, -0.03021240234375, 1.8540916442871094, 6.3135833740234375, 1.5797157287597656, 4.1817779541015625, 0.9949760437011719, 0.3075294494628906, 0.8494663238525391, 1.7783355712890625, 0.9699478149414062, 6.195892333984375, -1.4112701416015625, 0.7137508392333984, 2.1836013793945312, 2.9486656188964844, 0.21051406860351562, 1.53387451171875, 4.904197692871094, 2.9354400634765625, 4.8314208984375, 4.9960479736328125, 0.7461719512939453, 4.065338134765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000042.npy"} +{"epoch": 0.06167400881057269, "step": 43, "batch_size": 64, "mean": 2.342538356781006, "std": 1.8794914484024048, "min": -1.098297119140625, "p10": 0.20966148376464847, "median": 2.1711063385009766, "p90": 4.45673065185547, "max": 10.042739868164062, "pos_frac": 0.96875, "sample": [-0.3757209777832031, 0.0064697265625, 0.1991558074951172, 3.1932449340820312, 3.5842361450195312, 1.550811767578125, 3.4396705627441406, 2.2703399658203125, 2.534219741821289, 6.2111968994140625, 1.07061767578125, 1.7060432434082031, 0.69158935546875, 1.991546630859375, 2.743968963623047, 0.9206619262695312, 3.18023681640625, 5.205848693847656, 1.7826004028320312, 0.15467453002929688, 6.055328369140625, 3.2957077026367188, 5.058494567871094, 1.0917205810546875, 2.778148651123047, 2.1277236938476562, 0.5776481628417969, 2.349773406982422, 0.05035972595214844, 3.99285888671875, 1.5546646118164062, 2.5247802734375, 3.891143798828125, 3.399688720703125, 2.58343505859375, 3.799774169921875, 0.5721282958984375, 2.2508697509765625, 1.7410697937011719, 0.3658599853515625, 2.2133102416992188, 1.2766342163085938, 6.016754150390625, 3.3783645629882812, 1.4598617553710938, -1.098297119140625, 1.085855484008789, 2.751373291015625, 1.52947998046875, 3.3118133544921875, 1.9890861511230469, 3.6936416625976562, 2.1289024353027344, 0.4359931945800781, 0.8514633178710938, 10.042739868164062, 3.541717529296875, 1.629659652709961, 0.023042678833007812, 0.6433944702148438, 3.1348876953125, 4.6555328369140625, 2.8704833984375, 0.2341747283935547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000043.npy"} +{"epoch": 0.0631424375917768, "step": 44, "batch_size": 64, "mean": 2.71984601020813, "std": 2.155487537384033, "min": -0.7050457000732422, "p10": 0.37867565155029304, "median": 2.30275821685791, "p90": 5.687815856933594, "max": 9.21075439453125, "pos_frac": 0.953125, "sample": [3.7109909057617188, 2.492706298828125, 1.8139820098876953, 0.5608272552490234, 6.735160827636719, 2.4648818969726562, 2.7541961669921875, 1.0667037963867188, 2.093181610107422, 5.1909332275390625, 6.662017822265625, 0.6707496643066406, 4.186927795410156, 5.24114990234375, 4.78680419921875, 3.2060012817382812, 0.9451103210449219, 1.788726806640625, 2.2134876251220703, 5.6090850830078125, 0.3241119384765625, 2.789215087890625, 2.7850799560546875, 7.8972930908203125, 1.597747802734375, 3.0890350341796875, 0.446136474609375, 1.5945281982421875, 4.722007751464844, 1.9735794067382812, 2.8925552368164062, -0.27831077575683594, 1.6798954010009766, 1.3552284240722656, 1.7098464965820312, 3.0664749145507812, 1.0781478881835938, 1.593597412109375, 5.7215576171875, 2.39202880859375, 0.7303924560546875, 2.148040771484375, 4.088592529296875, 3.2839508056640625, 1.174530029296875, 3.5687026977539062, 2.56414794921875, 2.5831298828125, 0.5827541351318359, 0.29680442810058594, 6.2164154052734375, 4.284332275390625, 1.9590606689453125, 3.232025146484375, 1.295064926147461, 9.21075439453125, 0.16848373413085938, -0.42462158203125, 3.625030517578125, 1.5662040710449219, -0.7050457000732422, 0.3497638702392578, 1.3307151794433594, 8.317543029785156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000044.npy"} +{"epoch": 0.06461086637298091, "step": 45, "batch_size": 64, "mean": 2.208528518676758, "std": 2.53650164604187, "min": -1.0955734252929688, "p10": -0.1773731231689452, "median": 1.5621986389160156, "p90": 5.6249755859375, "max": 13.0001220703125, "pos_frac": 0.859375, "sample": [3.642730712890625, 1.501617431640625, 0.7557830810546875, 3.1797561645507812, 0.16960525512695312, 1.8144073486328125, 3.5316238403320312, 5.930419921875, 5.6456298828125, 2.1600189208984375, 0.9093093872070312, 7.428199768066406, 3.222074508666992, 0.08140182495117188, -0.060733795166015625, 1.6033248901367188, -0.0621795654296875, 1.8177299499511719, 0.04630470275878906, 5.5767822265625, 0.12868499755859375, 0.23797988891601562, 2.940826416015625, 0.8514137268066406, 0.8080062866210938, 5.257246017456055, 6.9957275390625, 3.6768112182617188, 0.13054275512695312, 0.48044776916503906, 4.6272125244140625, 2.50677490234375, 5.5270538330078125, 0.03694915771484375, -0.32305145263671875, 1.5210723876953125, 5.8154449462890625, 3.488109588623047, -0.22674179077148438, 1.6270904541015625, 0.9040336608886719, 0.230987548828125, -1.0955734252929688, -0.44844818115234375, 1.274169921875, 13.0001220703125, 1.2187652587890625, 6.458229064941406, 2.6542282104492188, -0.6807403564453125, 2.214689254760742, 1.3556289672851562, 1.8692646026611328, 0.33403968811035156, 0.9720268249511719, 5.4557342529296875, 0.8685760498046875, 1.993988037109375, 2.3193893432617188, 2.2345428466796875, -0.3471641540527344, 3.955097198486328, 0.6522464752197266, -1.0494117736816406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000045.npy"} +{"epoch": 0.06607929515418502, "step": 46, "batch_size": 64, "mean": 2.753678798675537, "std": 2.966536283493042, "min": -2.3441009521484375, "p10": -0.17053642272949218, "median": 2.0067310333251953, "p90": 6.049882888793945, "max": 13.966522216796875, "pos_frac": 0.875, "sample": [0.40608787536621094, -0.2232666015625, 5.242279052734375, 6.939697265625, 3.7248306274414062, -2.3441009521484375, 1.7165565490722656, 1.7524852752685547, 5.8242950439453125, 3.5635128021240234, 6.037906646728516, 1.406982421875, 0.2205352783203125, 2.5891857147216797, 5.26153564453125, 0.0846405029296875, -0.6981658935546875, 0.317962646484375, 4.389305114746094, 9.746429443359375, -0.38933563232421875, 0.31103515625, 0.4578742980957031, 4.0304412841796875, 0.2629890441894531, 4.493255615234375, 4.1727294921875, 1.0541725158691406, 3.7199440002441406, 2.254039764404297, 4.043548583984375, 0.6693286895751953, -2.177734375, 0.8164863586425781, 2.3774871826171875, 5.508083343505859, 5.217655181884766, -0.17590713500976562, 7.8423004150390625, -0.1580047607421875, 0.2981452941894531, 3.5992279052734375, 5.148712158203125, 0.45896148681640625, 13.966522216796875, 0.314483642578125, 6.9065093994140625, 5.628715515136719, 0.7417640686035156, 2.1596832275390625, 1.2835426330566406, 7.9680023193359375, 2.9505462646484375, 0.20934295654296875, 1.8537788391113281, 6.055015563964844, 1.1712417602539062, 2.414287567138672, 3.711395263671875, 5.655128479003906, 1.4641380310058594, 0.8498897552490234, 1.4539222717285156, -0.31658935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000046.npy"} +{"epoch": 0.06754772393538913, "step": 47, "batch_size": 64, "mean": 2.8084940910339355, "std": 2.5952467918395996, "min": -2.314098358154297, "p10": 0.1304063796997071, "median": 2.227280616760254, "p90": 6.219530487060547, "max": 10.015106201171875, "pos_frac": 0.90625, "sample": [1.4976348876953125, 2.8017959594726562, 0.7344512939453125, 1.0090999603271484, 5.144756317138672, 1.3113059997558594, 1.4425506591796875, 1.6918411254882812, 6.684013366699219, 3.050748825073242, -0.19408416748046875, 6.431938171386719, 6.270751953125, 1.4975204467773438, 3.3701553344726562, 2.382457733154297, 0.16996002197265625, 1.5924148559570312, 1.677337646484375, 5.361991882324219, 6.100013732910156, 1.3227691650390625, 0.8214511871337891, 2.948272705078125, 3.3046798706054688, 1.1883659362792969, -0.5001602172851562, 5.9267730712890625, 6.738189697265625, 2.396047592163086, -0.35321998596191406, 3.2825145721435547, 5.808036804199219, 4.3163604736328125, -2.314098358154297, 0.11345481872558594, 0.2802886962890625, 1.0931167602539062, 1.116485595703125, 9.87750244140625, 5.8236236572265625, 2.1422157287597656, 1.1955242156982422, 2.794677734375, 8.660232543945312, 5.55548095703125, -0.7502517700195312, 3.2030868530273438, 10.015106201171875, -1.09423828125, 1.1379013061523438, 1.8871307373046875, 0.1934051513671875, 1.3147335052490234, 2.9491729736328125, 0.7071342468261719, 5.778228759765625, 1.817352294921875, 5.1494903564453125, 1.646484375, 3.690643310546875, 3.85064697265625, 2.3680152893066406, 2.312345504760742], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000047.npy"} +{"epoch": 0.06901615271659324, "step": 48, "batch_size": 64, "mean": 2.720108985900879, "std": 2.682295799255371, "min": -6.2974395751953125, "p10": 0.01788673400878909, "median": 2.2589492797851562, "p90": 6.938107299804687, "max": 9.83245849609375, "pos_frac": 0.90625, "sample": [3.7593536376953125, 0.6459312438964844, 6.875579833984375, 6.96490478515625, 5.780731201171875, 3.1298751831054688, -1.1116485595703125, 2.3195724487304688, 5.0488128662109375, 3.8452987670898438, 1.5587615966796875, 5.140708923339844, 2.0462493896484375, 0.007354736328125, -0.14864349365234375, 1.3589439392089844, 7.07867431640625, 0.06800270080566406, 1.3714218139648438, -0.35608673095703125, 1.1412239074707031, 7.655754089355469, -0.5316276550292969, 2.1919403076171875, 5.765281677246094, 1.4915618896484375, 2.3039169311523438, 3.0126285552978516, 0.042461395263671875, 3.1278152465820312, 1.8955726623535156, 1.6675586700439453, 2.8143157958984375, 1.8392658233642578, 7.191314697265625, 3.9081897735595703, 4.108253479003906, 8.627822875976562, 3.5085296630859375, 0.8557052612304688, 1.1421775817871094, 1.4503116607666016, 1.4158935546875, 2.465038299560547, 1.188140869140625, 3.8537464141845703, 2.4843597412109375, 1.8206329345703125, -1.31243896484375, 7.660484313964844, 2.45208740234375, 2.8612194061279297, 2.109283447265625, 9.83245849609375, 4.8221282958984375, -6.2974395751953125, 5.086376190185547, 1.5745086669921875, 0.7411994934082031, 0.475006103515625, 3.624053955078125, 2.0584030151367188, 2.364093780517578, 2.2139816284179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000048.npy"} +{"epoch": 0.07048458149779736, "step": 49, "batch_size": 64, "mean": 3.2672815322875977, "std": 3.4105348587036133, "min": -3.291961669921875, "p10": 0.048761177062988326, "median": 2.7687034606933594, "p90": 7.769887542724612, "max": 14.187881469726562, "pos_frac": 0.90625, "sample": [3.061359405517578, 0.5331039428710938, 0.35956382751464844, 2.4762039184570312, 2.2668399810791016, 5.526363372802734, 0.8236236572265625, 2.415384292602539, -1.0635986328125, 0.787200927734375, 0.08919334411621094, 1.8609161376953125, 2.6810760498046875, 8.257591247558594, 7.280525207519531, 2.0377960205078125, 3.704692840576172, 1.7175464630126953, 2.8563308715820312, 1.1740188598632812, 1.8127365112304688, -2.9268970489501953, 3.942413330078125, 0.11493110656738281, 11.3323974609375, 9.07684326171875, 1.13873291015625, 5.346580505371094, 4.1695709228515625, 0.7188701629638672, 6.6278228759765625, 14.187881469726562, 3.6599807739257812, 0.822235107421875, -0.7567138671875, -3.291961669921875, 3.0712814331054688, 3.5664215087890625, 3.7712020874023438, 4.057548522949219, 2.6110572814941406, 0.03143310546875, -0.503692626953125, 3.8523025512695312, 4.999977111816406, 12.69439697265625, 2.5288772583007812, 2.0280914306640625, 7.9796142578125, 5.737579345703125, 5.766941070556641, 3.0269813537597656, 3.5719337463378906, 4.695518493652344, 11.772003173828125, 0.18674659729003906, -0.9648895263671875, 2.8778533935546875, 1.7580833435058594, 3.6411819458007812, 4.0842132568359375, 2.207988739013672, 4.065155029296875, 1.1690673828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000049.npy"} +{"epoch": 0.07195301027900147, "step": 50, "batch_size": 64, "mean": 3.5516457557678223, "std": 3.4186770915985107, "min": -2.6736907958984375, "p10": -0.5345260620117187, "median": 2.7652015686035156, "p90": 8.545244598388674, "max": 13.205413818359375, "pos_frac": 0.828125, "sample": [5.324737548828125, -0.0602874755859375, -1.230621337890625, 8.859371185302734, 0.7726936340332031, 1.7912368774414062, 6.938873291015625, 1.8265724182128906, 2.6888389587402344, 1.7584123611450195, -0.5196151733398438, 6.3728790283203125, 2.9356231689453125, 12.702041625976562, 0.3504180908203125, 2.407684326171875, 6.6142120361328125, 2.9626731872558594, 8.70587158203125, 3.5991058349609375, 0.9705066680908203, 5.0768585205078125, 8.170448303222656, 2.1046981811523438, 1.8286399841308594, 4.410486221313477, -0.26572418212890625, -0.5409164428710938, 2.2174949645996094, 9.036117553710938, 5.90325927734375, 4.879451751708984, 9.077255249023438, 13.205413818359375, -2.6736907958984375, 0.652191162109375, 0.9032135009765625, 2.1703033447265625, -0.07048797607421875, 3.70196533203125, 2.47637939453125, 1.2619190216064453, 5.818965911865234, 3.9515151977539062, 6.353031158447266, 6.48406982421875, -0.6119384765625, 1.7945518493652344, 5.217506408691406, -0.5818023681640625, 5.590667724609375, 2.841564178466797, 4.383033752441406, 5.029426574707031, 2.6112327575683594, 5.590766906738281, 6.798408508300781, 2.4178619384765625, 9.373298645019531, -0.9618301391601562, 5.847389221191406, 1.6566238403320312, -1.716400146484375, 0.120880126953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000050.npy"} +{"epoch": 0.07342143906020558, "step": 51, "batch_size": 64, "mean": 3.5724036693573, "std": 4.458898067474365, "min": -5.367889404296875, "p10": -0.28134651184082027, "median": 2.4499435424804688, "p90": 9.279871368408203, "max": 17.663986206054688, "pos_frac": 0.828125, "sample": [8.98614501953125, 3.723236083984375, 0.8275508880615234, -0.22011947631835938, 0.7900161743164062, 0.31085205078125, 8.512184143066406, -0.7489814758300781, 1.7327499389648438, 0.4948883056640625, 10.198211669921875, 7.169902801513672, 1.8056831359863281, 3.9162673950195312, 2.165803909301758, 4.8417510986328125, 2.982837677001953, -0.7646541595458984, 8.5361328125, 5.068553924560547, 3.8701438903808594, -0.1195526123046875, -0.13894271850585938, 3.430461883544922, 1.8848419189453125, 7.347873687744141, 0.3943023681640625, 4.998847961425781, 0.7295722961425781, 4.332496643066406, 17.663986206054688, 2.5602951049804688, 0.6959075927734375, 5.2451171875, 2.968332290649414, 13.0499267578125, 1.434844970703125, 13.100814819335938, 5.11627197265625, 1.7819099426269531, 2.375885009765625, 0.9989814758300781, 3.2911529541015625, 9.405754089355469, 1.1664962768554688, 0.09983062744140625, 4.6843109130859375, -3.8662872314453125, -0.307586669921875, 16.278640747070312, 13.229293823242188, 6.122871398925781, 6.653621673583984, -0.8066482543945312, 1.4365043640136719, 5.605552673339844, 0.06493377685546875, 0.7290267944335938, 2.005645751953125, -5.367889404296875, 2.5240020751953125, -1.1453094482421875, 2.818195343017578, -0.039608001708984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000051.npy"} +{"epoch": 0.07488986784140969, "step": 52, "batch_size": 64, "mean": 5.411087989807129, "std": 4.423210620880127, "min": -2.4843597412109375, "p10": 0.6714458465576173, "median": 4.760313034057617, "p90": 11.151004028320314, "max": 19.1807861328125, "pos_frac": 0.9375, "sample": [1.4709701538085938, 10.752174377441406, 4.098533630371094, 11.1961669921875, 7.149314880371094, 8.209945678710938, 3.052276611328125, 6.502227783203125, 8.13458251953125, 12.117050170898438, 0.03101348876953125, 11.455886840820312, 4.8309783935546875, 5.005653381347656, 8.64564323425293, 7.610250473022461, 2.531879425048828, 4.9181976318359375, 15.269485473632812, 9.26434326171875, 3.1796398162841797, 0.6301498413085938, 11.045623779296875, 2.741016387939453, 0.044933319091796875, 6.729957580566406, 2.360321044921875, 1.8827590942382812, 7.953386306762695, 19.1807861328125, 8.867889404296875, 2.0815200805664062, 8.888664245605469, 4.689647674560547, 3.7131195068359375, 12.473556518554688, 4.04852294921875, -0.6497955322265625, 4.193572998046875, -2.4843597412109375, -2.0307464599609375, 1.9537277221679688, 6.55573844909668, 5.507743835449219, 1.3355598449707031, 8.824005126953125, 3.9224853515625, 10.144622802734375, 6.7519683837890625, 1.6406116485595703, 0.7678031921386719, 2.703094482421875, 3.2714004516601562, 5.061363220214844, 3.05010986328125, 8.991798400878906, 5.662750244140625, 1.5444107055664062, 15.90618896484375, -1.4978866577148438, 5.502738952636719, 3.6881675720214844, 2.3110694885253906, 0.9234333038330078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000052.npy"} +{"epoch": 0.0763582966226138, "step": 53, "batch_size": 64, "mean": 5.484249114990234, "std": 5.811614036560059, "min": -2.352142333984375, "p10": 0.47873916625976565, "median": 3.8004379272460938, "p90": 13.809008789062503, "max": 26.019927978515625, "pos_frac": 0.921875, "sample": [-1.4273433685302734, 3.27252197265625, 3.9656143188476562, 2.420166015625, 0.8875503540039062, 3.6748085021972656, 12.339614868164062, 0.19185638427734375, 7.262626647949219, -1.4532623291015625, 1.85955810546875, 25.00970458984375, -2.352142333984375, 1.1058273315429688, 5.780357360839844, 7.105533599853516, 2.2113189697265625, 9.91180419921875, 2.7956619262695312, 10.590629577636719, 3.788066864013672, 5.6917877197265625, 11.827133178710938, 6.554847717285156, 14.027252197265625, 0.5918960571289062, 4.7135772705078125, 0.47104644775390625, 5.5041656494140625, 1.7671642303466797, 15.37982177734375, 7.7661590576171875, 6.291431427001953, 8.006172180175781, 3.8128089904785156, 0.4966888427734375, 0.7252769470214844, 3.9423789978027344, 1.2129936218261719, 4.026679992675781, 2.8493118286132812, 6.967519760131836, 1.8590927124023438, 13.299774169921875, -2.0651931762695312, 15.057098388671875, 16.906341552734375, 3.1031265258789062, 4.0637359619140625, 2.961627960205078, 1.7830657958984375, 3.916400909423828, 2.52435302734375, 1.77301025390625, 3.7868576049804688, 4.076770782470703, 2.9311981201171875, 26.019927978515625, 2.5110397338867188, 13.1112060546875, 4.6240081787109375, 3.2169418334960938, 14.584831237792969, -0.6198463439941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000053.npy"} +{"epoch": 0.07782672540381791, "step": 54, "batch_size": 64, "mean": 4.783502578735352, "std": 4.476994514465332, "min": -0.97015380859375, "p10": 0.5653587341308594, "median": 3.8231658935546875, "p90": 10.56023941040039, "max": 20.3184814453125, "pos_frac": 0.921875, "sample": [7.164070129394531, 4.217742919921875, 7.493171691894531, 0.7285346984863281, 0.9668502807617188, 2.4121856689453125, 1.177642822265625, 2.475862503051758, 5.447265625, 7.287700653076172, 0.8489246368408203, 5.186004638671875, 10.610641479492188, 2.285341262817383, 3.6014251708984375, 10.172882080078125, -0.07115554809570312, 4.364337921142578, 4.732706069946289, 5.235176086425781, -0.014942169189453125, 1.6258010864257812, 14.520408630371094, 0.594940185546875, 7.219917297363281, 0.472259521484375, 4.169559478759766, 4.406303405761719, 5.5215606689453125, 5.675178527832031, 1.3898601531982422, 0.5526809692382812, 7.126472473144531, -0.15752792358398438, 0.8302974700927734, 3.136371612548828, 11.967903137207031, 3.2358970642089844, 5.024085998535156, 12.652740478515625, 1.9077701568603516, 2.5778961181640625, 20.3184814453125, 10.442634582519531, -0.97015380859375, 2.597240447998047, -0.020965576171875, 11.736907958984375, 0.6072711944580078, 4.0449066162109375, 1.3527908325195312, 4.195613861083984, 9.81988525390625, 1.0833663940429688, 2.28338623046875, 16.652374267578125, 0.8555660247802734, 4.110101699829102, 2.697052001953125, 9.575332641601562, 2.3955631256103516, 10.374053955078125, 1.2194194793701172, 10.000572204589844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000054.npy"} +{"epoch": 0.07929515418502203, "step": 55, "batch_size": 64, "mean": 5.739418983459473, "std": 6.457626819610596, "min": -9.5828857421875, "p10": -1.1265609741210936, "median": 5.1821746826171875, "p90": 14.347319793701171, "max": 25.52313232421875, "pos_frac": 0.828125, "sample": [7.91351318359375, 9.039871215820312, 4.770195007324219, -3.070587158203125, 22.28424072265625, -2.3770980834960938, 6.729541778564453, 4.156253814697266, 14.883338928222656, -0.38945770263671875, 5.943992614746094, 5.475894927978516, 1.9337081909179688, 9.757278442382812, 17.430831909179688, 8.569992065429688, 4.184661865234375, 8.759857177734375, 25.52313232421875, 3.9289817810058594, 11.23297119140625, -1.3271484375, 4.222129821777344, 7.405891418457031, 21.137741088867188, 0.26033592224121094, 14.308349609375, 0.28499603271484375, 0.4656658172607422, 7.336353302001953, 3.857391357421875, -1.6635284423828125, 5.788948059082031, 0.22574806213378906, 10.841033935546875, 3.605010986328125, -1.0082511901855469, 0.5089569091796875, 0.24942970275878906, -4.2782135009765625, 8.52878189086914, 3.0333786010742188, 1.927154541015625, 13.700439453125, 6.665218353271484, -1.1772651672363281, 0.8115310668945312, 9.667327880859375, 5.450159072875977, -9.5828857421875, 2.3686161041259766, 14.364021301269531, 5.689056396484375, 3.5127792358398438, 7.100055694580078, 5.1398773193359375, -0.5732002258300781, 8.1822509765625, 7.228240966796875, 12.413406372070312, 5.2244720458984375, -0.3330726623535156, 4.42266845703125, 14.657852172851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000055.npy"} +{"epoch": 0.08076358296622614, "step": 56, "batch_size": 64, "mean": 5.461906433105469, "std": 6.40669584274292, "min": -8.48746109008789, "p10": -1.4576095581054687, "median": 5.224672317504883, "p90": 13.871589660644537, "max": 24.472976684570312, "pos_frac": 0.828125, "sample": [-1.3777999877929688, 6.9513092041015625, -2.3592987060546875, 7.457000732421875, -0.37066078186035156, -8.48746109008789, 9.905662536621094, -1.4918136596679688, 3.786266326904297, -3.5986862182617188, 3.3298492431640625, 2.1512680053710938, 6.523643493652344, 14.384490966796875, 2.0059032440185547, 0.09862518310546875, -0.3867301940917969, 9.147956848144531, 8.978130340576172, 6.920463562011719, -2.083179473876953, 9.494552612304688, 2.2523422241210938, 20.047332763671875, 7.106964111328125, 14.7088623046875, 1.115732192993164, 0.041637420654296875, 9.104230880737305, 2.4046707153320312, 6.8402862548828125, 6.758392333984375, 8.32904052734375, 1.438690185546875, 10.207725524902344, 11.227928161621094, 0.6791191101074219, 17.196044921875, 6.986064910888672, 5.711761474609375, 5.667640686035156, 4.319568634033203, 3.069852828979492, 18.947235107421875, 7.011077880859375, 11.761062622070312, 5.758886337280273, 9.886947631835938, 0.8617820739746094, 24.472976684570312, 1.0197925567626953, 0.02703094482421875, 4.066307067871094, 12.674819946289062, -5.9561767578125, 2.6937923431396484, -0.6662445068359375, 20.260414123535156, 4.894275665283203, -3.2195205688476562, 3.2938461303710938, 2.0112533569335938, 5.5550689697265625, 8.013999938964844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000056.npy"} +{"epoch": 0.08223201174743025, "step": 57, "batch_size": 64, "mean": 6.123730182647705, "std": 5.664114475250244, "min": -8.65020751953125, "p10": 0.1048212051391603, "median": 5.812950134277344, "p90": 13.373243331909181, "max": 20.917526245117188, "pos_frac": 0.921875, "sample": [7.745433807373047, 3.007070541381836, 0.6629714965820312, -2.4545135498046875, 9.749099731445312, -8.65020751953125, 10.878250122070312, 0.2445507049560547, 11.765350341796875, 5.9770355224609375, 15.719955444335938, 3.2582054138183594, 20.917526245117188, 2.8892059326171875, 6.953834533691406, 5.64886474609375, -0.7502517700195312, 3.5164527893066406, 6.454963684082031, 18.106170654296875, 10.064544677734375, 15.747634887695312, 10.547439575195312, 3.635639190673828, 6.404937744140625, 1.5121746063232422, 13.076587677001953, 4.741899490356445, 3.916980743408203, 2.5631637573242188, 1.20489501953125, 4.208000183105469, 2.9806365966796875, 0.02935028076171875, 6.730560302734375, 6.7663421630859375, 0.0449371337890625, 1.6090202331542969, 2.4732894897460938, 8.289390563964844, 9.888656616210938, 1.9785919189453125, 9.457084655761719, 4.842567443847656, 8.198226928710938, -2.2121620178222656, 8.48061752319336, 6.724266052246094, 8.611968994140625, 3.0862808227539062, 2.134449005126953, 16.942581176757812, 5.412410736083984, 2.7899627685546875, 7.1648406982421875, 4.258369445800781, 0.6883907318115234, 11.2506103515625, 11.649147033691406, -4.9671630859375, 13.500381469726562, 18.69219970703125, 7.147520065307617, 8.011543273925781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000057.npy"} +{"epoch": 0.08370044052863436, "step": 58, "batch_size": 64, "mean": 6.960675239562988, "std": 7.283979892730713, "min": -8.165863037109375, "p10": -0.15657005310058586, "median": 4.8431396484375, "p90": 17.01222686767579, "max": 26.385833740234375, "pos_frac": 0.875, "sample": [14.90240478515625, -1.6394309997558594, 0.7983589172363281, 4.182563781738281, 3.5980072021484375, 4.165802001953125, 19.116683959960938, 2.1693649291992188, 14.039878845214844, 2.7689285278320312, 5.495014190673828, 6.4330902099609375, 7.8250885009765625, 3.705904006958008, 2.7046279907226562, 12.3720703125, 10.735305786132812, 8.766242980957031, 4.191265106201172, 2.8262081146240234, 7.040924072265625, 13.865676879882812, 13.243804931640625, -1.6291351318359375, 11.291770935058594, 1.6610641479492188, 9.061782836914062, 6.723325729370117, 21.51044464111328, 3.7341251373291016, 26.385833740234375, 8.068038940429688, 6.619495391845703, -0.07796478271484375, 14.702957153320312, 0.8828525543212891, 13.767471313476562, 3.548694610595703, 2.70098876953125, -2.149993896484375, 1.5605487823486328, 2.270893096923828, 21.597991943359375, -1.9931564331054688, 10.025894165039062, -0.19025802612304688, 1.4699535369873047, 7.37237548828125, -4.228546142578125, 23.3475341796875, 2.078348159790039, 6.268451690673828, 0.8422393798828125, 14.359081268310547, 10.621341705322266, -8.165863037109375, 22.395309448242188, 1.39453125, 2.2394180297851562, 6.60980224609375, 15.345657348632812, 1.7395401000976562, 0.6861000061035156, 17.726470947265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000058.npy"} +{"epoch": 0.08516886930983847, "step": 59, "batch_size": 64, "mean": 7.362071514129639, "std": 7.624090671539307, "min": -5.909423828125, "p10": -1.2778446197509765, "median": 6.645444869995117, "p90": 18.62580108642578, "max": 32.22602844238281, "pos_frac": 0.875, "sample": [8.322715759277344, 7.982383728027344, 14.810272216796875, 19.03460693359375, 15.112266540527344, 4.192024230957031, 32.22602844238281, 20.020721435546875, 7.333362579345703, 15.899154663085938, 0.7549095153808594, 6.779186248779297, -3.72528076171875, 2.388568878173828, 21.0130615234375, -1.3303718566894531, 2.4608755111694336, 13.576919555664062, 7.2828369140625, 19.564468383789062, 0.9286270141601562, 2.2508392333984375, 0.9340591430664062, 1.0956382751464844, 3.7268218994140625, 24.861679077148438, 2.731578826904297, 1.9985675811767578, 7.909767150878906, -5.909423828125, 9.510711669921875, -2.449634552001953, 7.833000183105469, 8.2703857421875, 8.685600280761719, 17.53736114501953, 2.3358535766601562, 11.472755432128906, 6.840293884277344, 3.7067184448242188, -5.557014465332031, -3.1932601928710938, 6.032297134399414, -1.1552810668945312, 4.066526412963867, 9.750473022460938, 7.062152862548828, 3.212127685546875, 7.299468994140625, 2.8349266052246094, 2.2858657836914062, 5.761741638183594, 2.775146484375, 8.898868560791016, 5.416900634765625, 14.567001342773438, 9.305381774902344, 3.4055557250976562, 2.5939178466796875, -2.2538681030273438, 16.506366729736328, 18.317916870117188, 6.5117034912109375, 18.75775146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000059.npy"} +{"epoch": 0.08663729809104258, "step": 60, "batch_size": 64, "mean": 5.937168121337891, "std": 6.694998741149902, "min": -14.41754150390625, "p10": -1.6272411346435547, "median": 6.3509016036987305, "p90": 12.099561309814453, "max": 25.782875061035156, "pos_frac": 0.828125, "sample": [7.500698089599609, -3.3667755126953125, 1.581705093383789, 11.620353698730469, 0.5745086669921875, 2.9387454986572266, 10.821216583251953, 8.26580810546875, 2.0553207397460938, -14.41754150390625, 6.664552688598633, -1.7320938110351562, 1.936594009399414, 3.869688034057617, 10.527778625488281, 5.880584716796875, 4.3164215087890625, 10.505821228027344, 3.6941757202148438, 10.327621459960938, 13.380477905273438, 11.97528076171875, 17.6448974609375, 8.543083190917969, 8.590179443359375, 7.445274353027344, -0.6603069305419922, 6.229988098144531, 10.095550537109375, 0.14325904846191406, 8.882671356201172, 18.407608032226562, 3.5217552185058594, 6.099315643310547, 8.155887603759766, -1.6035423278808594, 9.683059692382812, 9.058761596679688, 2.0871658325195312, -4.818183898925781, 10.9786376953125, 2.9487838745117188, 5.357078552246094, 8.242759704589844, 4.5932159423828125, -0.9718132019042969, -10.16384506225586, 15.839736938476562, 19.643096923828125, 11.896820068359375, 12.152824401855469, 2.51812744140625, -1.6976051330566406, -1.6373977661132812, 10.220975875854492, 6.47181510925293, 25.782875061035156, 9.069938659667969, 1.4972763061523438, 8.223104476928711, -0.7335968017578125, 7.9198455810546875, 4.283702850341797, 1.1150646209716797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000060.npy"} +{"epoch": 0.0881057268722467, "step": 61, "batch_size": 64, "mean": 7.258552074432373, "std": 9.79314136505127, "min": -9.294906616210938, "p10": -1.1030864715576172, "median": 5.387714385986328, "p90": 17.715953826904297, "max": 54.39056396484375, "pos_frac": 0.828125, "sample": [2.7423458099365234, 1.3120002746582031, -1.2756805419921875, 1.4614791870117188, 14.113899230957031, 0.17702293395996094, 0.28493309020996094, 7.842464447021484, -0.948028564453125, 26.841354370117188, 21.229515075683594, 19.409393310546875, 27.158187866210938, 12.469512939453125, 6.9866180419921875, 8.771507263183594, -2.032745361328125, 1.4881629943847656, 11.604541778564453, 11.085708618164062, 3.9116134643554688, 5.074520111083984, 14.546234130859375, 8.368968963623047, 6.021919250488281, 6.71684455871582, 5.318733215332031, 5.734458923339844, 8.763816833496094, 8.254745483398438, -1.0676116943359375, 0.5185546875, 12.605278015136719, 16.178802490234375, 15.777069091796875, 21.008316040039062, 17.367828369140625, 1.2127456665039062, -0.862060546875, 13.628585815429688, 3.008533477783203, 0.13791465759277344, 11.220367431640625, 2.577709197998047, 1.2829647064208984, 17.865150451660156, 1.243011474609375, 3.273883819580078, 54.39056396484375, 12.775321960449219, 12.202255249023438, -0.746429443359375, -9.294906616210938, 11.730125427246094, 2.2157974243164062, -4.637901306152344, 5.456695556640625, 3.7058181762695312, -7.4734039306640625, 2.815998077392578, 7.031524658203125, -6.0870208740234375, -1.1182899475097656, 1.1700859069824219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000061.npy"} +{"epoch": 0.08957415565345081, "step": 62, "batch_size": 64, "mean": 5.524896621704102, "std": 7.01943826675415, "min": -8.648681640625, "p10": -2.8673089981079096, "median": 5.220463752746582, "p90": 14.511708068847659, "max": 28.301368713378906, "pos_frac": 0.859375, "sample": [8.556774139404297, 1.4826736450195312, 0.5866317749023438, 10.502082824707031, 5.376943588256836, 11.299179077148438, 6.89094352722168, 0.893157958984375, 9.608695983886719, -5.395050048828125, 14.721908569335938, 5.772529602050781, 1.8425979614257812, 0.02208709716796875, 9.159660339355469, 3.412769317626953, 9.539306640625, 5.300506591796875, 0.1614990234375, 5.224334716796875, 6.887947082519531, 7.154689788818359, 1.810333251953125, 4.908092498779297, -4.0428619384765625, 8.532257080078125, 4.133296966552734, 7.954936981201172, -0.4099903106689453, 28.301368713378906, 17.204498291015625, 8.036201477050781, 5.81524658203125, 2.339447021484375, 1.8145751953125, 5.6868743896484375, 1.7879791259765625, 17.094467163085938, -8.648681640625, -3.7052078247070312, -5.955780029296875, 0.8525276184082031, 1.4433937072753906, 0.8811454772949219, 1.1666717529296875, 8.793445587158203, 16.363758087158203, 9.478034973144531, -3.085966110229492, 14.021240234375, 8.469799041748047, 5.216592788696289, -6.192962646484375, 3.390169143676758, 12.961551666259766, 3.0258102416992188, 3.1953697204589844, 4.0953521728515625, 25.843276977539062, 19.538742065429688, 7.03864860534668, 1.2868156433105469, -2.3571090698242188, 6.508148193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000062.npy"} +{"epoch": 0.09104258443465492, "step": 63, "batch_size": 64, "mean": 7.485637664794922, "std": 7.910577297210693, "min": -12.992572784423828, "p10": -0.36663284301757765, "median": 6.831754684448242, "p90": 16.559085083007815, "max": 31.837738037109375, "pos_frac": 0.890625, "sample": [24.701812744140625, 8.00262451171875, 4.646566390991211, 9.524723052978516, 2.452972412109375, 0.8351669311523438, 4.597587585449219, -6.8447265625, 6.341087341308594, 6.634616851806641, -12.992572784423828, 16.7130126953125, 9.349456787109375, 7.028892517089844, 2.8032760620117188, -1.0459423065185547, 16.199920654296875, 4.150690078735352, 11.928848266601562, 20.87340545654297, 11.963973999023438, 13.688453674316406, 5.877349853515625, 0.10333251953125, -0.5680465698242188, 12.045129776000977, -4.482612609863281, 0.27256011962890625, 5.009727478027344, 3.407114028930664, 11.423141479492188, 8.130226135253906, 8.062568664550781, 9.187259674072266, 10.536247253417969, 12.284347534179688, 13.659744262695312, 13.286476135253906, 5.106895446777344, 2.5273971557617188, 0.9323577880859375, 13.57330322265625, 2.401548385620117, 6.107929229736328, -10.49920654296875, 13.512014389038086, 7.1820526123046875, 1.295074462890625, 1.8016815185546875, 5.695953369140625, 2.9959716796875, 13.827400207519531, 17.533096313476562, 8.13079833984375, 17.700721740722656, 0.6702651977539062, 8.380697250366211, 31.837738037109375, -3.1706466674804688, 14.441234588623047, 4.8003082275390625, 25.1739501953125, 5.801856994628906, 11.532005310058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000063.npy"} +{"epoch": 0.09251101321585903, "step": 64, "batch_size": 64, "mean": 8.758337020874023, "std": 9.909212112426758, "min": -16.666961669921875, "p10": -0.7858884811401364, "median": 7.340604782104492, "p90": 21.024116516113285, "max": 46.760498046875, "pos_frac": 0.8125, "sample": [1.7029495239257812, 17.394744873046875, 26.986373901367188, 7.556148529052734, 4.84834098815918, 12.217491149902344, 3.240081787109375, 16.438087463378906, -0.2341899871826172, 6.52379035949707, 2.000164031982422, 10.43023681640625, 0.7709579467773438, 4.849945068359375, 12.24481201171875, 29.34869384765625, 4.596305847167969, -1.8371562957763672, 16.990097045898438, 7.519325256347656, 11.466598510742188, -3.2168426513671875, 27.998794555664062, 8.008865356445312, -0.28986358642578125, 6.444843292236328, -0.9086475372314453, 6.595989227294922, -16.666961669921875, -0.4697990417480469, 7.607288360595703, -0.49945068359375, 46.760498046875, 5.519100189208984, 2.229524612426758, 10.540725708007812, 17.08013916015625, 19.957015991210938, 10.358917236328125, 5.928302764892578, 5.753530502319336, 23.05016326904297, 11.707023620605469, 8.441276550292969, -1.2991485595703125, -9.410514831542969, 23.4808349609375, 3.2678070068359375, 21.4814453125, 0.2619476318359375, 15.372871398925781, 13.166122436523438, 11.671928405761719, -1.3052825927734375, 12.812713623046875, -0.3718414306640625, 4.7322998046875, 7.161884307861328, 13.292129516601562, 12.827590942382812, 1.6077804565429688, 11.4898681640625, 18.425689697265625, 4.883197784423828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000064.npy"} +{"epoch": 0.09397944199706314, "step": 65, "batch_size": 64, "mean": 8.849407196044922, "std": 9.720344543457031, "min": -22.24115753173828, "p10": -0.616684341430664, "median": 7.15633487701416, "p90": 22.604815673828128, "max": 30.037887573242188, "pos_frac": 0.875, "sample": [30.037887573242188, 24.27740478515625, 16.600662231445312, 12.517730712890625, 5.549161911010742, 9.339881896972656, 0.6753616333007812, 12.766765594482422, 3.490215301513672, 20.16883087158203, 16.233963012695312, 1.4378623962402344, -0.47119903564453125, 23.068603515625, 28.056396484375, 0.23386001586914062, 9.99068832397461, 2.3719959259033203, -4.0348052978515625, 4.855934143066406, 1.1144046783447266, 17.731201171875, 5.912864685058594, -22.24115753173828, 16.193878173828125, 7.87310791015625, -4.2362518310546875, 7.204387664794922, 14.011909484863281, 5.026287078857422, 5.945426940917969, 5.681266784667969, -1.7587242126464844, 5.230152130126953, 28.34454345703125, 6.930576324462891, 21.52264404296875, 15.279064178466797, 24.92792510986328, 7.509119033813477, 0.1097259521484375, -10.611259460449219, 8.892425537109375, 2.1167469024658203, 6.340789794921875, 18.404815673828125, 23.627357482910156, -1.3054580688476562, 6.765960693359375, 19.720993041992188, 11.239700317382812, 3.920684814453125, 20.308441162109375, -0.6790351867675781, 12.578826904296875, 7.5952911376953125, 5.3663482666015625, 2.5346832275390625, 17.45258331298828, 11.152618408203125, 0.24939727783203125, 8.008529663085938, 0.09377861022949219, 7.108282089233398], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000065.npy"} +{"epoch": 0.09544787077826726, "step": 66, "batch_size": 64, "mean": 8.239856719970703, "std": 9.795476913452148, "min": -8.536376953125, "p10": -0.7811008453369136, "median": 6.427303314208984, "p90": 18.879657363891603, "max": 46.99446105957031, "pos_frac": 0.859375, "sample": [7.446739196777344, 46.99446105957031, 0.69781494140625, 14.779182434082031, 13.737640380859375, 12.121978759765625, 2.3297290802001953, -0.34705352783203125, 10.384958267211914, 6.5466461181640625, -8.536376953125, 2.409055709838867, 6.189533233642578, -0.9671211242675781, 12.045799255371094, 3.2483367919921875, 4.649658203125, 0.15288162231445312, 2.3184738159179688, 18.743240356445312, 7.328575134277344, 10.483057022094727, 0.6119880676269531, 10.560997009277344, 17.495582580566406, 3.2617645263671875, 4.3423614501953125, -2.15765380859375, -3.9257144927978516, -7.635650634765625, 10.27835464477539, 17.51862335205078, 18.938121795654297, 7.420280456542969, 3.6919097900390625, 11.419296264648438, 6.307960510253906, -0.34487342834472656, 2.2108497619628906, 14.130935668945312, 11.209854125976562, 3.8775711059570312, 0.097747802734375, 6.8609619140625, 5.931392669677734, -8.328559875488281, 10.151687622070312, 1.6986961364746094, -4.067878723144531, 13.638031005859375, 4.436820983886719, 19.330322265625, 26.950469970703125, 22.435874938964844, 4.2880859375, 16.779541015625, 2.0994033813476562, 8.301555633544922, 15.577911376953125, 2.657928466796875, 31.894668579101562, 1.0135688781738281, 30.129486083984375, 13.503372192382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000066.npy"} +{"epoch": 0.09691629955947137, "step": 67, "batch_size": 64, "mean": 8.440168380737305, "std": 9.773744583129883, "min": -11.538116455078125, "p10": -0.9826553344726556, "median": 5.99215030670166, "p90": 20.833009338378908, "max": 45.8328857421875, "pos_frac": 0.859375, "sample": [22.053115844726562, 3.5303192138671875, 21.105140686035156, 0.27748870849609375, 2.002655029296875, 0.0134429931640625, 15.638816833496094, 5.243406295776367, -6.343666076660156, 2.1799678802490234, 17.065093994140625, 10.198875427246094, 4.2890472412109375, 15.862052917480469, -0.33405303955078125, -5.024806976318359, 9.773651123046875, 9.390731811523438, 1.317291259765625, -11.538116455078125, 20.691558837890625, 8.04644775390625, 0.37191009521484375, -0.068145751953125, 5.874244689941406, 2.6580429077148438, 20.893630981445312, 6.110055923461914, -1.2606277465820312, 6.6395416259765625, 3.280242919921875, 14.205333709716797, 6.703330993652344, 2.8117923736572266, 2.1319503784179688, 9.574592590332031, 15.078279495239258, 5.571245193481445, 1.400360107421875, -5.0413055419921875, 5.37445068359375, 17.976181030273438, 4.3790283203125, 3.7125587463378906, 25.875823974609375, 14.81829833984375, 6.519815444946289, 18.109481811523438, 14.259056091308594, 14.192840576171875, 12.496116638183594, 5.069122314453125, 11.747991561889648, 31.72967529296875, 5.748252868652344, 0.8353595733642578, 45.8328857421875, 3.6195106506347656, -2.589202880859375, 25.036956787109375, 20.061782836914062, 7.139835357666016, -2.9527435302734375, 8.804765701293945], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000067.npy"} +{"epoch": 0.09838472834067548, "step": 68, "batch_size": 64, "mean": 8.181711196899414, "std": 8.226612091064453, "min": -6.4801788330078125, "p10": 0.2630773544311526, "median": 5.993472099304199, "p90": 20.47516555786133, "max": 36.070159912109375, "pos_frac": 0.90625, "sample": [-4.660896301269531, 3.0965652465820312, 25.06597900390625, 8.180770874023438, 5.702083587646484, -6.33831787109375, 8.2642822265625, 22.491111755371094, 1.3354873657226562, 0.5248737335205078, 13.677902221679688, 15.566146850585938, 17.488449096679688, 11.472824096679688, 10.068267822265625, 0.15087890625, 6.338932037353516, 13.031963348388672, 3.6750411987304688, 7.7253570556640625, -1.0388031005859375, 4.799352645874023, 4.926670074462891, 2.35675048828125, 7.128276824951172, 13.114128112792969, 18.738021850585938, 8.574783325195312, 8.776847839355469, 19.65337371826172, 20.827362060546875, 21.914066314697266, 1.8187637329101562, 6.2587127685546875, 9.936996459960938, 1.7198238372802734, -0.1806468963623047, 5.728231430053711, 2.08880615234375, 5.1177520751953125, 16.231204986572266, 2.8135986328125, 10.758533477783203, -6.4801788330078125, 7.416450500488281, 5.405097961425781, 3.840362548828125, 21.382164001464844, 3.316986083984375, 1.4008255004882812, 1.455038070678711, 36.070159912109375, 24.68366241455078, 17.396804809570312, 13.250251770019531, 1.0840072631835938, 2.075084686279297, 5.48979377746582, 12.173721313476562, -2.308746337890625, 5.6685638427734375, 2.424091339111328, 5.417217254638672, 7.54779052734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000068.npy"} +{"epoch": 0.09985315712187959, "step": 69, "batch_size": 64, "mean": 9.668987274169922, "std": 9.481657981872559, "min": -4.6898651123046875, "p10": -0.2681427001953124, "median": 6.995128631591797, "p90": 20.486595153808597, "max": 43.473358154296875, "pos_frac": 0.875, "sample": [4.22479248046875, 6.584953308105469, 30.43212890625, 8.732917785644531, 12.065399169921875, 15.789024353027344, -4.606292724609375, -1.25531005859375, -0.153106689453125, 3.9714126586914062, 5.66339111328125, 8.839855194091797, -1.23516845703125, 8.99905014038086, 7.469135284423828, 14.870624542236328, 26.1024169921875, 6.525505065917969, 16.641555786132812, 15.577392578125, -0.8164253234863281, 24.494300842285156, 13.535736083984375, 10.456008911132812, 18.947113037109375, 14.155471801757812, 4.2408599853515625, 15.102287292480469, 12.615745544433594, 5.587717056274414, 0.8584079742431641, 0.8925514221191406, 18.994728088378906, 43.473358154296875, 4.637676239013672, 19.877395629882812, 5.499603271484375, 15.211700439453125, 16.66968536376953, 0.853851318359375, 2.0217666625976562, 16.598175048828125, 10.993392944335938, 1.339813232421875, -0.8395919799804688, 14.10635757446289, 6.904396057128906, 36.209983825683594, 3.7904510498046875, 7.897584915161133, 6.302925109863281, -4.6898651123046875, 5.93914794921875, 4.150398254394531, 2.4200973510742188, 7.0858612060546875, 20.79571533203125, 20.7476806640625, 1.0400390625, 18.15606689453125, 1.5331897735595703, 1.2922515869140625, 4.809314727783203, -0.31744384765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000069.npy"} +{"epoch": 0.1013215859030837, "step": 70, "batch_size": 64, "mean": 10.476768493652344, "std": 10.976624488830566, "min": -9.244384765625, "p10": -1.228683471679687, "median": 9.992218017578125, "p90": 25.026464843750006, "max": 40.26800537109375, "pos_frac": 0.84375, "sample": [6.441108703613281, 1.5677947998046875, 11.09823989868164, 19.70343780517578, 23.553619384765625, 12.04942512512207, -7.907249450683594, -1.54833984375, 12.464736938476562, 4.033483505249023, -1.4087448120117188, 13.460517883300781, -2.6362037658691406, 8.061737060546875, 37.533721923828125, 1.8637752532958984, 11.282363891601562, 25.657684326171875, 11.371973037719727, 40.26800537109375, 11.261734008789062, 9.779525756835938, 2.2384567260742188, 28.52496337890625, 0.41889190673828125, 17.516014099121094, 19.245513916015625, 13.286500930786133, -2.052854537963867, 7.263051986694336, 5.847587585449219, 27.09345245361328, 1.9497642517089844, 0.6972236633300781, 1.1364669799804688, -0.15876388549804688, 14.081958770751953, 2.6405563354492188, 11.83721923828125, -1.9583797454833984, 2.760955810546875, 3.8347911834716797, -0.42221832275390625, 2.9757080078125, 10.853700637817383, 2.7167205810546875, 19.020225524902344, 22.87964630126953, -9.244384765625, 5.069999694824219, 17.633987426757812, 3.3520736694335938, 1.5598392486572266, 32.932525634765625, 13.248695373535156, -0.8085403442382812, 34.567352294921875, 10.204910278320312, 22.06720733642578, 16.658992767333984, 3.5499820709228516, 22.9769287109375, 13.715957641601562, 20.84814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000070.npy"} +{"epoch": 0.1027900146842878, "step": 71, "batch_size": 64, "mean": 12.117111206054688, "std": 11.999532699584961, "min": -9.489480972290039, "p10": 0.6195663452148453, "median": 10.725912094116211, "p90": 23.169992065429696, "max": 50.33636474609375, "pos_frac": 0.890625, "sample": [-5.564811706542969, 40.86187744140625, 6.707279205322266, 2.1231842041015625, 5.668216705322266, 12.57595443725586, 6.402133941650391, 12.336235046386719, 17.929946899414062, 5.790241241455078, 20.842910766601562, 6.893260955810547, 17.669937133789062, 4.417142868041992, 2.4780807495117188, 6.933319091796875, 12.080036163330078, 15.472259521484375, -0.1421051025390625, 2.251628875732422, 3.8969497680664062, 43.76173400878906, 7.269111633300781, 6.328147888183594, 15.89337158203125, 16.427352905273438, 5.885158538818359, 12.557579040527344, 13.061668395996094, -8.390975952148438, 4.568603515625, 13.840118408203125, 11.27471923828125, -0.1298999786376953, 5.02672004699707, 17.555374145507812, 19.37085723876953, -0.02484130859375, 21.361160278320312, 16.042015075683594, 5.174263000488281, 13.485908508300781, 6.8602752685546875, 18.009078979492188, 14.6356201171875, 18.05797576904297, 4.704418182373047, 36.288482666015625, 48.27777099609375, 50.33636474609375, 2.55859375, 23.945205688476562, -9.489480972290039, -1.0905590057373047, 13.41845703125, 6.262687683105469, 7.751972198486328, 10.881511688232422, 19.732086181640625, 16.50806427001953, 30.198394775390625, 3.0691375732421875, 6.046979904174805, 10.5703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000071.npy"} +{"epoch": 0.10425844346549193, "step": 72, "batch_size": 64, "mean": 12.761024475097656, "std": 13.074019432067871, "min": -14.089691162109375, "p10": -2.1346458435058566, "median": 10.784193992614746, "p90": 32.09220123291017, "max": 55.10345458984375, "pos_frac": 0.890625, "sample": [18.166954040527344, 14.91963005065918, 6.134056091308594, 12.610836029052734, 5.735633850097656, 3.5860671997070312, 9.27835464477539, 26.768203735351562, -14.089691162109375, 24.970001220703125, 14.753170013427734, 13.979816436767578, 7.915243148803711, 0.538787841796875, 17.253246307373047, 9.325740814208984, 12.477516174316406, 25.191864013671875, 14.612518310546875, 33.62275695800781, 39.40754699707031, -3.3268566131591797, 6.598869323730469, 10.522184371948242, 16.894493103027344, 3.74884033203125, 18.47760772705078, 10.160400390625, 11.04620361328125, 16.45575714111328, 3.1367416381835938, 5.6684722900390625, 11.808456420898438, -13.160842895507812, 35.26194763183594, 28.520904541015625, 19.380355834960938, 6.703563690185547, 22.620101928710938, -3.7938079833984375, 8.4920654296875, 40.4989013671875, 55.10345458984375, -7.617919921875, 9.725324630737305, 25.85662841796875, 7.68316650390625, 7.767719268798828, 0.9906940460205078, 34.42466735839844, -10.13427734375, 4.028022766113281, 2.5602035522460938, 18.120452880859375, 12.796112060546875, 19.796829223632812, 10.298439025878906, 15.743232727050781, 35.726165771484375, 12.13227653503418, 6.679222106933594, 3.4870986938476562, -3.2804031372070312, 1.9458427429199219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000072.npy"} +{"epoch": 0.10572687224669604, "step": 73, "batch_size": 64, "mean": 12.9429292678833, "std": 18.591745376586914, "min": -25.261825561523438, "p10": -8.75775909423828, "median": 10.025529861450195, "p90": 33.505692672729495, "max": 67.05513000488281, "pos_frac": 0.78125, "sample": [30.779083251953125, -13.465042114257812, 7.479442596435547, 14.684890747070312, 7.3932647705078125, 35.37023162841797, -6.038055419921875, 25.35833740234375, 7.54766845703125, -0.349578857421875, 28.61553955078125, 33.289241790771484, 24.188316345214844, -1.4833145141601562, 0.40594482421875, -14.64947509765625, 56.536956787109375, -1.8878402709960938, 10.461418151855469, 19.519760131835938, 16.763214111328125, 8.10769271850586, 10.465675354003906, 1.1250743865966797, 2.04345703125, 9.589641571044922, -18.188190460205078, 11.128650665283203, 29.67431640625, 15.52825927734375, 13.250381469726562, 2.9685935974121094, 57.658416748046875, -11.294479370117188, 5.201145172119141, 31.005523681640625, 31.64948272705078, -9.344442367553711, 4.341587066650391, 2.7031726837158203, -2.2555389404296875, -5.5533294677734375, -9.178421020507812, 7.101797103881836, 16.40802001953125, 15.82757568359375, 2.6898880004882812, 5.539579391479492, 21.181434631347656, 43.89390563964844, 14.91961669921875, 6.847202301025391, 22.140350341796875, 5.719429016113281, 14.644218444824219, -7.776214599609375, 14.62371826171875, 57.34288024902344, 27.878143310546875, 67.05513000488281, 3.142467498779297, 33.59845733642578, 19.68499755859375, -25.261825561523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000073.npy"} +{"epoch": 0.10719530102790015, "step": 74, "batch_size": 64, "mean": 14.029674530029297, "std": 20.74785614013672, "min": -40.30040740966797, "p10": -2.9911163330078114, "median": 10.624895095825195, "p90": 32.64582748413087, "max": 121.46435546875, "pos_frac": 0.796875, "sample": [27.40526580810547, 9.759231567382812, 12.80548095703125, 0.26739501953125, 24.703224182128906, 6.705333709716797, 10.387760162353516, 55.922706604003906, 2.189258575439453, 15.395500183105469, 15.884124755859375, 10.43515396118164, 20.376773834228516, 2.3784866333007812, 17.426902770996094, 121.46435546875, 8.73529052734375, 30.950050354003906, 15.364727020263672, 3.604278564453125, 1.7860698699951172, 50.45625305175781, 3.7391433715820312, 33.372589111328125, 26.607711791992188, 10.745979309082031, -3.735960006713867, 0.2978515625, -0.7028064727783203, -19.43968963623047, 14.969650268554688, -1.4126739501953125, -0.10745429992675781, 8.88067626953125, -40.30040740966797, 21.191146850585938, -1.8659744262695312, 26.468936920166016, 4.994794845581055, -4.373867034912109, -10.35504150390625, 11.16717529296875, 19.12664794921875, 24.84991455078125, -0.0720367431640625, 45.915802001953125, 14.120437622070312, 3.1414871215820312, 8.798599243164062, 9.094112396240234, 10.50381088256836, 36.584197998046875, 18.85223388671875, 23.592498779296875, 37.989219665527344, 16.64581298828125, 5.403657913208008, 16.619056701660156, 28.212188720703125, 27.2095947265625, -0.01448822021484375, -4.4408721923828125, -3.4733200073242188, 14.695236206054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000074.npy"} +{"epoch": 0.10866372980910426, "step": 75, "batch_size": 64, "mean": 20.05126190185547, "std": 20.014732360839844, "min": -12.407470703125, "p10": -2.9196683883666967, "median": 16.644107818603516, "p90": 48.70598220825196, "max": 73.53080749511719, "pos_frac": 0.859375, "sample": [-0.6165943145751953, 24.211929321289062, 17.296100616455078, 14.302864074707031, 18.44481658935547, 12.457618713378906, 10.769725799560547, 6.437046051025391, 46.38215637207031, 66.73309326171875, 31.70189666748047, 14.701299667358398, -6.787406921386719, 27.810508728027344, 73.53080749511719, 15.897701263427734, 41.890838623046875, 63.022735595703125, 9.568233489990234, -0.3710460662841797, 43.047637939453125, 24.698593139648438, 15.729568481445312, 10.941200256347656, 17.254898071289062, 47.440773010253906, 49.24821472167969, 3.8868236541748047, 9.210468292236328, 31.358776092529297, 17.912063598632812, 17.075458526611328, 5.0293426513671875, 65.40013122558594, -5.0759429931640625, -3.9067001342773438, 26.135635375976562, -12.407470703125, 20.94446563720703, 20.696208953857422, 6.301486968994141, 11.851478576660156, -8.449504852294922, 8.066352844238281, -7.452968597412109, 28.255203247070312, 2.863462448120117, 12.692340850830078, 38.853485107421875, 27.013946533203125, 4.1536102294921875, -8.103500366210938, 8.84151840209961, 6.79925537109375, 16.915786743164062, 5.492227554321289, 7.3217926025390625, 23.261550903320312, 16.773109436035156, 17.408096313476562, 37.917381286621094, 57.770751953125, 60.2142333984375, 16.515106201171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000075.npy"} +{"epoch": 0.11013215859030837, "step": 76, "batch_size": 64, "mean": 13.184097290039062, "std": 15.858263969421387, "min": -43.6123046875, "p10": -2.5099504470825185, "median": 12.413437843322754, "p90": 30.95248832702637, "max": 67.09577941894531, "pos_frac": 0.84375, "sample": [13.940938949584961, 6.090381622314453, 2.5387954711914062, 25.312896728515625, 2.27423095703125, 18.777862548828125, 10.95648193359375, 27.2330322265625, 31.007572174072266, 13.922248840332031, 15.596107482910156, 22.147361755371094, 9.618995666503906, 32.328895568847656, 4.565834045410156, 5.382871627807617, 8.442024230957031, 67.09577941894531, 32.88770294189453, 6.097190856933594, -5.645240783691406, 12.555479049682617, -0.04867362976074219, 12.670097351074219, -14.970283508300781, 7.175603866577148, 5.647520065307617, 15.630531311035156, 6.7547760009765625, 8.028312683105469, 30.823959350585938, 18.428359985351562, 22.650588989257812, 9.324705123901367, 30.380828857421875, 4.11732292175293, -3.0266036987304688, 26.236618041992188, 3.2212371826171875, -10.536033630371094, 7.3252716064453125, 2.192506790161133, -6.417022705078125, -6.83697509765625, 16.872154235839844, 30.349853515625, 50.35260009765625, 28.120529174804688, 8.752742767333984, 5.9423675537109375, 12.27139663696289, -1.3044261932373047, 16.445449829101562, 16.768600463867188, 15.738624572753906, 12.95767593383789, -0.7155437469482422, 13.921806335449219, 35.34496307373047, 24.420936584472656, 23.83544921875, 33.689544677734375, 11.729759216308594, -43.6123046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000076.npy"} +{"epoch": 0.11160058737151249, "step": 77, "batch_size": 64, "mean": 21.237140655517578, "std": 22.011798858642578, "min": -40.013763427734375, "p10": 0.12546348571777421, "median": 21.783546447753906, "p90": 55.34773101806641, "max": 75.38580322265625, "pos_frac": 0.890625, "sample": [16.690895080566406, 44.33525848388672, 31.715229034423828, 41.95673751831055, 42.400543212890625, -0.20832061767578125, 59.869537353515625, 22.92263412475586, 9.246545791625977, 10.890483856201172, 6.021520614624023, 33.96440124511719, 39.50212860107422, 5.515918731689453, 4.1625213623046875, 27.113494873046875, -40.013763427734375, 22.27796173095703, 35.625953674316406, 31.366539001464844, 14.284318923950195, -2.6637229919433594, 10.032821655273438, 0.9042930603027344, 3.7216033935546875, 54.41401672363281, 3.4335880279541016, 27.840919494628906, 11.575218200683594, -5.346456527709961, 32.26118850708008, 29.61815643310547, 13.874580383300781, 64.38612365722656, 33.22541809082031, 9.622053146362305, 57.11212158203125, 75.38580322265625, 21.28913116455078, 13.186515808105469, -32.43341064453125, 7.7773590087890625, -15.09149169921875, 3.8797473907470703, 12.146612167358398, 26.32140350341797, 32.879486083984375, 24.552011489868164, 26.738021850585938, 26.05475616455078, 7.710437774658203, -6.8913421630859375, 4.9029541015625, 26.23613739013672, 28.487030029296875, 24.127853393554688, 4.22479248046875, 56.200897216796875, 68.05091857910156, 12.064796447753906, 23.691524505615234, 55.747894287109375, 21.17459487915039, 7.110107421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000077.npy"} +{"epoch": 0.1130690161527166, "step": 78, "batch_size": 64, "mean": 16.436477661132812, "std": 18.949737548828125, "min": -20.861366271972656, "p10": -2.860781860351562, "median": 16.084810256958008, "p90": 42.61825103759766, "max": 84.21044921875, "pos_frac": 0.8125, "sample": [2.2380523681640625, 1.7361736297607422, 41.60107421875, 16.675552368164062, -2.0611343383789062, 47.00437927246094, 17.08253288269043, 84.21044921875, 26.4132080078125, 22.502635955810547, 21.10049057006836, 8.506118774414062, -4.74798583984375, 14.667022705078125, 22.8466796875, 19.59946060180664, -0.6156005859375, 8.494367599487305, -6.809478759765625, 3.135894775390625, 7.020967483520508, 18.64369773864746, 27.037227630615234, 23.415733337402344, 20.001575469970703, 14.47439193725586, 17.25588607788086, 17.676544189453125, 22.8807373046875, 6.295633316040039, 43.05418395996094, 16.4633846282959, 5.548492431640625, -10.55605697631836, 25.919044494628906, -11.30621337890625, 53.988922119140625, 20.94770050048828, 44.34220886230469, -0.762786865234375, 37.67290115356445, 4.005882263183594, 50.704010009765625, 33.20476531982422, 29.540077209472656, 0.3189201354980469, 11.493392944335938, 3.579458236694336, 12.940425872802734, 7.23187255859375, 6.338102340698242, -1.1934661865234375, 15.706235885620117, -3.1302490234375, 31.72052001953125, -20.861366271972656, 52.24797058105469, 5.096515655517578, -18.28240966796875, 26.605682373046875, -2.232025146484375, 35.25750732421875, 1.9166984558105469, 26.131935119628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000078.npy"} +{"epoch": 0.1145374449339207, "step": 79, "batch_size": 64, "mean": 18.058425903320312, "std": 23.04805564880371, "min": -20.985855102539062, "p10": -6.339723205566405, "median": 12.986186981201172, "p90": 50.54457244873049, "max": 76.422119140625, "pos_frac": 0.75, "sample": [-6.734375, 55.18012237548828, 6.059610366821289, 70.08642578125, 5.069038391113281, 0.10889434814453125, -1.841409683227539, -10.632949829101562, 44.093650817871094, 37.786529541015625, 19.25975799560547, -20.985855102539062, 24.70537567138672, 70.88215637207031, 9.372676849365234, 26.32561492919922, 73.66712951660156, 52.89973449707031, 17.692140579223633, -10.948089599609375, 76.422119140625, 17.469070434570312, -3.668243408203125, -11.30828857421875, 28.921722412109375, 0.8179702758789062, -0.196441650390625, 16.961692810058594, 14.037666320800781, 62.85395050048828, 3.1858444213867188, 8.904664993286133, 11.934707641601562, 27.88589096069336, -11.032562255859375, 10.975440979003906, -0.9833221435546875, 7.3872833251953125, 41.901397705078125, 40.20179748535156, 20.656021118164062, 31.7939453125, 34.20338439941406, 19.933746337890625, 27.876144409179688, -5.4188690185546875, 3.0119781494140625, 30.751571655273438, 36.869163513183594, 11.225030899047852, 15.544164657592773, -5.213083267211914, -1.3585357666015625, 38.96156311035156, -1.3363800048828125, 22.281784057617188, 5.160289764404297, 6.555080413818359, 0.21154022216796875, -9.271629333496094, 6.742198944091797, 45.0491943359375, -3.479419708251953, 20.27172088623047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000079.npy"} +{"epoch": 0.11600587371512482, "step": 80, "batch_size": 64, "mean": 16.79857063293457, "std": 24.100067138671875, "min": -22.36371612548828, "p10": -6.923367309570312, "median": 12.127460479736328, "p90": 43.8201530456543, "max": 98.13021850585938, "pos_frac": 0.78125, "sample": [27.088401794433594, 8.190750122070312, -6.933845520019531, 24.59442901611328, 6.012733459472656, 3.7311458587646484, -3.0190048217773438, -10.163902282714844, 31.537445068359375, 2.6567764282226562, 4.064121246337891, 16.747554779052734, -15.575565338134766, -21.242660522460938, -6.8268585205078125, 23.993087768554688, 98.13021850585938, 39.994232177734375, 83.23983764648438, 46.44843292236328, 25.744583129882812, 6.995063781738281, 41.961029052734375, 44.616920471191406, 35.22962951660156, 15.731819152832031, -1.362152099609375, 33.471954345703125, 15.236724853515625, 11.510711669921875, 3.3162155151367188, -16.729347229003906, 4.615505218505859, -6.898918151855469, 25.074813842773438, -22.36371612548828, 60.960052490234375, 31.92620086669922, -8.120147705078125, 88.57919311523438, 10.755741119384766, 2.05206298828125, 2.4326210021972656, 2.3104171752929688, 46.51476287841797, 12.744209289550781, 23.834861755371094, 29.705795288085938, 16.69032096862793, 10.433563232421875, -3.9370880126953125, -2.857006072998047, 0.5473861694335938, 6.820625305175781, 22.95025062561035, 23.001785278320312, 6.390705108642578, 3.5410690307617188, 32.69435119628906, 25.100204467773438, 13.4443359375, 38.978179931640625, -5.563032150268555, 14.358970642089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000080.npy"} +{"epoch": 0.11747430249632893, "step": 81, "batch_size": 64, "mean": 23.490432739257812, "std": 28.395017623901367, "min": -29.920196533203125, "p10": -9.631399536132811, "median": 19.41441535949707, "p90": 59.09971313476563, "max": 120.59808349609375, "pos_frac": 0.84375, "sample": [44.15582275390625, 34.49517822265625, -29.920196533203125, 40.824859619140625, 25.822662353515625, 59.57664489746094, 20.108844757080078, -11.039936065673828, 7.726539611816406, 33.52405548095703, 32.00181579589844, -22.544387817382812, 2.3677101135253906, 25.20947265625, 87.74313354492188, 10.425409317016602, 24.006973266601562, 37.39338684082031, 59.180389404296875, -15.850982666015625, 1.3499088287353516, 10.361175537109375, 21.94696807861328, 73.28672790527344, 24.300010681152344, 12.061904907226562, 17.923919677734375, 8.287948608398438, 7.230144500732422, -8.009552001953125, 25.986427307128906, 7.55108642578125, 5.460699081420898, 13.268699645996094, 29.062896728515625, 120.59808349609375, 43.28330612182617, 18.719985961914062, 3.5670547485351562, 58.911468505859375, 43.1845703125, 50.4998779296875, 15.641006469726562, -0.6531524658203125, 4.707633972167969, -2.2108497619628906, -18.566783905029297, 15.28703498840332, 15.061851501464844, 30.524795532226562, 14.969676971435547, 13.433053970336914, 21.347267150878906, -15.622055053710938, 39.260597229003906, 45.63213348388672, 110.040771484375, 60.316619873046875, 10.165313720703125, 35.297767639160156, 2.3096847534179688, 28.722633361816406, -10.32647705078125, 34.00840759277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000081.npy"} +{"epoch": 0.11894273127753303, "step": 82, "batch_size": 64, "mean": 18.563827514648438, "std": 30.420879364013672, "min": -59.29170227050781, "p10": -16.61303901672363, "median": 16.258251190185547, "p90": 52.664073944091804, "max": 122.32901000976562, "pos_frac": 0.796875, "sample": [-9.48160171508789, 33.14213562011719, -20.409568786621094, 18.09170150756836, 21.024600982666016, 19.07103157043457, -27.75147247314453, 15.503852844238281, 26.00525665283203, 58.94390106201172, 69.2874755859375, 13.131595611572266, -12.976119995117188, 8.917938232421875, 15.861013412475586, 105.83329772949219, 13.766487121582031, -53.06752014160156, -8.779884338378906, 33.90632629394531, 1.5623397827148438, 33.12291717529297, 53.123497009277344, 4.048330307006836, -18.17171859741211, -6.69366455078125, 3.9847335815429688, 24.116867065429688, 21.495895385742188, 13.530784606933594, 20.998329162597656, 37.77909851074219, 37.74797821044922, -0.8783702850341797, 74.86163330078125, 19.70716094970703, 51.59208679199219, 16.041439056396484, 9.591140747070312, 5.255607604980469, -1.8864059448242188, 30.477676391601562, 61.22502136230469, -18.90484619140625, 29.307235717773438, 21.668060302734375, 7.146539688110352, 48.00575637817383, 35.57952117919922, -21.947097778320312, 9.921646118164062, 7.1698455810546875, -59.29170227050781, 8.345611572265625, 32.95411682128906, 21.980043411254883, 6.1912689208984375, 27.272361755371094, 16.47506332397461, 122.32901000976562, 15.347038269042969, 18.099687576293945, 0.43483734130859375, 47.348121643066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000082.npy"} +{"epoch": 0.12041116005873716, "step": 83, "batch_size": 64, "mean": 21.99319076538086, "std": 26.900634765625, "min": -27.345932006835938, "p10": -9.301779174804686, "median": 18.42578125, "p90": 63.717628479003935, "max": 86.50747680664062, "pos_frac": 0.8125, "sample": [21.813575744628906, 26.358020782470703, 17.884197235107422, 71.55845642089844, 45.82896423339844, 12.939697265625, -7.509674072265625, 71.78341674804688, 56.998809814453125, 4.91815185546875, 12.259567260742188, 18.43470001220703, 1.161956787109375, -5.659767150878906, 18.719573974609375, 71.40939331054688, -14.78619384765625, 86.50747680664062, 10.968093872070312, 74.7355728149414, 39.627685546875, 12.442283630371094, 50.81498718261719, -21.10797882080078, -27.345932006835938, -10.0233154296875, 56.8824462890625, -20.948883056640625, 17.378087997436523, 66.59712219238281, 12.101791381835938, 15.069580078125, 4.861322402954102, 1.528594970703125, 25.64331817626953, 12.092964172363281, 26.572792053222656, -0.08208084106445312, 40.740379333496094, 13.638683319091797, 20.899749755859375, 19.987743377685547, 45.38374328613281, 74.10519409179688, 20.49020004272461, 48.666168212890625, 5.998531341552734, -19.03119659423828, -2.5652828216552734, 2.0180282592773438, -22.87335205078125, 52.172576904296875, 9.991127014160156, 43.309814453125, 25.851852416992188, 24.03314971923828, -7.618194580078125, 7.933467864990234, 18.94710922241211, 33.365631103515625, 43.88069152832031, 18.41686248779297, 29.073089599609375, 2.319751739501953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000083.npy"} +{"epoch": 0.12187958883994127, "step": 84, "batch_size": 64, "mean": 19.20960235595703, "std": 20.670482635498047, "min": -43.95539855957031, "p10": -1.7366424560546871, "median": 16.698697090148926, "p90": 43.14859466552734, "max": 83.69926452636719, "pos_frac": 0.84375, "sample": [7.8775787353515625, 15.634567260742188, 3.6066513061523438, 5.204292297363281, 46.863128662109375, 5.5839691162109375, 5.625762939453125, 21.310317993164062, 15.926214218139648, 9.593280792236328, 30.669300079345703, 35.905738830566406, -19.9033203125, 15.193580627441406, 22.221294403076172, 30.41419219970703, 10.440967559814453, 83.69926452636719, 30.697635650634766, 3.555755615234375, 7.100395202636719, 30.888160705566406, 7.757148742675781, 6.769702911376953, -0.7314720153808594, 43.05888366699219, 26.170677185058594, 32.47575378417969, -43.95539855957031, -1.895843505859375, 54.64265441894531, 3.407562255859375, 37.07701110839844, 16.765426635742188, 19.37743377685547, 8.864166259765625, 49.14051055908203, 43.187042236328125, 30.864532470703125, 21.828590393066406, 3.1875534057617188, 32.38306427001953, -4.025386810302734, 16.631967544555664, 28.933624267578125, 29.425445556640625, -1.36517333984375, -3.7994136810302734, 34.84745788574219, 34.07536315917969, -2.7456741333007812, 41.59661102294922, 56.69146728515625, 20.019901275634766, 61.43792724609375, 16.172882080078125, 30.4410400390625, 8.451416015625, -17.71648406982422, 31.86469268798828, 12.822954177856445, 18.76384735107422, 8.689445495605469, -0.28305816650390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000084.npy"} +{"epoch": 0.12334801762114538, "step": 85, "batch_size": 64, "mean": 19.705352783203125, "std": 24.871047973632812, "min": -31.039138793945312, "p10": -5.590434265136719, "median": 17.608476638793945, "p90": 59.037527465820325, "max": 69.53416442871094, "pos_frac": 0.71875, "sample": [42.82392883300781, 13.738899230957031, 30.665077209472656, 3.6383304595947266, -2.0081100463867188, 22.941265106201172, 26.958602905273438, 2.398113250732422, 60.42682647705078, 9.504077911376953, -2.5815658569335938, 6.00994873046875, 65.944091796875, 26.745338439941406, 24.340505599975586, 20.917007446289062, -5.4244384765625, -18.712158203125, 20.45056915283203, 1.2788925170898438, -31.039138793945312, 69.53416442871094, 19.51104736328125, -1.990966796875, -5.05328369140625, 46.16838073730469, 7.4132232666015625, -8.639984130859375, 18.517662048339844, 10.939529418945312, 48.20716857910156, -5.6615753173828125, -17.146484375, -1.827718734741211, 14.990493774414062, 5.138072967529297, 64.52235412597656, 16.786518096923828, 38.35472869873047, -0.30559730529785156, -4.2920379638671875, 69.49174499511719, -1.6646785736083984, 46.261444091796875, 60.537139892578125, 15.266761779785156, 55.79582977294922, 66.85903930664062, 10.32525634765625, 5.983358383178711, 32.86060333251953, 50.674339294433594, -0.80419921875, -6.55767822265625, 18.430435180664062, 18.75883674621582, 54.58184814453125, 21.771392822265625, 30.392059326171875, 38.2334098815918, -23.281463623046875, 19.15151596069336, -1.098846435546875, 44.992637634277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000085.npy"} +{"epoch": 0.12481644640234948, "step": 86, "batch_size": 64, "mean": 20.98587989807129, "std": 38.07899856567383, "min": -58.33819580078125, "p10": -15.392702102661131, "median": 15.565719604492188, "p90": 66.18696746826173, "max": 145.25918579101562, "pos_frac": 0.734375, "sample": [-13.439239501953125, 19.220848083496094, 69.56175231933594, -19.571372985839844, 35.34925079345703, 31.54150390625, 27.63776969909668, -1.0603103637695312, 106.477783203125, 11.910591125488281, 6.620349884033203, -4.379920959472656, 1.051116943359375, 22.69799041748047, -10.20819091796875, 19.48766326904297, 49.80975341796875, 19.300975799560547, -3.524913787841797, -53.031219482421875, 62.906219482421875, 22.346729278564453, 31.457111358642578, 10.701311111450195, 10.635292053222656, -16.229900360107422, -3.1892318725585938, 67.59300231933594, -4.792915344238281, 7.835500717163086, 37.74765396118164, 19.25519561767578, 7.959226608276367, -58.33819580078125, 47.365692138671875, 54.99072265625, 27.912628173828125, -40.507850646972656, 75.37675476074219, 2.4525699615478516, 145.25918579101562, 19.730724334716797, 25.181427001953125, 11.477588653564453, -4.125205993652344, 130.08767700195312, 6.906879425048828, 122.80120849609375, 47.7327880859375, -6.6251068115234375, -30.51428985595703, 39.024513244628906, 9.771642684936523, 34.6368408203125, 29.366500854492188, 35.32497024536133, 37.17291259765625, 5.985254287719727, 2.0624847412109375, 4.169191360473633, -4.885408401489258, -19.23529815673828, 2.8524627685546875, 20.007678985595703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000086.npy"} +{"epoch": 0.1262848751835536, "step": 87, "batch_size": 64, "mean": 24.446144104003906, "std": 33.614288330078125, "min": -77.30804443359375, "p10": -7.398130798339843, "median": 19.969839096069336, "p90": 66.31752777099611, "max": 130.2517852783203, "pos_frac": 0.8125, "sample": [23.00156593322754, 35.85700988769531, 0.12163352966308594, 9.277717590332031, 39.876312255859375, -7.605621337890625, 31.577056884765625, -0.5327644348144531, 1.7856521606445312, -4.2577667236328125, -23.447341918945312, 46.478485107421875, -6.9139862060546875, 68.50822448730469, 32.181884765625, 59.81101989746094, 53.618804931640625, 10.869504928588867, 90.19430541992188, 59.380218505859375, 20.818710327148438, 19.755508422851562, 7.146867752075195, 41.59661102294922, 40.919960021972656, 2.0842933654785156, 38.46797180175781, 130.2517852783203, 13.78399658203125, 2.6977920532226562, 14.377019882202148, 75.67086791992188, 61.17317199707031, 9.703113555908203, 57.95843505859375, 27.04499053955078, 13.44462776184082, 39.861568450927734, 19.74298095703125, 76.71699523925781, 0.8489055633544922, 81.53800964355469, 14.217727661132812, 20.174625396728516, -77.30804443359375, 11.642866134643555, 41.474327087402344, -34.884490966796875, 39.178733825683594, 70.61436462402344, -15.99062728881836, -16.47742462158203, 8.84317398071289, 19.765052795410156, -6.539520263671875, 61.205902099609375, 30.619094848632812, 33.936187744140625, -37.051116943359375, 24.39678955078125, 15.174346923828125, 0.91015625, 50.18775939941406, -4.922809600830078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000087.npy"} +{"epoch": 0.1277533039647577, "step": 88, "batch_size": 64, "mean": 18.518863677978516, "std": 32.3276481628418, "min": -60.90460205078125, "p10": -9.1239501953125, "median": 11.53247356414795, "p90": 59.70619049072269, "max": 132.062255859375, "pos_frac": 0.765625, "sample": [9.781890869140625, 10.857879638671875, 65.95783996582031, 65.04098510742188, 34.47425842285156, 48.26325988769531, 4.072231292724609, 63.05430603027344, -9.133773803710938, 13.964803695678711, 31.966827392578125, 7.220878601074219, 3.04168701171875, 46.99224853515625, -23.184951782226562, 30.015426635742188, -20.133331298828125, 15.757102966308594, -46.904052734375, -60.90460205078125, -5.569013595581055, 74.29208374023438, 38.41728973388672, -3.9531097412109375, 44.665855407714844, 5.396078109741211, 29.721221923828125, -9.101028442382812, 23.281965255737305, 23.104248046875, 40.59252166748047, 7.1824951171875, -1.8782386779785156, 28.141983032226562, 1.1656990051269531, 94.23077392578125, 10.692802429199219, 51.8939208984375, 4.81787109375, 132.062255859375, 6.267002105712891, 44.778282165527344, 4.250436782836914, 13.917375564575195, 11.378097534179688, 25.25371551513672, 1.00048828125, -3.0063095092773438, 34.11964416503906, 82.58575439453125, 34.56884002685547, 14.69805908203125, -0.5508041381835938, 10.293907165527344, -51.195709228515625, -5.7046661376953125, 10.97824478149414, -1.7250652313232422, 11.686849594116211, 15.210887908935547, 19.293394088745117, -21.077590942382812, 8.380935668945312, 40.446868896484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000088.npy"} +{"epoch": 0.12922173274596183, "step": 89, "batch_size": 64, "mean": 20.72658920288086, "std": 27.619815826416016, "min": -65.46405029296875, "p10": -2.215394592285156, "median": 17.006900787353516, "p90": 49.10557708740236, "max": 122.94154357910156, "pos_frac": 0.859375, "sample": [5.97552490234375, 1.5741710662841797, 5.675689697265625, 21.78364372253418, -9.534343719482422, 50.6715087890625, 41.19678497314453, 16.326995849609375, -9.205429077148438, 57.33374786376953, 59.533714294433594, 23.78310775756836, 22.574050903320312, 45.45173645019531, 36.515708923339844, 29.707584381103516, 13.31268310546875, 10.12799072265625, -2.969663619995117, 19.70730209350586, 15.496368408203125, 2.988466262817383, 16.73457145690918, 11.56362533569336, -9.5948486328125, 1.4166336059570312, 0.8698501586914062, 10.373872756958008, 75.69612121582031, 19.46624755859375, 28.548995971679688, 88.48104858398438, 16.54879379272461, 44.219215393066406, 21.66107940673828, 1.9947967529296875, 13.33527946472168, 122.94154357910156, 4.635349273681641, -2.299407958984375, 28.78940200805664, -0.7921600341796875, 21.622987747192383, 30.812854766845703, 15.589357376098633, 5.668315887451172, 10.307788848876953, 5.562839508056641, 4.1076202392578125, 35.63710021972656, 21.228309631347656, 41.28271484375, -2.0193634033203125, 17.574020385742188, 42.27825164794922, -65.46405029296875, -25.957672119140625, 27.305519104003906, 17.27923011779785, 23.537212371826172, 23.43218994140625, 24.063865661621094, 92.85093688964844, 7.184379577636719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000089.npy"} +{"epoch": 0.13069016152716592, "step": 90, "batch_size": 64, "mean": 28.227087020874023, "std": 41.6912841796875, "min": -50.83802795410156, "p10": -10.848527336120599, "median": 18.97040557861328, "p90": 91.36948394775392, "max": 149.06085205078125, "pos_frac": 0.796875, "sample": [68.21255493164062, -44.23893737792969, 28.843475341796875, 93.24533081054688, 22.81171417236328, 36.44408416748047, 0.7605934143066406, -4.443660736083984, 57.718658447265625, -1.5202960968017578, 45.18333435058594, 18.51980209350586, -1.0130233764648438, 12.1070556640625, 38.062889099121094, 86.42779541015625, -4.4530487060546875, 149.06085205078125, 53.41658020019531, 1.9988250732421875, 3.103872299194336, 10.719108581542969, 20.127464294433594, -13.428489685058594, 117.00640869140625, 16.8104305267334, 24.071876525878906, 120.29574584960938, 4.787773132324219, 23.577388763427734, 138.54391479492188, 22.25720977783203, -25.678848266601562, 11.41285514831543, -3.535888671875, -50.83802795410156, 27.641841888427734, 8.052757263183594, 62.30366516113281, 5.590782165527344, 13.794776916503906, 12.315261840820312, -22.195022583007812, -4.828615188598633, 21.190505981445312, 86.99250793457031, 100.15054321289062, -35.373748779296875, -30.555191040039062, 11.387619018554688, 37.041168212890625, 1.2064990997314453, 19.421009063720703, 36.70977020263672, 67.97409057617188, 32.59898376464844, 10.658933639526367, 10.556291580200195, 61.04852294921875, 17.269847869873047, 23.955711364746094, 17.317934036254883, 97.08462524414062, 40.84498596191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000090.npy"} +{"epoch": 0.13215859030837004, "step": 91, "batch_size": 64, "mean": 23.309101104736328, "std": 34.401737213134766, "min": -34.10810089111328, "p10": -19.130560302734374, "median": 21.721559524536133, "p90": 69.64406585693364, "max": 121.14190673828125, "pos_frac": 0.75, "sample": [18.68859100341797, 53.00495910644531, -33.17607116699219, 7.826698303222656, 73.61335754394531, 0.7240982055664062, -18.5615234375, 43.82685470581055, 3.606395721435547, 21.211952209472656, 42.44843292236328, 39.93506622314453, 42.452064514160156, 12.752525329589844, -34.10810089111328, 111.38960266113281, -11.735885620117188, 2.19500732421875, 7.950653076171875, 84.97648620605469, 9.722770690917969, 32.40529251098633, 121.14190673828125, -17.43378448486328, 27.52362823486328, 54.884666442871094, -20.303077697753906, 2.9150028228759766, 22.662220001220703, -5.2898101806640625, -18.725723266601562, 22.143632888793945, 60.38238525390625, -21.4339599609375, 19.117698669433594, 21.826889038085938, 57.31395721435547, -13.791084289550781, 21.616230010986328, -4.354789733886719, 8.619178771972656, 24.504074096679688, 59.31157684326172, 2.2408599853515625, -19.304061889648438, 39.53248596191406, 24.077571868896484, 78.50019836425781, 25.702720642089844, 41.545021057128906, 10.213851928710938, -25.06537628173828, 0.39975929260253906, 29.2025146484375, -2.290342330932617, 42.88013458251953, -0.38555335998535156, 85.77998352050781, 45.415504455566406, 24.02783966064453, 51.699859619140625, -26.232322692871094, 48.10621643066406, 81.95548248291016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000091.npy"} +{"epoch": 0.13362701908957417, "step": 92, "batch_size": 64, "mean": 23.60449981689453, "std": 33.466407775878906, "min": -46.92228698730469, "p10": -10.796741485595703, "median": 17.51198101043701, "p90": 57.90043640136719, "max": 126.91476440429688, "pos_frac": 0.75, "sample": [-4.69940185546875, 5.747833251953125, 7.026275634765625, 46.67676544189453, -7.808460235595703, 6.75860595703125, -46.92228698730469, -16.28936004638672, 48.061683654785156, 24.782794952392578, 66.47555541992188, 0.22510910034179688, 44.307220458984375, 3.0091075897216797, -4.950004577636719, 110.36117553710938, 56.086021423339844, 126.91476440429688, -41.3253173828125, -7.07830810546875, 25.79354476928711, -10.797096252441406, 26.464187622070312, -7.756359100341797, -18.04021453857422, 58.27369689941406, -10.795913696289062, 73.06149291992188, 29.529945373535156, 5.486713409423828, 72.9433364868164, -2.5657272338867188, 35.49174499511719, 0.5671558380126953, 51.67710876464844, 8.332862854003906, 32.268775939941406, 105.02522277832031, 10.155420303344727, 56.96336364746094, 43.98638153076172, 57.02949523925781, -1.796173095703125, -5.641593933105469, 8.89212417602539, 24.833354949951172, 5.34625244140625, 41.09869384765625, 35.46014404296875, 11.140647888183594, 0.36124229431152344, -13.65707015991211, 51.92451858520508, 16.630403518676758, 18.393558502197266, 14.176689147949219, 36.01464080810547, 51.437774658203125, 43.18402862548828, 24.388221740722656, 11.083555221557617, -12.173416137695312, 55.11982727050781, 34.01569366455078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000092.npy"} +{"epoch": 0.13509544787077826, "step": 93, "batch_size": 64, "mean": 23.836261749267578, "std": 29.824560165405273, "min": -36.252403259277344, "p10": -3.636377334594724, "median": 16.723175048828125, "p90": 58.430398559570314, "max": 152.33377075195312, "pos_frac": 0.84375, "sample": [21.172515869140625, 9.023149490356445, 26.46257781982422, 63.222267150878906, 58.50715637207031, 54.99864196777344, 4.5252838134765625, 22.80847930908203, 45.415321350097656, -14.532737731933594, 74.1842041015625, 19.27039337158203, 51.60126495361328, 51.17181396484375, 152.33377075195312, 12.620868682861328, -0.9599609375, 4.6208953857421875, 2.7286148071289062, 87.91657257080078, -8.616447448730469, 12.860576629638672, 21.96459197998047, 0.30724334716796875, 34.0164794921875, 10.973403930664062, 31.39464569091797, 6.309761047363281, -1.1653060913085938, 12.61334228515625, -0.40075111389160156, 14.099632263183594, 9.497817993164062, 20.42111587524414, 32.909019470214844, 53.38743591308594, 64.89232635498047, -23.45557403564453, -4.695407867431641, 49.98200988769531, 44.87610626220703, 35.16604232788086, 36.358848571777344, 6.59837532043457, 10.068742752075195, 8.847671508789062, 29.288909912109375, 2.874713897705078, 58.25129699707031, 1.2023696899414062, -7.607841491699219, 58.65666198730469, 6.64117431640625, 17.334877014160156, 6.232854843139648, 5.4035797119140625, -19.06543731689453, 31.135658264160156, 46.651695251464844, 35.36067199707031, 45.96968078613281, 16.111473083496094, -36.252403259277344, 1.0280818939208984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000093.npy"} +{"epoch": 0.13656387665198239, "step": 94, "batch_size": 64, "mean": 27.329792022705078, "std": 31.28114891052246, "min": -20.617950439453125, "p10": -7.8596807479858395, "median": 24.237979888916016, "p90": 68.56506423950195, "max": 112.87734985351562, "pos_frac": 0.78125, "sample": [26.191238403320312, -17.355133056640625, 45.099456787109375, 19.12140655517578, 5.3586578369140625, -14.87432861328125, 9.686027526855469, 61.205265045166016, 5.94120979309082, -20.617950439453125, 24.331064224243164, 11.478073120117188, 24.89544677734375, 6.8756103515625, 11.821456909179688, 0.09488296508789062, 58.5511474609375, 11.858261108398438, -7.327976226806641, 111.31170654296875, 5.555515289306641, 30.609050750732422, 32.73786163330078, 53.931907653808594, 79.49368286132812, 28.6171875, 13.592620849609375, 29.679359436035156, 21.44167709350586, -3.810699462890625, -4.941930770874023, 17.219812393188477, 36.038780212402344, 59.064605712890625, 43.04185104370117, 19.254817962646484, 36.27000427246094, 67.8332290649414, 34.17262649536133, 24.144895553588867, 5.441747665405273, 45.741668701171875, -2.8005752563476562, -17.110305786132812, 68.87870788574219, 75.14151000976562, -7.452676773071289, 58.866485595703125, 112.87734985351562, -8.034111022949219, -20.523391723632812, 88.64152526855469, 31.431365966796875, 56.199249267578125, -4.269195556640625, 18.256256103515625, -14.761184692382812, -5.534332275390625, 37.196533203125, 15.209747314453125, 36.8864631652832, 56.51849365234375, 72.60980224609375, 52.10319519042969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000094.npy"} +{"epoch": 0.13803230543318648, "step": 95, "batch_size": 64, "mean": 22.231979370117188, "std": 29.796245574951172, "min": -74.62229919433594, "p10": -6.541800689697265, "median": 16.74095344543457, "p90": 63.3253112792969, "max": 93.18487548828125, "pos_frac": 0.828125, "sample": [16.10723876953125, 42.72064971923828, 6.352561950683594, 23.261383056640625, 20.19066047668457, 69.6961669921875, 7.668544769287109, 6.628885269165039, -7.554786682128906, 16.67266082763672, 19.390798568725586, 4.594390869140625, 35.66038131713867, 81.94642639160156, 50.0604248046875, -6.0467987060546875, 38.30785369873047, 3.6391735076904297, 15.38160514831543, 45.97437286376953, 23.4639892578125, 39.9991455078125, 19.652009963989258, 15.928312301635742, 9.759504318237305, -12.524772644042969, 10.420154571533203, 52.541908264160156, 20.425052642822266, 29.1944580078125, -9.884132385253906, -6.753944396972656, 52.93476867675781, 93.18487548828125, 77.6121826171875, 58.27037048339844, 37.12605285644531, 2.141359329223633, 0.515411376953125, 65.49171447753906, 46.02394485473633, 82.86825561523438, 72.69514465332031, 3.7657928466796875, 14.693462371826172, 8.925573348999023, -1.5957279205322266, 32.74983215332031, 29.6884765625, -74.62229919433594, 2.783344268798828, 2.081310272216797, -30.1903076171875, 46.24513244628906, 43.282676696777344, 13.549945831298828, 42.86134338378906, 11.827877044677734, 16.809246063232422, 17.642959594726562, -5.64710807800293, -2.463785171508789, 15.49339485168457, -36.77288055419922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000095.npy"} +{"epoch": 0.1395007342143906, "step": 96, "batch_size": 64, "mean": 30.2021541595459, "std": 36.12517166137695, "min": -54.57609558105469, "p10": -0.3743946075439445, "median": 21.142333984375, "p90": 79.22219314575197, "max": 156.9261016845703, "pos_frac": 0.890625, "sample": [62.83659362792969, 17.87738800048828, 37.67558288574219, 31.77181625366211, 21.399703979492188, 64.68560791015625, 4.4232177734375, 2.808300018310547, 1.3286304473876953, 25.71642303466797, 111.95793151855469, 16.420909881591797, 10.226486206054688, -4.061779022216797, 2.246417999267578, 22.008670806884766, 29.082286834716797, 43.777732849121094, 87.1428451538086, 91.16098022460938, 9.159709930419922, 44.51000213623047, 37.411582946777344, 156.9261016845703, 74.05652618408203, 15.188610076904297, 20.884963989257812, 9.162269592285156, 14.848342895507812, 16.311386108398438, 9.544990539550781, 136.25259399414062, 32.966041564941406, 28.22545623779297, 5.4750213623046875, 89.90884399414062, 59.40275573730469, 6.442070007324219, 36.983543395996094, 2.049783706665039, 47.096717834472656, 26.42189598083496, 23.129180908203125, -9.838340759277344, 15.445640563964844, 0.41975975036621094, -2.0356216430664062, 40.680442810058594, -0.7147464752197266, 24.84490966796875, 12.282745361328125, 58.78404235839844, -14.466022491455078, 5.22125244140625, 65.22573852539062, 11.384559631347656, 16.938180923461914, 64.59864807128906, 81.43605041503906, 1.0360565185546875, -0.7548274993896484, 31.85516357421875, -54.57609558105469, 2.3262863159179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000096.npy"} +{"epoch": 0.14096916299559473, "step": 97, "batch_size": 64, "mean": 23.385099411010742, "std": 28.15974235534668, "min": -62.521934509277344, "p10": -8.138808441162107, "median": 22.409391403198242, "p90": 57.624321746826176, "max": 108.17071533203125, "pos_frac": 0.78125, "sample": [17.79071044921875, 52.49365234375, 1.95672607421875, 1.871551513671875, -4.536407470703125, -3.1216087341308594, 16.769027709960938, 28.049375534057617, 2.465320587158203, 47.96430969238281, 23.173114776611328, 41.999168395996094, -1.0295448303222656, 18.03449821472168, 79.03460693359375, 62.04920959472656, -21.384124755859375, 35.294769287109375, 0.7183303833007812, 13.741775512695312, 27.048187255859375, 5.797859191894531, 16.28797721862793, -21.141571044921875, 69.18511962890625, 20.223299026489258, 25.872360229492188, 31.340415954589844, 41.07221984863281, 14.690082550048828, 27.24333953857422, -5.571996688842773, 17.453659057617188, -11.664339065551758, 49.07862091064453, 42.222015380859375, 49.047454833984375, 30.028133392333984, -62.521934509277344, -9.288936614990234, 49.48493957519531, 32.490299224853516, -9.892364501953125, 8.187431335449219, 56.450767517089844, 34.984981536865234, 1.2230453491210938, 21.645668029785156, 23.7597599029541, 17.766029357910156, -4.859052658081055, 58.12727355957031, -9.238870620727539, 51.32272720336914, -0.5608329772949219, 65.21702575683594, -1.1347427368164062, 44.24641418457031, 49.910369873046875, 63.22804260253906, 32.59577560424805, 8.951068878173828, 108.17071533203125, 24.83343505859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000097.npy"} +{"epoch": 0.14243759177679882, "step": 98, "batch_size": 64, "mean": 26.731998443603516, "std": 37.19391632080078, "min": -101.63946533203125, "p10": -13.150492095947262, "median": 29.05353546142578, "p90": 68.63235397338867, "max": 127.72702026367188, "pos_frac": 0.796875, "sample": [41.286865234375, 6.593235015869141, 65.58856201171875, 44.17554473876953, 6.490745544433594, 20.893526077270508, 23.545143127441406, 26.254539489746094, 29.056350708007812, 7.335121154785156, 30.99420738220215, -8.43853759765625, 20.826202392578125, 24.244285583496094, 67.63752746582031, 51.01996612548828, -14.55588150024414, 3.297882080078125, -0.7327194213867188, -23.423202514648438, 41.03181076049805, 35.56047821044922, -30.298988342285156, -38.728973388671875, 56.627281188964844, 53.217105865478516, 31.843276977539062, 69.05870819091797, 70.47355651855469, 26.351673126220703, 40.35957336425781, -2.5967254638671875, 62.994361877441406, 127.72702026367188, -101.63946533203125, 83.91218566894531, 59.69647216796875, 29.05072021484375, 46.5859375, 17.30907440185547, -9.87125015258789, 40.317848205566406, -56.71916198730469, 25.584136962890625, 37.443389892578125, 3.3015518188476562, 40.097251892089844, 83.72698974609375, 60.48199462890625, 36.98732376098633, 15.872329711914062, 9.817062377929688, 74.62225341796875, 106.139404296875, 36.042152404785156, 31.83713150024414, 30.679931640625, 13.438322067260742, 4.595424652099609, -8.841617584228516, 53.70825958251953, -3.8790645599365234, 3.5478897094726562, -18.706092834472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000098.npy"} +{"epoch": 0.14390602055800295, "step": 99, "batch_size": 64, "mean": 32.502403259277344, "std": 41.242069244384766, "min": -33.50464630126953, "p10": -7.966613006591795, "median": 21.281310081481934, "p90": 84.28812255859376, "max": 177.9186553955078, "pos_frac": 0.828125, "sample": [-1.3930225372314453, -8.782707214355469, 6.017459869384766, 21.001529693603516, 3.138660430908203, 43.39663314819336, 26.704147338867188, 30.350059509277344, 109.011962890625, 21.017690658569336, 23.365882873535156, 122.57958984375, 8.300548553466797, -30.163780212402344, 85.18342590332031, -4.8400115966796875, 20.892295837402344, 37.16822814941406, -1.2221717834472656, 17.67853546142578, 74.25469970703125, 14.314262390136719, 11.135011672973633, 158.0574951171875, 9.25262451171875, 59.32598876953125, 2.4215545654296875, -24.15465545654297, 10.485414505004883, -33.50464630126953, -9.702842712402344, 53.78633117675781, 38.649253845214844, 26.16482925415039, 63.0146484375, 177.9186553955078, 74.84529113769531, 38.24777603149414, 16.746078491210938, 19.567764282226562, 5.008182525634766, 30.20435333251953, 38.22676086425781, 82.19908142089844, 100.01346588134766, 5.541969299316406, 12.904045104980469, 66.69281005859375, -6.0623931884765625, 1.047525405883789, 32.29261779785156, 88.26335144042969, 30.410686492919922, 10.782588958740234, 57.19902038574219, -9.716796875, 41.845279693603516, 14.822471618652344, 65.82452392578125, 52.171424865722656, 21.54492950439453, 47.20856475830078, 4.068424224853516, -22.569442749023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000099.npy"} +{"epoch": 0.14537444933920704, "step": 100, "batch_size": 64, "mean": 19.239667892456055, "std": 35.788570404052734, "min": -74.70394897460938, "p10": -18.775580024719233, "median": 19.737937927246094, "p90": 64.47654647827149, "max": 144.22515869140625, "pos_frac": 0.765625, "sample": [53.43620300292969, 31.278406143188477, -6.523130416870117, 14.938579559326172, 23.252033233642578, -8.039688110351562, 32.570594787597656, 26.171669006347656, 7.310070037841797, 41.159637451171875, 11.686454772949219, 0.9916915893554688, 37.999420166015625, 24.154094696044922, 4.264982223510742, 79.19381713867188, 19.828277587890625, -45.33662414550781, -30.65472412109375, -51.738861083984375, -13.412267684936523, -10.604804992675781, 1.4605445861816406, -60.97811508178711, 55.15031814575195, 19.647598266601562, 50.070289611816406, 23.893417358398438, 1.1893692016601562, 5.427091598510742, 43.36540985107422, -1.925872802734375, 44.96397399902344, 4.921436309814453, 67.2890625, 30.080352783203125, 8.770978927612305, 11.2730712890625, 62.806190490722656, 25.27301025390625, 39.99953079223633, 15.994461059570312, 7.5022430419921875, 1.4559497833251953, 30.88709259033203, 34.3753662109375, -8.391281127929688, -27.047454833984375, -7.0583953857421875, 67.60406494140625, 37.217979431152344, 69.56365203857422, 24.75274085998535, -74.70394897460938, 144.22515869140625, -11.518415451049805, 15.718719482421875, 80.43075561523438, -21.074142456054688, 65.19241333007812, 46.08824157714844, 7.633445739746094, 37.724517822265625, 20.162059783935547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000100.npy"} +{"epoch": 0.14684287812041116, "step": 101, "batch_size": 64, "mean": 23.208131790161133, "std": 40.0408935546875, "min": -52.93267059326172, "p10": -19.66166496276855, "median": 18.265384674072266, "p90": 84.4170242309571, "max": 132.154541015625, "pos_frac": 0.6875, "sample": [96.02580261230469, 132.154541015625, 29.76384735107422, 29.534210205078125, -14.276687622070312, 102.6163101196289, 106.18569946289062, 10.548141479492188, 91.52084350585938, 107.48863220214844, -34.272125244140625, 28.699798583984375, -38.92015838623047, 47.49993896484375, 13.12728500366211, -7.996391296386719, -20.53919219970703, 14.116281509399414, 61.15363311767578, 27.424453735351562, -13.325691223144531, 25.62786865234375, 51.869476318359375, 62.34331512451172, 18.27264404296875, 18.25812530517578, 9.660991668701172, 65.47469329833984, 115.290283203125, 26.057884216308594, -6.1449737548828125, -6.591215133666992, -13.831344604492188, 51.12582015991211, 17.93317985534668, 38.36100769042969, 67.84144592285156, 45.44194030761719, -6.619903564453125, 7.013370513916016, -2.5608787536621094, -17.61410140991211, 19.85222625732422, -6.818817138671875, 0.2279510498046875, -15.323263168334961, 30.44086456298828, 32.40139389038086, 8.806041717529297, 25.24272346496582, -52.93267059326172, 35.410797119140625, 2.4886016845703125, -30.358013153076172, 25.97427749633789, 16.6337947845459, -14.75106430053711, 38.71842575073242, 7.3540191650390625, -1.8094196319580078, 51.481170654296875, -27.506900787353516, 39.37809753417969, -35.32862854003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000101.npy"} +{"epoch": 0.14831130690161526, "step": 102, "batch_size": 64, "mean": 17.13527488708496, "std": 37.78377151489258, "min": -77.0101318359375, "p10": -21.81753997802734, "median": 11.732864379882812, "p90": 56.20214233398438, "max": 134.6588592529297, "pos_frac": 0.65625, "sample": [2.9432201385498047, 39.743568420410156, 56.938011169433594, 12.28131103515625, 5.1375579833984375, 5.198066711425781, -15.8377685546875, -44.17454528808594, 21.50678825378418, -72.76409912109375, 28.0819091796875, 87.23751831054688, -11.058128356933594, 37.679412841796875, 28.044017791748047, -8.824115753173828, 11.184417724609375, 34.63898468017578, 48.97785186767578, 1.0297431945800781, -14.994468688964844, -4.0908660888671875, 46.00653076171875, -6.014326095581055, 57.65061950683594, 21.949520111083984, 37.22345733642578, 134.6588592529297, -3.332366943359375, -0.09503936767578125, 108.7533950805664, -0.335357666015625, 37.461483001708984, 42.15385437011719, 4.00160026550293, -16.02672576904297, 30.99065589904785, -77.0101318359375, 0.6606903076171875, -28.020301818847656, 0.9534091949462891, -43.89495849609375, -22.742034912109375, -3.5042037963867188, 54.48511505126953, 28.491546630859375, 41.47997283935547, 81.34231567382812, 43.73284912109375, 8.00311279296875, 68.4637680053711, -26.457595825195312, -6.153350830078125, 6.523113250732422, 53.89568328857422, -19.660385131835938, 38.16142272949219, 33.664894104003906, 24.16668128967285, 36.66693878173828, -11.25152587890625, -15.725311279296875, 41.20671081542969, 45.254581451416016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000102.npy"} +{"epoch": 0.14977973568281938, "step": 103, "batch_size": 64, "mean": 34.391334533691406, "std": 41.59410858154297, "min": -36.36299133300781, "p10": -11.938301086425781, "median": 28.310373306274414, "p90": 79.47033615112305, "max": 178.87353515625, "pos_frac": 0.796875, "sample": [-6.001522064208984, 8.382568359375, 14.189962387084961, 33.130287170410156, -19.337692260742188, -24.819732666015625, 33.66432571411133, 39.39353942871094, 13.687776565551758, 19.781795501708984, 27.656909942626953, 44.404518127441406, 17.587112426757812, 19.94335174560547, -0.8438186645507812, 65.55929565429688, 110.65484619140625, 35.25989532470703, 49.1458740234375, 59.988380432128906, -33.34443664550781, -12.320281982421875, 47.59013366699219, 28.963836669921875, -36.36299133300781, 74.15192413330078, 35.290687561035156, -16.284423828125, 14.860885620117188, 19.48217010498047, 37.23286437988281, 161.83755493164062, 25.092056274414062, 19.885120391845703, 70.06552124023438, 67.08431243896484, -5.686210632324219, -11.047012329101562, 21.528583526611328, 17.56922721862793, -4.898902893066406, 39.623687744140625, 62.67390441894531, -27.873626708984375, 22.618316650390625, 99.81005096435547, 77.60836791992188, 26.480751037597656, 178.87353515625, 98.44621276855469, 102.90271759033203, 80.07139587402344, 12.476470947265625, 39.06715393066406, 66.81681823730469, -1.5716590881347656, 31.3767147064209, 3.416685104370117, 4.632289886474609, 50.125038146972656, 78.06786346435547, 16.14240074157715, 32.01446533203125, 45.12781524658203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000103.npy"} +{"epoch": 0.1512481644640235, "step": 104, "batch_size": 64, "mean": 32.55640411376953, "std": 47.21324157714844, "min": -64.38365173339844, "p10": -12.161727714538573, "median": 19.383869171142578, "p90": 90.03089904785156, "max": 184.71368408203125, "pos_frac": 0.75, "sample": [89.71078491210938, -6.877204895019531, 13.833328247070312, 14.642505645751953, 60.95420837402344, -5.1094970703125, -26.67188262939453, 2.9424514770507812, -2.6075820922851562, 37.26012420654297, -12.940519332885742, 33.855472564697266, 147.25344848632812, 51.65605926513672, 84.32547760009766, -2.7188949584960938, 90.1680908203125, 31.95942497253418, -17.706100463867188, 114.24957275390625, 36.96940612792969, 5.3908843994140625, 19.750591278076172, 7.157341003417969, 13.684412002563477, 2.228086471557617, 184.71368408203125, -43.84833526611328, 56.605499267578125, 132.4660186767578, 134.85975646972656, 29.60406494140625, 13.603141784667969, 64.56114196777344, 12.545614242553711, 14.565750122070312, -10.344547271728516, -22.25981903076172, 111.81396484375, 15.031303405761719, 50.660255432128906, -6.6433258056640625, 68.25872802734375, -3.1092758178710938, 83.72007751464844, 37.98981475830078, 32.04450225830078, 50.6573371887207, 36.524688720703125, 32.18458557128906, 4.440753936767578, 89.71005249023438, 8.402595520019531, 27.49419403076172, 62.501800537109375, 67.44884490966797, -7.005285263061523, 27.527746200561523, -0.3509063720703125, 19.017147064208984, -21.384422302246094, 1.7829265594482422, -64.38365173339844, 8.843561172485352], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000104.npy"} +{"epoch": 0.1527165932452276, "step": 105, "batch_size": 64, "mean": 46.19757843017578, "std": 41.30510330200195, "min": -64.43658447265625, "p10": 2.876650238037111, "median": 43.8984260559082, "p90": 99.62243194580081, "max": 176.6494903564453, "pos_frac": 0.90625, "sample": [46.81153869628906, 31.537399291992188, 45.264930725097656, 29.706344604492188, 41.773067474365234, 44.83563995361328, 72.86801147460938, 109.26811981201172, 9.804349899291992, -64.43658447265625, -6.4071502685546875, 80.98397827148438, 81.11264038085938, 13.363668441772461, 35.82518768310547, 48.082584381103516, -32.30801010131836, 2.2261886596679688, -5.872283935546875, 106.96556854248047, 31.801231384277344, 106.50440979003906, 69.06858825683594, 18.70886993408203, 11.129547119140625, 80.8875503540039, 62.541412353515625, 42.88642883300781, 9.504039764404297, 37.90618133544922, 64.4483642578125, 163.41708374023438, 21.529052734375, 50.829017639160156, 18.060943603515625, 19.361175537109375, 69.18384552001953, 89.28848266601562, 46.117462158203125, 14.798995971679688, 50.084716796875, 91.81236267089844, 35.28209686279297, 37.71673583984375, 42.961212158203125, 51.81398010253906, -1.2479591369628906, 64.86717224121094, 68.24293518066406, 102.9696044921875, 36.19819641113281, 22.106590270996094, -17.612060546875, 9.669078826904297, 64.84931945800781, 60.46116638183594, 54.92976379394531, 109.37652587890625, 59.778717041015625, 176.6494903564453, 24.536575317382812, 4.3943939208984375, 12.63504409790039, 74.79133605957031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000105.npy"} +{"epoch": 0.15418502202643172, "step": 106, "batch_size": 64, "mean": 28.927536010742188, "std": 35.46253204345703, "min": -52.652992248535156, "p10": -12.865041351318352, "median": 29.560436248779297, "p90": 75.69280548095703, "max": 131.78109741210938, "pos_frac": 0.84375, "sample": [28.099197387695312, 5.192502975463867, 8.243436813354492, -4.2825164794921875, 18.041030883789062, 1.3418407440185547, 47.33967590332031, -6.163734436035156, 30.097244262695312, 67.36772155761719, 36.23002243041992, 36.364959716796875, 2.625530242919922, 74.57917785644531, 26.00579071044922, 90.00129699707031, 6.951383590698242, 12.112049102783203, 76.17007446289062, 1.2876396179199219, 62.36016845703125, 40.96511459350586, 2.2731781005859375, 84.73297119140625, 23.784421920776367, 42.505287170410156, 7.508613586425781, -15.737030029296875, 52.962013244628906, -34.31975555419922, 48.669944763183594, 14.721923828125, 41.14445114135742, 40.23427963256836, -2.6506214141845703, 5.2270355224609375, 23.44329833984375, 1.8573150634765625, 88.70097351074219, 63.13587188720703, 63.06916809082031, 24.982349395751953, -52.652992248535156, 4.919242858886719, 131.78109741210938, 37.22462463378906, 29.02362823486328, 34.822166442871094, 32.01971435546875, 8.008026123046875, 0.4001941680908203, 56.853118896484375, 103.17774200439453, 35.73204803466797, 55.002899169921875, 55.56547546386719, -23.004989624023438, 33.65308380126953, -21.72119140625, 30.262374877929688, 79.95205688476562, -40.588478088378906, -22.945663452148438, 46.70288848876953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000106.npy"} +{"epoch": 0.15565345080763582, "step": 107, "batch_size": 64, "mean": 32.75652313232422, "std": 42.06280517578125, "min": -87.74777221679688, "p10": -12.6854543685913, "median": 22.30781650543213, "p90": 84.12707901000977, "max": 137.89906311035156, "pos_frac": 0.859375, "sample": [83.5419921875, 109.70796203613281, 47.47076416015625, 68.41621398925781, 25.572555541992188, 30.72797393798828, 50.2518310546875, -3.5200443267822266, -87.74777221679688, 8.593816757202148, 16.16836929321289, 76.16629028320312, -36.308799743652344, 28.255115509033203, 9.982728958129883, 8.472648620605469, 15.187324523925781, 84.3778305053711, 1.3871116638183594, -26.29242706298828, 104.2011489868164, 66.70418548583984, 78.80364990234375, 6.638078689575195, -17.380203247070312, 4.020151138305664, -1.1909141540527344, 5.410499572753906, 137.89906311035156, 72.3784408569336, 19.030742645263672, 16.214515686035156, 95.30193328857422, 44.483551025390625, 64.83589172363281, 87.06656646728516, 16.427988052368164, 28.238304138183594, 16.306013107299805, -16.613487243652344, 61.92704772949219, -46.33240509033203, 64.60395812988281, 74.61483764648438, 11.115882873535156, 81.99031829833984, 0.8688812255859375, 81.66374206542969, 48.61653137207031, 52.978179931640625, 23.158966064453125, 21.456666946411133, 5.345672607421875, -27.639076232910156, 1.3454360961914062, 5.163787841796875, 12.672439575195312, 73.49240112304688, 10.415863037109375, 23.912912368774414, 65.54583740234375, 97.646728515625, 2.6168975830078125, 10.048477172851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000107.npy"} +{"epoch": 0.15712187958883994, "step": 108, "batch_size": 64, "mean": 39.17702102661133, "std": 56.82441329956055, "min": -59.98408508300781, "p10": -23.38497714996338, "median": 27.84130859375, "p90": 121.30213775634769, "max": 208.5067138671875, "pos_frac": 0.765625, "sample": [35.52565002441406, 132.8231964111328, 39.03673553466797, -43.592987060546875, -59.98408508300781, 104.119873046875, 30.52378273010254, 8.890342712402344, 208.5067138671875, -0.25481414794921875, -12.348548889160156, 126.7035140991211, 8.596145629882812, 6.7590484619140625, -6.036762237548828, 72.69539642333984, -6.868877410888672, 95.65443420410156, 158.58277893066406, 105.45050811767578, 114.30046081542969, 81.93330383300781, 1.5725898742675781, -22.73377799987793, 47.81282043457031, 10.757513046264648, 26.04393768310547, 15.378780364990234, 75.60104370117188, 4.896213531494141, 25.242206573486328, 0.5258941650390625, 70.28907012939453, -23.6640625, 171.96127319335938, -38.78388977050781, 78.4595947265625, -30.27871322631836, -14.833236694335938, 4.01671028137207, 27.33275604248047, 7.2572479248046875, 49.65736389160156, 150.83349609375, 33.682106018066406, 9.906990051269531, 87.18671417236328, 72.46476745605469, 30.261186599731445, -50.24517822265625, 15.109046936035156, 50.98381805419922, 124.3028564453125, 62.63323974609375, -1.41046142578125, -1.357187271118164, 39.736061096191406, 15.571006774902344, 72.97506713867188, -50.379642486572266, 28.34986114501953, 42.00772476196289, 64.14515686035156, 23.045547485351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000108.npy"} +{"epoch": 0.15859030837004406, "step": 109, "batch_size": 64, "mean": 41.90185546875, "std": 49.764122009277344, "min": -83.11285400390625, "p10": -10.19664306640625, "median": 35.163631439208984, "p90": 105.73259582519532, "max": 184.87228393554688, "pos_frac": 0.84375, "sample": [48.72200012207031, 8.096179962158203, 132.45069885253906, 84.85974884033203, 29.75292205810547, 53.37421798706055, -56.72483825683594, 11.459213256835938, 17.640151977539062, 49.22105407714844, 128.97872924804688, 42.5666618347168, 71.72396850585938, 53.69969177246094, 53.968536376953125, 5.713197708129883, 21.904659271240234, 36.936073303222656, -12.727592468261719, -5.198368072509766, -6.224235534667969, 184.87228393554688, 0.5435256958007812, -10.283966064453125, 25.211305618286133, 4.607086181640625, 124.7783203125, 30.60740852355957, 31.776601791381836, -37.7994499206543, 63.02772521972656, 67.49427795410156, 9.133359909057617, 68.73312377929688, 56.732452392578125, 38.093223571777344, 96.84596252441406, 70.8803939819336, 106.77678680419922, 102.74202728271484, 0.6150741577148438, 12.477401733398438, -83.11285400390625, 28.86560821533203, 1.9951171875, 71.77265167236328, 31.9034423828125, 33.587406158447266, 19.621187210083008, 81.18055725097656, -47.615386962890625, 33.9407958984375, 36.70383834838867, 108.64497375488281, 126.02165222167969, 35.36614227294922, 86.21874237060547, 34.96112060546875, 89.63337707519531, 103.29615020751953, -9.992889404296875, -22.870330810546875, 2.276670455932617, 101.26334381103516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000109.npy"} +{"epoch": 0.16005873715124816, "step": 110, "batch_size": 64, "mean": 37.83190155029297, "std": 53.00170135498047, "min": -96.42166137695312, "p10": -14.197290802001952, "median": 32.8746976852417, "p90": 109.7043869018555, "max": 179.51242065429688, "pos_frac": 0.828125, "sample": [118.45149230957031, 54.009857177734375, 21.106319427490234, 52.90692138671875, 57.269065856933594, 36.71941375732422, 48.68010330200195, 1.8615760803222656, -96.42166137695312, 44.66719055175781, 66.62448120117188, 16.448455810546875, -62.64414978027344, 18.293519973754883, 11.75533676147461, 0.9408435821533203, 30.2410831451416, 167.05230712890625, 102.38404846191406, 26.7703857421875, 4.427587509155273, -8.335071563720703, 179.51242065429688, 134.18161010742188, 99.45707702636719, 77.14976501464844, 17.720298767089844, -16.799148559570312, -13.538322448730469, 59.262176513671875, -42.64286804199219, -14.479705810546875, 9.873926162719727, 18.799625396728516, 5.884189605712891, 155.96646118164062, 53.26930236816406, 83.54557800292969, 112.8416748046875, 4.371131896972656, 76.039794921875, -8.018546104431152, 38.23687744140625, 36.52418899536133, 14.368099212646484, 11.611358642578125, 79.77227020263672, -58.06060791015625, 75.4232177734375, -24.850692749023438, 38.47187042236328, 1.6371822357177734, 54.86815643310547, 35.5083122253418, 50.31056594848633, 43.666290283203125, 19.476821899414062, 19.941864013671875, 18.335308074951172, 44.900428771972656, 77.89622497558594, 2.260082244873047, -4.214061737060547, 139.55230712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000110.npy"} +{"epoch": 0.16152716593245228, "step": 111, "batch_size": 64, "mean": 42.87803649902344, "std": 58.91316223144531, "min": -84.30311584472656, "p10": -31.93306274414062, "median": 32.723388671875, "p90": 118.4962600708008, "max": 193.18728637695312, "pos_frac": 0.765625, "sample": [193.18728637695312, -50.7633056640625, -83.49380493164062, 120.31512451171875, -6.72796630859375, 53.682708740234375, 13.587135314941406, 33.00440216064453, 26.822120666503906, 145.09400939941406, 31.994415283203125, 77.33377075195312, 42.79176330566406, 55.174407958984375, 28.405776977539062, 109.59747314453125, 110.4435043334961, -34.28338623046875, 97.90199279785156, 7.708234786987305, 79.26116180419922, -26.448974609375, -20.57418441772461, -9.452392578125, 64.70280456542969, 114.25224304199219, -6.599601745605469, 112.06776428222656, -34.55335998535156, 10.935195922851562, -23.689102172851562, 32.44237518310547, 132.53952026367188, 22.74262237548828, 148.21694946289062, 55.52473449707031, 29.63187026977539, 54.6786003112793, 66.96337127685547, 129.57958984375, 23.532546997070312, -41.06292724609375, 19.21302032470703, 28.692562103271484, 134.55288696289062, 69.19819641113281, 28.269180297851562, 59.51594543457031, 112.42466735839844, 2.2336978912353516, 111.73689270019531, 37.81394958496094, 92.5108413696289, 68.78631591796875, 49.92405700683594, 13.750482559204102, -84.30311584472656, 31.20539093017578, 27.819448471069336, 40.771697998046875, -5.256217956542969, 92.05010986328125, -62.33805847167969, -10.844276428222656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000111.npy"} +{"epoch": 0.16299559471365638, "step": 112, "batch_size": 64, "mean": 27.452232360839844, "std": 40.44499588012695, "min": -58.714088439941406, "p10": -24.229346466064452, "median": 24.879663467407227, "p90": 91.08398590087893, "max": 120.95355224609375, "pos_frac": 0.71875, "sample": [42.495121002197266, 120.95355224609375, 85.29252624511719, -6.631557464599609, -42.830753326416016, 32.58982849121094, 37.18614196777344, 31.892364501953125, 28.116897583007812, -5.802085876464844, 99.810791015625, 78.80447387695312, -0.2194538116455078, -2.9227371215820312, 93.5660400390625, 61.7071418762207, 38.70249557495117, 57.037689208984375, 41.1099967956543, 37.941375732421875, 45.619537353515625, 30.15966796875, 69.4403076171875, -14.61737060546875, 24.712600708007812, 22.34828758239746, 30.501136779785156, 32.74540328979492, -24.287841796875, 17.567394256591797, 3.427804946899414, -33.16605758666992, 31.135459899902344, 107.5897445678711, 109.87078094482422, -32.04756164550781, 40.288330078125, 14.80300521850586, 71.51754760742188, -0.37737464904785156, -58.714088439941406, 95.95586395263672, 25.04672622680664, 13.172225952148438, -11.649917602539062, 19.956279754638672, 61.139892578125, -0.06998634338378906, 15.308082580566406, 97.29733276367188, 24.596939086914062, 8.40811538696289, 44.28227233886719, -40.56121826171875, 11.816377639770508, -24.092857360839844, -25.318984985351562, -9.840263366699219, 72.59619903564453, 1.221221923828125, 24.457368850708008, 5.3794708251953125, -8.615463256835938, 39.14070129394531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000112.npy"} +{"epoch": 0.1644640234948605, "step": 113, "batch_size": 64, "mean": 29.506242752075195, "std": 38.20697784423828, "min": -39.20936584472656, "p10": -15.918980216979978, "median": 31.655122756958008, "p90": 82.0059410095215, "max": 118.00658416748047, "pos_frac": 0.734375, "sample": [-32.46240234375, 75.87986755371094, -17.24001693725586, 88.8624267578125, 9.515426635742188, 38.44245147705078, 14.556259155273438, 36.411842346191406, 3.6156368255615234, -9.277664184570312, 8.733909606933594, 84.8271484375, 54.40345001220703, 110.31784057617188, 38.09900665283203, -18.564939498901367, -6.3890380859375, -9.222648620605469, 9.463714599609375, 65.70472717285156, -20.1632080078125, 47.05712890625, 79.23719787597656, -22.115997314453125, 53.46842956542969, -7.2526397705078125, 2.714048385620117, 118.00658416748047, 48.044517517089844, -39.20936584472656, 18.514564514160156, 74.31199645996094, 35.68124771118164, -10.417724609375, 37.18562316894531, 52.33599090576172, 13.759803771972656, 82.20269012451172, 20.88629150390625, -11.523757934570312, 52.17992401123047, 69.97566986083984, -12.83656120300293, 1.015237808227539, -24.298904418945312, 44.90989303588867, 35.43059539794922, 40.610565185546875, -9.898178100585938, 15.380298614501953, 112.53274536132812, -2.7665977478027344, 33.814727783203125, 32.54383850097656, 97.07801055908203, 81.54685974121094, 33.41106033325195, 62.63441467285156, -7.11143684387207, 9.735549926757812, 12.318084716796875, 14.440372467041016, 30.766407012939453, 46.5865478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000113.npy"} +{"epoch": 0.16593245227606462, "step": 114, "batch_size": 64, "mean": 47.027557373046875, "std": 64.63849639892578, "min": -41.00023651123047, "p10": -27.842127990722656, "median": 42.99824142456055, "p90": 110.94941329956059, "max": 350.599365234375, "pos_frac": 0.765625, "sample": [178.52322387695312, 89.67395782470703, 53.953453063964844, 50.480857849121094, 27.289772033691406, 116.95405578613281, 20.082273483276367, 45.640472412109375, -35.58912658691406, 67.63218688964844, -31.5628719329834, 73.6278076171875, 44.24541473388672, 20.877952575683594, -37.21166229248047, 83.41853332519531, 14.98459243774414, -31.895645141601562, 33.59767150878906, 124.13975524902344, 12.957674026489258, 54.72154235839844, -5.41845703125, 115.47753143310547, 41.03496551513672, 59.62250518798828, 34.86907196044922, 24.715389251708984, 36.839935302734375, 60.127567291259766, 64.94385528564453, -5.082756042480469, -8.941986083984375, 56.0816650390625, -25.756729125976562, 15.513677597045898, 49.313232421875, 68.90411376953125, 43.08892822265625, 100.38380432128906, 59.52431106567383, 87.68228149414062, 5.675090789794922, 49.65203094482422, 39.2242317199707, -13.529609680175781, 232.3474884033203, 6.561441421508789, 42.907554626464844, 143.18963623046875, -10.274389266967773, 72.36454772949219, -41.00023651123047, -28.735870361328125, 350.599365234375, 57.326053619384766, -4.181308746337891, 86.28054809570312, 34.38200378417969, -32.25343704223633, 26.866737365722656, 88.08810424804688, -18.905254364013672, 73.71434020996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000114.npy"} +{"epoch": 0.16740088105726872, "step": 115, "batch_size": 64, "mean": 32.295928955078125, "std": 65.49223327636719, "min": -151.76416015625, "p10": -27.20351638793945, "median": 19.100200653076172, "p90": 125.5210182189942, "max": 235.44894409179688, "pos_frac": 0.75, "sample": [0.31207275390625, -2.7215919494628906, 132.17616271972656, 58.14338684082031, -52.37744140625, -0.60479736328125, -47.59333038330078, 10.35489273071289, 12.64815902709961, 17.727752685546875, 25.170654296875, -7.263885498046875, 43.34316635131836, 57.212806701660156, 11.764001846313477, 167.08653259277344, 21.280420303344727, -7.7841644287109375, 235.44894409179688, -11.485067367553711, 73.67037200927734, 5.97955322265625, 139.625, 70.23206329345703, 170.72763061523438, -27.44061279296875, 23.680389404296875, 26.189815521240234, 7.894643783569336, 98.11860656738281, 171.9047393798828, 97.78094482421875, 0.958709716796875, -122.90023040771484, 9.38320541381836, 34.36687469482422, 58.98561096191406, 16.304100036621094, 9.706329345703125, 43.03423309326172, 27.251739501953125, 49.19001770019531, 25.809188842773438, 30.300697326660156, 145.52560424804688, -27.626007080078125, 21.338645935058594, 16.708778381347656, -8.066070556640625, 11.077812194824219, 20.47264862060547, 77.30644226074219, 3.2011642456054688, 14.394298553466797, -20.789072036743164, 109.99234771728516, 65.755859375, -16.161863327026367, 63.28593444824219, -151.76416015625, 107.60675048828125, -47.35327911376953, 5.091386795043945, -26.650291442871094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000115.npy"} +{"epoch": 0.16886930983847284, "step": 116, "batch_size": 64, "mean": 35.76426315307617, "std": 45.75696563720703, "min": -35.92939758300781, "p10": -15.212896919250486, "median": 30.528362274169922, "p90": 89.32471313476563, "max": 211.38636779785156, "pos_frac": 0.78125, "sample": [7.482624053955078, 2.5011329650878906, -32.8023567199707, -29.87206268310547, 25.22678565979004, 44.68798828125, 62.755767822265625, 23.057655334472656, 49.01988983154297, 59.79545593261719, 30.27143096923828, -8.761697769165039, 41.60626220703125, 28.17717742919922, 9.211837768554688, -6.539548873901367, 52.834877014160156, -35.92939758300781, 82.48655700683594, 87.79756164550781, 40.10002136230469, -15.749298095703125, 23.132980346679688, 17.90338897705078, 49.112815856933594, 25.064254760742188, 76.01971435546875, 105.92782592773438, 31.73870086669922, -32.75425720214844, 121.49942016601562, 35.86924743652344, 23.097835540771484, 43.10069274902344, -13.961294174194336, -25.702293395996094, 4.391246795654297, 9.609375, -3.303346633911133, 100.41415405273438, 4.710758209228516, 130.7514190673828, 30.785293579101562, 88.14976501464844, 77.44728088378906, 34.450870513916016, 54.422096252441406, -33.18255615234375, -8.590740203857422, 81.72283172607422, 23.860576629638672, -6.13275146484375, 211.38636779785156, 65.33985137939453, 12.4849853515625, 49.912254333496094, 66.09393310546875, 62.469730377197266, -10.48454475402832, 13.807647705078125, 0.7893943786621094, 41.593536376953125, 98.77742004394531, 89.82826232910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000116.npy"} +{"epoch": 0.17033773861967694, "step": 117, "batch_size": 64, "mean": 29.33287239074707, "std": 50.608604431152344, "min": -100.14729309082031, "p10": -25.592084121704094, "median": 23.903563499450684, "p90": 86.1594192504883, "max": 186.491943359375, "pos_frac": 0.765625, "sample": [13.186748504638672, 53.01598358154297, 50.27308654785156, -16.855358123779297, 20.85799217224121, -2.7553787231445312, 19.270986557006836, 39.226051330566406, 19.801605224609375, 39.49726867675781, 43.80611038208008, 66.99923706054688, 131.38632202148438, 112.72702026367188, 16.810279846191406, 45.59236145019531, -56.31626892089844, 88.43780517578125, 4.8974151611328125, -94.92788696289062, -29.336395263671875, 8.763984680175781, 16.982213973999023, 61.461612701416016, 45.26649856567383, -12.893218994140625, -32.2060546875, 3.6599369049072266, 26.949134826660156, -13.689826965332031, 38.93342590332031, 120.119384765625, -3.7842941284179688, 4.678131103515625, 147.15875244140625, 15.004890441894531, 186.491943359375, 14.343231201171875, 30.182540893554688, 64.27962493896484, 15.497905731201172, 9.213878631591797, 7.799339294433594, -8.754730224609375, -100.14729309082031, 30.35692596435547, 80.7489013671875, 32.20099639892578, 90.27259063720703, 38.32157897949219, -45.32707214355469, -9.850481033325195, 7.401805877685547, 54.619197845458984, 80.84318542480469, 79.51887512207031, 6.299694061279297, 49.30116271972656, 71.91486358642578, 28.42584228515625, 42.309471130371094, -3.1161766052246094, -37.03205871582031, 69.18850708007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000117.npy"} +{"epoch": 0.17180616740088106, "step": 118, "batch_size": 64, "mean": 35.825435638427734, "std": 46.314544677734375, "min": -53.457733154296875, "p10": -12.20424766540527, "median": 23.826995849609375, "p90": 108.2766525268555, "max": 158.86428833007812, "pos_frac": 0.765625, "sample": [-6.9847564697265625, 20.720844268798828, 0.4651508331298828, -25.765975952148438, 20.242189407348633, 15.67026138305664, 58.785831451416016, 42.98695373535156, 46.96466064453125, 111.60249328613281, 86.35107421875, 22.94561767578125, 124.35577392578125, 30.735713958740234, -7.533805847167969, 54.51783752441406, -18.025768280029297, -13.684127807617188, 32.93339920043945, 158.86428833007812, 119.28482055664062, -8.75119400024414, -53.457733154296875, 2.6863632202148438, -39.10643005371094, 21.484464645385742, 2.4652042388916016, 33.293739318847656, 13.849842071533203, 15.143325805664062, 11.861763000488281, 74.63417053222656, 4.381439208984375, 75.38774108886719, 24.344451904296875, 48.43699645996094, 23.218482971191406, 41.86091613769531, 148.0382080078125, -2.4832000732421875, 30.366119384765625, -18.08422088623047, -1.8868255615234375, 41.474754333496094, 143.53955078125, 85.07219696044922, 114.10931396484375, 42.14283752441406, 79.0127182006836, 13.988588333129883, -0.19472503662109375, 66.52306365966797, 100.516357421875, 70.67880249023438, 50.75982666015625, 24.415802001953125, 35.46691131591797, 17.434932708740234, -0.3840293884277344, 23.309539794921875, -19.999526977539062, 82.7694091796875, 4.072441101074219, -4.9970550537109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000118.npy"} +{"epoch": 0.17327459618208516, "step": 119, "batch_size": 64, "mean": 35.49361801147461, "std": 56.217437744140625, "min": -68.71939086914062, "p10": -30.091124725341796, "median": 36.97261047363281, "p90": 96.8390823364258, "max": 210.70895385742188, "pos_frac": 0.6875, "sample": [12.789323806762695, 56.16526794433594, 38.86872863769531, 146.79620361328125, 38.6512451171875, -30.379440307617188, -7.692878723144531, 50.70892333984375, -5.300197601318359, 50.978965759277344, -9.949737548828125, -11.697776794433594, 210.70895385742188, 50.41065216064453, 18.968605041503906, 121.04464721679688, 21.772480010986328, 11.153194427490234, 182.0742645263672, 48.45191955566406, 91.77810668945312, 74.14283752441406, 75.87226104736328, -5.860164642333984, 28.69324493408203, 37.56016540527344, -34.34980773925781, 18.367385864257812, 54.870391845703125, 55.621978759765625, 0.49568939208984375, -3.4489212036132812, 99.00807189941406, 37.1546745300293, 170.67343139648438, -3.4051589965820312, 65.95738983154297, 80.42399597167969, -1.584981918334961, 27.328453063964844, -54.30781555175781, 42.491294860839844, -57.442962646484375, 129.63211059570312, 50.93414306640625, 66.05777740478516, -23.2061767578125, -31.207962036132812, 83.53601837158203, 36.79054641723633, -29.41838836669922, -68.71939086914062, -1.6962127685546875, 85.71005249023438, 17.26740264892578, 57.20083999633789, 51.544700622558594, 10.17503547668457, 64.97140502929688, -6.865819931030273, -42.57806396484375, 3.2443389892578125, 41.70112609863281, -18.04491424560547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000119.npy"} +{"epoch": 0.17474302496328928, "step": 120, "batch_size": 64, "mean": 48.66257858276367, "std": 59.5260124206543, "min": -80.05691528320312, "p10": -19.49566459655761, "median": 38.879791259765625, "p90": 135.8168411254883, "max": 191.32427978515625, "pos_frac": 0.84375, "sample": [150.0984344482422, 85.88238525390625, 32.75996398925781, 109.61524963378906, 36.67154312133789, 9.329496383666992, -60.16650390625, 101.48382568359375, 20.04180335998535, 73.69547271728516, 54.05913543701172, 160.58926391601562, 80.43921661376953, 6.273612976074219, 17.42595863342285, 19.751983642578125, 138.51702880859375, 58.35914611816406, 69.85840606689453, -41.40259552001953, -39.966552734375, 127.96928405761719, 39.77568054199219, 32.634185791015625, 41.680381774902344, 37.26702880859375, 46.3035888671875, 5.890338897705078, 164.36764526367188, -80.05691528320312, -22.622779846191406, 39.62431335449219, 26.09882354736328, 4.2706451416015625, 104.9891357421875, 10.058576583862305, 191.32427978515625, 54.02812576293945, 69.17196655273438, 3.450716018676758, 11.683391571044922, -7.533452987670898, 55.363258361816406, 184.3031005859375, 33.50306701660156, 42.8055419921875, 129.5164031982422, 172.70974731445312, 38.13526916503906, 45.728851318359375, 0.6747817993164062, 88.5505599975586, -42.16685485839844, 19.82781219482422, 63.28502655029297, 34.42033386230469, -1.7818183898925781, 112.46054077148438, 74.92900085449219, -13.287471771240234, 75.75341033935547, -22.15631866455078, 35.5364990234375, 2.6031341552734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000120.npy"} +{"epoch": 0.1762114537444934, "step": 121, "batch_size": 64, "mean": 54.022308349609375, "std": 70.0307846069336, "min": -149.16888427734375, "p10": -9.537339782714842, "median": 47.504913330078125, "p90": 149.11922912597657, "max": 219.86756896972656, "pos_frac": 0.78125, "sample": [8.927013397216797, -6.646720886230469, -2.607757568359375, 219.86756896972656, -6.1167449951171875, 9.462333679199219, 96.38340759277344, -15.890857696533203, 78.30762481689453, 76.49102020263672, -1.79571533203125, 62.922935485839844, 138.6522979736328, 110.41570281982422, 81.11660766601562, 0.23003387451171875, 89.64590454101562, 65.65792846679688, 54.354522705078125, -40.335845947265625, 35.645118713378906, 112.90219116210938, -57.63299560546875, 145.55616760253906, 168.8936004638672, 66.45506286621094, 33.98400115966797, 16.93918228149414, 111.742431640625, 4.008966445922852, 77.79200744628906, -22.90062713623047, 47.91542053222656, 83.14448547363281, 2.381143569946289, 200.114013671875, 4.51629638671875, 180.19924926757812, 44.22016906738281, 206.30599975585938, 8.236091613769531, 126.7548599243164, -10.776176452636719, 17.524734497070312, -21.752227783203125, 10.450920104980469, 75.37342834472656, 24.283065795898438, -5.6242218017578125, 55.21392822265625, 5.554872512817383, 19.782119750976562, 63.59147644042969, 130.97589111328125, 150.64625549316406, 71.98445129394531, -149.16888427734375, 57.952239990234375, -5.67462158203125, 78.77418518066406, 207.23983764648438, 19.499778747558594, -1.7318572998046875, 47.09440612792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000121.npy"} +{"epoch": 0.1776798825256975, "step": 122, "batch_size": 64, "mean": 67.32794952392578, "std": 66.2765884399414, "min": -90.13690185546875, "p10": 0.8278617858886727, "median": 52.821231842041016, "p90": 162.1453598022461, "max": 233.86724853515625, "pos_frac": 0.90625, "sample": [149.37168884277344, 21.622207641601562, 75.9307632446289, 83.59307861328125, 19.179704666137695, 26.61675262451172, 49.46311950683594, 96.06763458251953, 22.67456817626953, 115.16288757324219, 15.652351379394531, 130.01693725585938, 124.98816680908203, 233.86724853515625, 103.59888458251953, 35.351539611816406, 16.46143341064453, 54.11585235595703, 161.80421447753906, 49.993682861328125, 20.190099716186523, 140.6284942626953, 36.2274284362793, -9.45635986328125, 83.52561950683594, 11.640213012695312, 63.433250427246094, 141.21517944335938, 74.31771087646484, 65.21117401123047, 76.1354751586914, -26.068984985351562, 171.658447265625, -6.058256149291992, 52.47632598876953, 162.50518798828125, 63.921104431152344, 26.23760223388672, 175.38497924804688, 155.35745239257812, 9.469205856323242, 53.1661376953125, -32.48224639892578, 232.84400939941406, 17.407752990722656, 38.49848556518555, 32.16681671142578, 42.67489242553711, 24.92755889892578, -90.13690185546875, -6.9914703369140625, 27.700668334960938, 30.978906631469727, 66.60995483398438, 201.276123046875, 1.5823783874511719, 116.36424255371094, 54.48108673095703, 0.5044975280761719, 13.809181213378906, 162.29156494140625, 75.40797424316406, 20.21822166442871, 152.2049560546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000122.npy"} +{"epoch": 0.17914831130690162, "step": 123, "batch_size": 64, "mean": 44.054771423339844, "std": 73.77658081054688, "min": -150.53848266601562, "p10": -26.98945560455322, "median": 22.982162475585938, "p90": 143.1687057495118, "max": 235.95712280273438, "pos_frac": 0.65625, "sample": [78.35038757324219, 81.06877136230469, -8.819374084472656, 9.34628677368164, 88.48060607910156, 77.46011352539062, 121.39524841308594, 111.73101806640625, 80.44852447509766, 49.10289764404297, -0.5450611114501953, -21.284927368164062, 8.11069107055664, -41.415748596191406, 156.2360076904297, 87.53543090820312, 16.61172866821289, -12.643997192382812, -4.8573760986328125, 67.87748718261719, -6.1020050048828125, 123.53910827636719, 67.84591674804688, -76.12620544433594, 22.860877990722656, 96.6842041015625, 214.82196044921875, 23.10344696044922, 151.58139038085938, 164.98019409179688, -25.490942001342773, 64.0431900024414, 20.79178237915039, 7.763786315917969, 85.4143295288086, 120.22764587402344, 20.436195373535156, 47.681976318359375, 178.0261993408203, -20.244277954101562, 92.62721252441406, 83.11380004882812, -41.66560745239258, -67.86634826660156, 78.24888610839844, -8.835174560546875, -44.137332916259766, -5.046934127807617, -17.925071716308594, 235.95712280273438, -27.631675720214844, 65.814697265625, 51.781959533691406, 206.57965087890625, -9.671852111816406, 21.919031143188477, 121.83509826660156, 34.83704376220703, -150.53848266601562, -16.65375518798828, -1.5865936279296875, -10.744915008544922, 0.6416816711425781, 2.425291061401367], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000123.npy"} +{"epoch": 0.18061674008810572, "step": 124, "batch_size": 64, "mean": 64.96229553222656, "std": 95.20276641845703, "min": -118.80995178222656, "p10": -26.79657821655273, "median": 43.427635192871094, "p90": 177.07994079589847, "max": 377.9284362792969, "pos_frac": 0.78125, "sample": [148.08836364746094, -25.200424194335938, 41.71110534667969, 34.97039794921875, 25.814228057861328, 150.50941467285156, -97.65211486816406, -9.272476196289062, 160.0582275390625, 189.98007202148438, -3.1584396362304688, 30.75029754638672, 4.641260147094727, 69.79490661621094, 77.9240493774414, 229.95687866210938, 157.56768798828125, 52.33060836791992, -18.222259521484375, 51.17388153076172, -118.80995178222656, -40.85603332519531, 140.74856567382812, -4.138042449951172, 203.02816772460938, 131.1697540283203, 77.20277404785156, -4.36468505859375, 46.370811462402344, 120.42361450195312, -27.48064422607422, 32.80760955810547, 90.89198303222656, 14.79814338684082, 10.5411376953125, 344.72222900390625, 102.94146728515625, 179.16152954101562, 30.282455444335938, 172.222900390625, 104.86380767822266, -52.418418884277344, 17.732196807861328, -19.286331176757812, 40.05621337890625, 124.04754638671875, -103.48031616210938, 98.96784973144531, 18.16834831237793, 45.1441650390625, 39.742549896240234, 79.11890411376953, -67.62875366210938, 254.39944458007812, 22.89349365234375, 377.9284362792969, 46.707794189453125, 168.07455444335938, 1.0683746337890625, 32.54546356201172, 52.3577880859375, 78.28301239013672, 14.367759704589844, 10.503551483154297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000124.npy"} +{"epoch": 0.18208516886930984, "step": 125, "batch_size": 64, "mean": 57.670806884765625, "std": 77.84563446044922, "min": -91.95611572265625, "p10": -29.077206420898435, "median": 33.228705406188965, "p90": 164.0551177978516, "max": 251.9818115234375, "pos_frac": 0.78125, "sample": [-3.2580394744873047, 150.86631774902344, 91.48394775390625, 23.260223388671875, -14.698097229003906, 9.716075897216797, 114.07386779785156, -27.396484375, 140.758056640625, 16.36631202697754, 10.85556411743164, -29.797515869140625, 24.75690269470215, 1.0759124755859375, 101.5174789428711, 110.04786682128906, 87.32417297363281, 53.98957061767578, 18.857194900512695, 3.431537628173828, 105.60113525390625, 18.188339233398438, -55.11973571777344, -10.856842041015625, 90.6788558959961, 251.9818115234375, -35.391822814941406, 30.862340927124023, 108.19024658203125, 36.07282257080078, -4.434898376464844, -79.87454986572266, 35.067100524902344, 17.231374740600586, 30.981178283691406, 211.66294860839844, 6.967082977294922, 207.99899291992188, 120.76873779296875, -23.475936889648438, 12.003990173339844, 102.83053588867188, 189.14434814453125, 92.21150970458984, 229.2197265625, 91.46397399902344, 55.060218811035156, 47.47251510620117, 168.91897583007812, 51.814735412597656, 12.194168090820312, 126.86177062988281, 41.75889587402344, -16.865446090698242, 110.99185180664062, 27.33401107788086, -43.39814758300781, 151.7960968017578, 31.390310287475586, -91.95611572265625, 207.05665588378906, 152.70611572265625, -33.480796813964844, 28.0714054107666], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000125.npy"} +{"epoch": 0.18355359765051396, "step": 126, "batch_size": 64, "mean": 53.836448669433594, "std": 72.14073181152344, "min": -77.97016143798828, "p10": -22.35788116455078, "median": 38.011186599731445, "p90": 158.9620376586914, "max": 251.86541748046875, "pos_frac": 0.828125, "sample": [119.90411376953125, 64.87643432617188, 10.907796859741211, 88.77872467041016, 13.420791625976562, 52.34899139404297, -38.21954345703125, 2.192136764526367, 60.32823181152344, 156.92938232421875, -52.2987060546875, 1.6846275329589844, 251.86541748046875, 19.87763214111328, 86.5013198852539, 1.5753097534179688, 131.25595092773438, 41.982276916503906, 131.14694213867188, 67.094970703125, 44.45964050292969, 52.99155044555664, -7.982269287109375, 37.91312026977539, 64.88787841796875, 186.54736328125, -22.55168914794922, 159.8331756591797, 32.56342697143555, 47.74836730957031, 19.366756439208984, 25.590370178222656, 74.21783447265625, 14.851974487304688, 179.33966064453125, 38.1092529296875, 56.72837829589844, -75.68637084960938, 146.4072265625, -15.81088638305664, -61.050384521484375, -36.31837463378906, 237.60452270507812, -3.463743209838867, 27.651159286499023, 10.045360565185547, 1.4134254455566406, 20.893985748291016, 125.00161743164062, 116.55886840820312, -77.97016143798828, 190.99549865722656, 84.28475952148438, 23.126068115234375, 35.47807312011719, 95.58912658691406, 30.153564453125, 63.25835418701172, 17.147430419921875, -21.905662536621094, 14.005073547363281, 91.99147033691406, 11.041961669921875, 178.32293701171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000126.npy"} +{"epoch": 0.18502202643171806, "step": 127, "batch_size": 64, "mean": 39.496803283691406, "std": 62.4508056640625, "min": -124.76553344726562, "p10": -34.757039642333986, "median": 40.90530014038086, "p90": 116.57612228393555, "max": 182.59117126464844, "pos_frac": 0.78125, "sample": [55.345069885253906, 81.0044174194336, -9.174047470092773, 36.829811096191406, 2.74884033203125, -94.82041931152344, 151.74879455566406, 8.545562744140625, 98.36266326904297, -124.76553344726562, 8.201959609985352, 60.978843688964844, 75.05534362792969, -33.19261932373047, 18.254175186157227, 36.4964599609375, 92.82891082763672, 10.695486068725586, -122.93278503417969, 73.6542739868164, 25.92926788330078, 72.31669616699219, 125.88238525390625, 20.692489624023438, 68.92781066894531, 168.50418090820312, 21.785526275634766, 82.177978515625, -21.00708770751953, 15.339195251464844, 119.08171844482422, 77.0457992553711, 107.17951965332031, 25.27960968017578, 52.35993957519531, 146.6350860595703, -2.0668487548828125, -19.1138916015625, 20.45020294189453, -1.5613975524902344, 17.452770233154297, 115.88005828857422, 9.172233581542969, 75.80023193359375, 47.00958251953125, 64.41486358642578, 44.98078918457031, 116.87443542480469, 110.3149185180664, 52.664154052734375, -45.36241912841797, 48.494415283203125, 19.873918533325195, 182.59117126464844, -59.63105773925781, 60.61408996582031, 46.500572204589844, 92.06034851074219, 47.12040710449219, 13.5810546875, 31.402191162109375, -24.640701293945312, -35.648406982421875, -35.42750549316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000127.npy"} +{"epoch": 0.18649045521292218, "step": 128, "batch_size": 64, "mean": 41.37461853027344, "std": 73.61337280273438, "min": -171.81829833984375, "p10": -47.87363204956054, "median": 36.40241622924805, "p90": 145.2020721435547, "max": 250.60215759277344, "pos_frac": 0.6875, "sample": [46.427494049072266, 43.1114387512207, 134.2632293701172, -14.7244873046875, 90.66543579101562, -61.4578742980957, 101.09422302246094, 143.94671630859375, 36.05010223388672, 42.32557678222656, 164.9189453125, 51.566802978515625, 48.54713439941406, 36.02568054199219, -84.98448944091797, 97.53184509277344, 151.24334716796875, 4.685493469238281, -0.1740131378173828, 75.01766967773438, -71.43292236328125, -3.8356285095214844, 250.60215759277344, -171.81829833984375, 148.95169067382812, 48.71685028076172, 159.43768310546875, 145.74008178710938, 69.2773666381836, 48.64259338378906, -10.956134796142578, -57.11676025390625, 102.58824920654297, -4.0853729248046875, -18.302459716796875, 18.706619262695312, -78.72836303710938, -4.810955047607422, 34.18519592285156, 47.585208892822266, 87.70594024658203, -40.14661407470703, 54.40867614746094, -24.238269805908203, -8.582639694213867, 147.3457794189453, 32.26248550415039, 36.754730224609375, 23.175487518310547, 50.94187545776367, 21.91046905517578, -8.83125114440918, 125.961669921875, -1.3370838165283203, 13.05168342590332, 2.546051025390625, -11.336418151855469, 54.35753631591797, 101.78213500976562, 11.897918701171875, -51.185211181640625, 127.58087158203125, 141.343505859375, 1.1791458129882812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000128.npy"} +{"epoch": 0.18795888399412627, "step": 129, "batch_size": 64, "mean": 43.762454986572266, "std": 83.70062255859375, "min": -141.66180419921875, "p10": -42.321146392822264, "median": 30.899341583251953, "p90": 145.31244201660155, "max": 363.89959716796875, "pos_frac": 0.65625, "sample": [3.343973159790039, 32.218231201171875, -44.56209182739258, 110.13655090332031, -47.285133361816406, 122.00967407226562, 10.499610900878906, -42.386077880859375, 167.6837158203125, 26.080896377563477, -10.554214477539062, 105.82518005371094, 26.864604949951172, 43.23602294921875, 55.980342864990234, 72.61412811279297, 147.55531311035156, -37.748382568359375, -31.798843383789062, 54.972625732421875, 215.49603271484375, 88.80406188964844, 218.48968505859375, -94.32982635498047, 86.11973571777344, -87.47344970703125, -42.169639587402344, 38.989845275878906, 363.89959716796875, 139.99508666992188, 27.816730499267578, 16.018218994140625, -0.9134521484375, 76.40705108642578, -10.473831176757812, -16.413959503173828, 28.379287719726562, 122.29380798339844, 29.58045196533203, -14.826980590820312, -72.11573028564453, -7.422018051147461, 48.95860290527344, 79.11836242675781, 145.57870483398438, -141.66180419921875, 16.179189682006836, 19.084766387939453, -8.313562393188477, -7.046573638916016, 135.30325317382812, 169.42527770996094, 38.247528076171875, 48.047760009765625, 68.53596496582031, -16.90191650390625, 42.843505859375, 53.92144775390625, -37.41661834716797, 85.87963104248047, 144.691162109375, -30.648849487304688, 76.67069244384766, -0.5362491607666016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000129.npy"} +{"epoch": 0.1894273127753304, "step": 130, "batch_size": 64, "mean": 66.1448974609375, "std": 83.9789810180664, "min": -109.03595733642578, "p10": -20.165449523925776, "median": 46.191192626953125, "p90": 193.49052734375002, "max": 321.244384765625, "pos_frac": 0.78125, "sample": [73.54243469238281, 39.78194046020508, 31.11028289794922, 186.25152587890625, -15.876838684082031, 11.783889770507812, 200.29302978515625, 80.34416198730469, -51.37409210205078, -7.400510787963867, 15.564420700073242, 57.93839645385742, 35.49531936645508, 188.00857543945312, 202.4931640625, 21.96936798095703, 93.307861328125, -7.595512390136719, 37.232521057128906, 72.66840362548828, -10.543697357177734, 31.977676391601562, 7.183250427246094, 87.13096618652344, 92.35610961914062, -4.266853332519531, 33.8143310546875, 89.44489288330078, 58.966209411621094, 178.48101806640625, 138.44032287597656, 210.998291015625, -34.49628448486328, 27.740859985351562, 148.61575317382812, -103.96891784667969, 34.36732482910156, 29.220165252685547, 34.40388488769531, -22.00342559814453, 60.544593811035156, 52.60044479370117, 74.01825714111328, 240.1696319580078, 155.61053466796875, -7.780239105224609, 321.244384765625, -40.811607360839844, -3.886920928955078, 89.1952896118164, 33.705589294433594, 39.28950881958008, 195.83993530273438, -31.74787139892578, 146.12403869628906, -109.03595733642578, 130.28602600097656, 1.918661117553711, 120.57754516601562, 115.31028747558594, 208.194580078125, 56.12718200683594, 33.30031204223633, 59.078704833984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000130.npy"} +{"epoch": 0.19089574155653452, "step": 131, "batch_size": 64, "mean": 66.29624938964844, "std": 72.53195190429688, "min": -148.121826171875, "p10": -0.901673126220703, "median": 50.27856636047363, "p90": 158.08023986816406, "max": 261.44140625, "pos_frac": 0.875, "sample": [50.1761360168457, 45.797027587890625, 51.140281677246094, 29.80417251586914, 261.44140625, -148.121826171875, 57.804508209228516, 39.14654541015625, 33.33501434326172, 104.10987091064453, 111.32611083984375, 20.600421905517578, 58.39189147949219, 13.417055130004883, 98.16749572753906, 34.98468017578125, 40.5786247253418, -1.5269355773925781, 138.34115600585938, 72.55284118652344, 41.54020690917969, 157.8388671875, -12.798851013183594, -12.855083465576172, 50.38099670410156, 42.97602844238281, 82.47377014160156, 7.9594879150390625, 198.44601440429688, 39.70403289794922, 41.795108795166016, -80.99015808105469, 158.18368530273438, 9.995307922363281, 220.28182983398438, 4.354511260986328, 100.54511260986328, 29.969200134277344, 138.1305389404297, 47.19389343261719, 128.07196044921875, 31.384923934936523, 202.84735107421875, 26.981082916259766, -90.03488159179688, 124.36282348632812, 7.253814697265625, 105.7599105834961, -0.6912384033203125, 25.948673248291016, -0.9918594360351562, 135.5665283203125, 9.026420593261719, 177.04698181152344, 151.03602600097656, 164.84255981445312, 89.92439270019531, 64.16082763671875, 29.835220336914062, 79.04931640625, 127.82054901123047, 93.9249038696289, 100.13874816894531, 83.10435485839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000131.npy"} +{"epoch": 0.19236417033773862, "step": 132, "batch_size": 64, "mean": 54.16513442993164, "std": 86.51465606689453, "min": -153.27203369140625, "p10": -17.02062759399414, "median": 42.25692939758301, "p90": 143.92914276123048, "max": 369.04376220703125, "pos_frac": 0.8125, "sample": [14.696651458740234, 8.759418487548828, 146.06201171875, -61.68965148925781, 67.07083129882812, 36.961585998535156, 64.80401611328125, 287.3134460449219, 309.7875061035156, 8.5335693359375, 71.98910522460938, 108.5212173461914, 20.84514045715332, 75.1898193359375, -153.27203369140625, 5.001960754394531, 218.12384033203125, -36.29931640625, 66.39800262451172, -13.788579940795898, 2.5926761627197266, -12.368064880371094, 125.82015228271484, 68.64643859863281, 21.09728240966797, -18.25684356689453, 0.4541168212890625, 112.81241607666016, 7.2836761474609375, 3.55352783203125, -14.136123657226562, -12.32415771484375, 19.505022048950195, 92.07891082763672, 77.69749450683594, 144.83706665039062, 50.41717529296875, 66.37615966796875, 74.90739440917969, 49.000789642333984, 53.240386962890625, 33.22250747680664, 31.395946502685547, 141.81065368652344, -103.81703186035156, 47.55227279663086, 117.86685943603516, -66.77953338623047, 12.851016998291016, 93.49269104003906, -19.895456314086914, 57.572486877441406, 105.56626892089844, 17.15808868408203, 55.957672119140625, -12.565948486328125, 30.25121307373047, 101.91435241699219, 26.387451171875, 12.420061111450195, 177.73504638671875, 26.392730712890625, 52.79131317138672, 369.04376220703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000132.npy"} +{"epoch": 0.19383259911894274, "step": 133, "batch_size": 64, "mean": 48.25655746459961, "std": 75.76319885253906, "min": -105.94534301757812, "p10": -19.086682128906247, "median": 36.03285598754883, "p90": 145.51405944824225, "max": 250.90113830566406, "pos_frac": 0.765625, "sample": [11.494873046875, 69.71218872070312, -7.0427398681640625, 109.83273315429688, 8.54312515258789, -40.34016418457031, -7.47540283203125, 35.85259246826172, -74.84310913085938, 250.90113830566406, 52.378990173339844, 71.30196380615234, -6.873626708984375, 102.19855499267578, -78.59814453125, 17.618423461914062, 88.71565246582031, -5.13970947265625, 212.38426208496094, 4.753000259399414, 67.36102294921875, 80.17501831054688, -13.395683288574219, 130.09536743164062, 2.3604812622070312, 106.20637512207031, 46.897857666015625, 77.97621154785156, 84.62932586669922, 65.50503540039062, 11.2071533203125, 20.254852294921875, 25.741979598999023, 244.9425048828125, -1.8969268798828125, 51.694915771484375, 71.26802825927734, -67.4830322265625, 207.57421875, 36.206695556640625, 38.62028884887695, 35.85901641845703, 53.25859832763672, 241.17877197265625, -105.94534301757812, -14.509445190429688, 67.73577880859375, 178.47198486328125, 18.378707885742188, 49.23698043823242, 8.182868957519531, 117.95448303222656, -54.59150695800781, 152.1220703125, 101.60106658935547, -21.048355102539062, 66.47337341308594, 46.434844970703125, -10.043190002441406, 20.546201705932617, 7.680534362792969, 0.055332183837890625, 9.544990539550781, 18.52564239501953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000133.npy"} +{"epoch": 0.19530102790014683, "step": 134, "batch_size": 64, "mean": 51.59928894042969, "std": 67.50420379638672, "min": -43.891868591308594, "p10": -31.62993984222412, "median": 42.31174087524414, "p90": 130.68828735351565, "max": 330.7307434082031, "pos_frac": 0.828125, "sample": [15.787384033203125, -43.891868591308594, -36.709259033203125, 125.98603820800781, 198.2984619140625, 33.59003448486328, 19.989734649658203, 41.48810958862305, -33.389610290527344, 47.77906036376953, 119.45246887207031, 23.80788803100586, 12.037057876586914, 5.987571716308594, 52.163307189941406, 41.89768981933594, -27.5240421295166, 47.281158447265625, 76.69087219238281, 24.964073181152344, 6.216264724731445, 11.3809814453125, 98.10074615478516, 17.32555389404297, 167.36012268066406, 2.4098968505859375, 107.6024169921875, 95.63525390625, 53.6746940612793, 18.242393493652344, 48.710784912109375, 131.69503784179688, 2.026296615600586, 48.065216064453125, 165.81280517578125, 113.1448745727539, 128.33920288085938, -39.73412322998047, 85.79296875, -18.191471099853516, 57.27923583984375, -34.583648681640625, 65.24533081054688, 6.614690780639648, 330.7307434082031, 11.998493194580078, 82.04017639160156, 41.10979461669922, 71.81813049316406, -4.405523300170898, 66.05581665039062, -25.035789489746094, -34.35431671142578, 176.575439453125, 59.07740020751953, 3.5196075439453125, 59.983787536621094, 39.14915084838867, 42.725791931152344, 71.13385772705078, -39.9813232421875, 80.57398986816406, 31.87143325805664, 153.91629028320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000134.npy"} +{"epoch": 0.19676945668135096, "step": 135, "batch_size": 64, "mean": 73.99042510986328, "std": 94.29330444335938, "min": -87.03752136230469, "p10": -33.80209045410156, "median": 53.408809661865234, "p90": 218.49710540771488, "max": 360.47564697265625, "pos_frac": 0.796875, "sample": [28.93181610107422, 133.14767456054688, 31.514469146728516, 241.54049682617188, 123.7872314453125, 73.12828826904297, 245.7684783935547, 41.207454681396484, 242.95883178710938, 222.23854064941406, 131.6090850830078, 89.82505798339844, 190.130615234375, 128.22116088867188, 273.077880859375, 118.14096069335938, 31.133384704589844, 198.9365234375, 11.532051086425781, -38.907798767089844, 0.9807643890380859, 113.00762939453125, 157.09713745117188, 77.57559204101562, 38.26976013183594, 98.77117919921875, 3.4374237060546875, 360.47564697265625, -22.632495880126953, 112.92906188964844, -62.210235595703125, 202.36972045898438, 209.76708984375, -75.27316284179688, 39.54129409790039, -16.462902069091797, 27.10957145690918, 56.15752410888672, 127.49664306640625, -1.8249664306640625, -58.687477111816406, 54.23432922363281, 85.51119995117188, 56.70018005371094, -0.5917015075683594, 52.583290100097656, 2.867328643798828, -14.307907104492188, 47.625450134277344, 16.276107788085938, 33.95172119140625, 111.96432495117188, 93.32244873046875, 87.0701675415039, 6.81329345703125, 99.02732849121094, 42.743324279785156, 226.94857788085938, 13.524131774902344, -87.03752136230469, -29.4718017578125, -35.657928466796875, -41.29267883300781, 6.7667999267578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000135.npy"} +{"epoch": 0.19823788546255505, "step": 136, "batch_size": 64, "mean": 49.19337463378906, "std": 92.4898452758789, "min": -77.71170043945312, "p10": -46.02523689270019, "median": 28.505130767822266, "p90": 121.8586311340332, "max": 408.912353515625, "pos_frac": 0.6875, "sample": [61.5748405456543, 74.39032745361328, 102.74459838867188, 98.62091064453125, -7.115028381347656, -49.70475387573242, 295.1488037109375, 130.9999237060547, -18.59136962890625, 66.3145751953125, 102.25746154785156, 65.06619262695312, -22.926727294921875, 13.025760650634766, 75.88844299316406, 54.099822998046875, 79.68527221679688, 34.969215393066406, 114.18586730957031, 321.5898132324219, -2.29296875, 72.26190185546875, 408.912353515625, 3.0607833862304688, 115.45835876464844, -0.01485443115234375, 223.92010498046875, -7.139240264892578, 259.64813232421875, 24.66079330444336, 9.879554748535156, 2.9683685302734375, 58.49513244628906, -73.7508544921875, 54.89991760253906, -66.12907409667969, 20.21563720703125, -25.18824005126953, 89.00834655761719, 89.1658935546875, 10.420503616333008, -15.900646209716797, -14.192184448242188, 63.635719299316406, 9.240203857421875, 2.1029186248779297, -25.416934967041016, 32.34946823120117, 93.13494873046875, 121.25202178955078, -46.555442810058594, -59.797142028808594, 35.55644226074219, 79.45941925048828, -77.71170043945312, 1.7166290283203125, 70.36282348632812, -22.234752655029297, -44.788089752197266, 24.26941680908203, -3.671703338623047, 122.11860656738281, 3.8480148315429688, -61.08638000488281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000136.npy"} +{"epoch": 0.19970631424375918, "step": 137, "batch_size": 64, "mean": 77.27559661865234, "std": 113.53436279296875, "min": -157.488525390625, "p10": -27.47138290405273, "median": 50.84297180175781, "p90": 251.29396667480495, "max": 387.982421875, "pos_frac": 0.84375, "sample": [-42.33808898925781, 9.479061126708984, 73.22935485839844, 118.9801254272461, 151.87991333007812, 32.973548889160156, 47.67291259765625, 90.32206726074219, -22.925281524658203, 103.24600219726562, 3.832571029663086, 95.26972961425781, 386.78326416015625, 18.923734664916992, 38.104408264160156, -113.40586853027344, 112.71278381347656, 10.999164581298828, 36.5692138671875, 151.6620635986328, 156.17733764648438, 80.63904571533203, 2.835296630859375, 387.982421875, 110.6546630859375, 330.55987548828125, 189.4927978515625, 125.69798278808594, 18.629384994506836, 168.71246337890625, -93.71888732910156, 9.948333740234375, 68.2763900756836, 11.103569030761719, 131.24429321289062, 9.484901428222656, 126.47988891601562, 52.173667907714844, 3.5076370239257812, 50.808128356933594, 64.59728240966797, 43.46119689941406, -4.146278381347656, -157.488525390625, -45.57697296142578, -73.42081451416016, 363.8770751953125, -29.41971206665039, 38.761802673339844, 297.9104919433594, 55.43914794921875, 129.76504516601562, 3.5207347869873047, 93.63496398925781, -17.422090530395508, 27.655466079711914, 50.87781524658203, 9.392192840576172, 1.1478862762451172, 104.31865692138672, 307.6292724609375, 34.07908630371094, 124.60396575927734, 277.7801818847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000137.npy"} +{"epoch": 0.2011747430249633, "step": 138, "batch_size": 64, "mean": 58.77688217163086, "std": 72.48036193847656, "min": -113.36895751953125, "p10": -17.420133209228513, "median": 42.27101707458496, "p90": 152.15726013183598, "max": 276.49908447265625, "pos_frac": 0.78125, "sample": [159.74322509765625, -14.18670654296875, 72.62713623046875, 139.44009399414062, 6.771644592285156, 113.81885528564453, -17.944229125976562, -1.3329753875732422, 8.418487548828125, 174.9386749267578, 32.340824127197266, 20.984298706054688, 40.17964172363281, 11.280981063842773, 27.17133140563965, -113.36895751953125, 16.009363174438477, -20.990570068359375, -2.1047744750976562, -11.906455993652344, 191.237060546875, 177.47732543945312, 15.012245178222656, 9.744659423828125, 116.99505615234375, 129.3750762939453, 132.57803344726562, -3.9827346801757812, 92.3818588256836, 138.28663635253906, 58.150909423828125, 28.857282638549805, 128.4695281982422, 8.358306884765625, 138.79916381835938, 168.7644500732422, 74.19454956054688, 44.36239242553711, -68.28228759765625, 61.745277404785156, 65.43647003173828, 28.441024780273438, 89.32826232910156, 0.5788249969482422, -6.151947021484375, -54.5048828125, 99.44575500488281, 141.670654296875, 14.996334075927734, 39.688720703125, 112.30612182617188, 33.66163635253906, 134.3835906982422, -33.080345153808594, -22.261106491088867, 58.26736068725586, -16.197242736816406, 71.36654663085938, 20.026878356933594, 276.49908447265625, 104.20125579833984, 56.514991760253906, 106.03632354736328, 156.65151977539062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000138.npy"} +{"epoch": 0.2026431718061674, "step": 139, "batch_size": 64, "mean": 70.17027282714844, "std": 87.16915893554688, "min": -109.48736572265625, "p10": -29.78574066162109, "median": 57.03584671020508, "p90": 166.10056457519534, "max": 372.2355651855469, "pos_frac": 0.8125, "sample": [-19.651336669921875, 157.65841674804688, 100.1812515258789, 170.99461364746094, -31.530181884765625, 66.27108001708984, 120.8639144897461, -12.111244201660156, 82.96791076660156, 145.77227783203125, -109.48736572265625, 57.77565002441406, -47.504024505615234, 271.63427734375, 140.07913208007812, 47.22887420654297, 86.53133392333984, 56.296043395996094, 33.488555908203125, 62.906253814697266, 143.27981567382812, 73.48422241210938, -72.5782241821289, 123.01726531982422, 48.06819152832031, 1.7939300537109375, 159.0737762451172, 83.77668762207031, 16.073759078979492, 39.65686798095703, 155.29893493652344, 24.813074111938477, 125.74465942382812, 116.48963165283203, 37.74903869628906, 92.03291320800781, -19.92792510986328, -2.0039234161376953, 9.25788688659668, 17.733192443847656, 79.87163543701172, 0.5948104858398438, 15.980781555175781, 150.48968505859375, 23.089988708496094, 264.9932861328125, 95.57083892822266, -25.715377807617188, 76.13140106201172, -46.86614227294922, 83.3724136352539, -45.76500701904297, 34.23963165283203, 85.44007110595703, 372.2355651855469, 39.65120315551758, 251.3206024169922, 38.172943115234375, 49.524436950683594, -39.76969909667969, 190.87594604492188, 30.889297485351562, 44.258338928222656, 169.11204528808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000139.npy"} +{"epoch": 0.20411160058737152, "step": 140, "batch_size": 64, "mean": 63.89509582519531, "std": 89.90765380859375, "min": -128.57273864746094, "p10": -49.25709457397461, "median": 65.22920989990234, "p90": 174.25756072998053, "max": 344.6651611328125, "pos_frac": 0.734375, "sample": [-25.842857360839844, 123.05000305175781, 344.6651611328125, -75.71186065673828, -13.819091796875, 123.83750915527344, 84.4466781616211, 147.4739532470703, 58.21617126464844, 162.1934356689453, 116.9503173828125, 72.12751007080078, 113.89610290527344, 41.54342269897461, 22.413467407226562, 85.56907653808594, 272.8030090332031, 12.931774139404297, 6.167736053466797, 76.21904754638672, 21.732162475585938, 66.05870819091797, 87.14035034179688, 137.28799438476562, -1.9120674133300781, -56.78550720214844, 256.03460693359375, -49.50288391113281, 36.53558349609375, 84.25886535644531, 22.857301712036133, 63.561180114746094, 8.915948867797852, 114.54135131835938, -33.255950927734375, 62.93048095703125, 92.87144470214844, 62.915916442871094, -8.28692626953125, 163.275146484375, 132.97686767578125, -33.47967529296875, 178.9643096923828, -128.57273864746094, -1.7961807250976562, 64.83851623535156, 65.61990356445312, 92.66996002197266, 190.15634155273438, 25.26715087890625, -8.450654983520508, 156.3026885986328, -13.173168182373047, 19.843521118164062, -90.13905334472656, -55.981449127197266, -74.35088348388672, 67.23655700683594, 185.48416137695312, 179.58059692382812, -48.68358612060547, 86.2785415649414, 83.61138916015625, 134.77859497070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000140.npy"} +{"epoch": 0.2055800293685756, "step": 141, "batch_size": 64, "mean": 60.35598373413086, "std": 95.6855697631836, "min": -246.20004272460938, "p10": -46.79419326782226, "median": 59.79778861999512, "p90": 183.61264038085943, "max": 298.39166259765625, "pos_frac": 0.78125, "sample": [61.21563720703125, 38.539207458496094, -15.14303207397461, 100.55327606201172, 80.05473327636719, -32.09275817871094, 105.9190673828125, 60.37633514404297, 0.7710800170898438, -8.784660339355469, 59.219242095947266, 257.30975341796875, -246.20004272460938, 49.678123474121094, -5.0782928466796875, 35.590240478515625, 0.14740562438964844, -54.13397216796875, 261.87774658203125, -63.29803466796875, -36.79778289794922, 204.0365753173828, 48.52797317504883, 84.57138061523438, 145.6392822265625, 82.87350463867188, 106.43080139160156, 3.8524551391601562, -56.660247802734375, 73.36923217773438, 111.89576721191406, 78.846923828125, 33.470420837402344, 171.272705078125, 253.72219848632812, 27.15971565246582, 117.53089141845703, 24.672273635864258, -19.77369499206543, 17.11172103881836, -24.02686309814453, 50.849143981933594, -51.85059356689453, 188.90118408203125, 4.978340148925781, 140.09759521484375, 194.8494873046875, 121.86763000488281, 11.56364631652832, -161.385986328125, 18.687217712402344, 97.24562072753906, 62.817626953125, 88.27218627929688, 111.86395263671875, 134.24859619140625, 298.39166259765625, 127.3574447631836, 1.3051471710205078, 162.20111083984375, 68.34632873535156, 102.33563232421875, -51.078369140625, 6.6718597412109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000141.npy"} +{"epoch": 0.20704845814977973, "step": 142, "batch_size": 64, "mean": 68.87380981445312, "std": 108.02769470214844, "min": -211.85919189453125, "p10": -47.24861564636229, "median": 35.30228042602539, "p90": 238.8000457763672, "max": 297.62432861328125, "pos_frac": 0.765625, "sample": [-20.76752471923828, 265.2082214355469, 33.59907531738281, 41.92504119873047, -30.69274139404297, 245.1492462158203, -26.215557098388672, -22.047842025756836, -91.99703979492188, 33.36629867553711, 137.72329711914062, 95.46499633789062, 156.80401611328125, 30.85857582092285, 34.266014099121094, 226.5522918701172, 15.95479965209961, -211.85919189453125, -155.1126708984375, 75.52753448486328, 19.505233764648438, 31.756927490234375, 17.922061920166016, 297.62432861328125, 168.80885314941406, 36.33854675292969, 146.64065551757812, 26.01181983947754, 145.7069091796875, -22.227008819580078, 106.08895874023438, 120.19844055175781, 236.95301818847656, 84.59840393066406, -1.5792388916015625, 209.27467346191406, 215.91477966308594, 17.145248413085938, 18.597875595092773, 41.139930725097656, -65.37321472167969, -54.343990325927734, 3.1690292358398438, 239.5916290283203, 68.8595962524414, 249.3000030517578, 14.502208709716797, 22.613075256347656, 70.29396057128906, -60.2296142578125, 56.70759963989258, -20.00177001953125, 6.828157424926758, -81.19171142578125, 192.0675506591797, 242.07745361328125, 26.833908081054688, 193.70587158203125, 26.16857147216797, -9.966573715209961, 125.45336151123047, 244.32052612304688, 42.21653747558594, 124.19438934326172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000142.npy"} +{"epoch": 0.20851688693098386, "step": 143, "batch_size": 64, "mean": 44.20860290527344, "std": 94.7577896118164, "min": -154.72769165039062, "p10": -56.80705184936523, "median": 28.569091796875, "p90": 146.6190368652344, "max": 337.7685546875, "pos_frac": 0.65625, "sample": [84.80064392089844, -7.83099365234375, 52.95500946044922, 25.77631378173828, 283.9988098144531, 93.06377410888672, -57.85539245605469, 100.40531158447266, 106.67410278320312, 24.396699905395508, 144.17672729492188, -83.62644958496094, -54.360923767089844, 135.22787475585938, 84.7527847290039, -126.85535430908203, -48.977638244628906, -2.5711708068847656, 105.7412109375, -5.56976318359375, 34.08076477050781, 259.3481140136719, -70.33784484863281, 99.02389526367188, 19.77233123779297, -17.701213836669922, 44.284889221191406, 147.66574096679688, 30.64856719970703, 15.417285919189453, -12.903274536132812, -48.489288330078125, -60.36366271972656, 3.5500850677490234, 133.51512145996094, 5.551513671875, 45.255210876464844, 68.7283935546875, 25.64208984375, 337.7685546875, -36.12207794189453, -11.985170364379883, 33.292823791503906, 84.10305786132812, -2.408672332763672, 57.30657958984375, 156.44290161132812, 140.24127197265625, -4.881839752197266, -52.63909912109375, -89.036376953125, 286.36944580078125, -35.50098419189453, 79.4896011352539, 64.06587219238281, 31.376245498657227, 111.56904602050781, 26.48961639404297, 179.75390625, -154.72769165039062, -10.780784606933594, 41.46534729003906, 3.1809616088867188, 17.507843017578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000143.npy"} +{"epoch": 0.20998531571218795, "step": 144, "batch_size": 64, "mean": 54.41508483886719, "std": 79.04264831542969, "min": -107.42367553710938, "p10": -34.26722450256348, "median": 44.7833366394043, "p90": 171.44002685546874, "max": 244.90689086914062, "pos_frac": 0.734375, "sample": [-34.47966003417969, 109.9692611694336, -4.475797653198242, 171.6605224609375, -102.3866195678711, -18.572784423828125, 69.7853775024414, 117.0390396118164, 44.498863220214844, 11.156330108642578, 65.02989959716797, -41.34194564819336, 76.9268798828125, 104.95558166503906, 58.37725067138672, 170.925537109375, 51.57170867919922, 204.6151123046875, 106.30242156982422, 141.32241821289062, 177.59439086914062, 192.686279296875, -23.289941787719727, -107.42367553710938, 105.7931137084961, -22.71295166015625, 48.40415954589844, -2.9422225952148438, 69.05830383300781, 22.292139053344727, -53.37322235107422, 75.2075424194336, 66.86463928222656, 15.57958984375, -47.823890686035156, 138.16278076171875, 47.546966552734375, 26.903423309326172, 24.313581466674805, 91.21378326416016, 33.42633056640625, 92.96183013916016, 244.90689086914062, 28.4915828704834, 28.58416748046875, 75.03654479980469, -33.771541595458984, 24.931732177734375, 19.022354125976562, 102.49250793457031, 122.67671966552734, 231.19969177246094, -2.5275421142578125, -30.52362060546875, -19.881141662597656, -22.928565979003906, 221.00718688964844, 15.63021469116211, 23.733793258666992, 1.7893600463867188, 45.06781005859375, 29.808387756347656, 150.24427795410156, -45.74773025512695], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000144.npy"} +{"epoch": 0.21145374449339208, "step": 145, "batch_size": 64, "mean": 57.850833892822266, "std": 81.21188354492188, "min": -119.04986572265625, "p10": -44.74619102478026, "median": 44.90338134765625, "p90": 148.4140411376953, "max": 265.99853515625, "pos_frac": 0.75, "sample": [24.00463104248047, -49.481475830078125, -97.28335571289062, 265.99853515625, 148.68740844726562, 113.71077728271484, 244.59832763671875, 1.8983879089355469, -119.04986572265625, 57.148990631103516, 5.970703125, 75.31915283203125, 144.9279327392578, -55.25053024291992, 10.7857666015625, 120.4184799194336, 52.341773986816406, 43.00611114501953, 120.78108215332031, -49.994361877441406, -11.22294807434082, 37.734161376953125, 43.60649490356445, 137.11611938476562, 89.83064270019531, -65.76864624023438, -9.594436645507812, 29.69515609741211, -8.5687255859375, 179.72946166992188, 22.26753807067871, 14.545654296875, 116.42584228515625, -33.69719314575195, -54.854217529296875, 120.53343200683594, 16.37763214111328, 22.467756271362305, 78.57070922851562, -10.071823120117188, 68.19613647460938, -6.536479949951172, 116.77220916748047, -17.310443878173828, 113.90055847167969, 196.8171844482422, 102.54383850097656, -22.912662506103516, 50.10610580444336, 124.52843475341797, 29.301860809326172, 42.367488861083984, 140.5105438232422, 156.60113525390625, 57.10116195678711, 18.87148666381836, -6.112297058105469, 67.32653045654297, 135.04429626464844, 121.18793487548828, 234.8734130859375, 46.20026779174805, 11.637374877929688, 147.77618408203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000145.npy"} +{"epoch": 0.21292217327459617, "step": 146, "batch_size": 64, "mean": 44.6298828125, "std": 89.0619125366211, "min": -239.08795166015625, "p10": -72.05926055908202, "median": 58.244956970214844, "p90": 159.42329406738284, "max": 243.17625427246094, "pos_frac": 0.671875, "sample": [34.60021209716797, -36.334007263183594, 109.15556335449219, -64.8556137084961, 18.845735549926758, 2.7386398315429688, 76.62682342529297, 64.07638549804688, 75.43032836914062, 63.48164367675781, 214.22109985351562, 97.17681121826172, 43.08574676513672, -10.148330688476562, 25.406784057617188, -8.296958923339844, 91.63689422607422, -12.0662841796875, 39.334938049316406, -17.825227737426758, -5.659708023071289, 30.051727294921875, 60.89392852783203, 120.37089538574219, -103.38706970214844, 68.79268646240234, 150.70245361328125, -239.08795166015625, -75.14653778076172, 207.521728515625, -80.33086395263672, 74.00881958007812, -4.676753997802734, 74.43954467773438, 190.19407653808594, -79.65657806396484, 47.92631530761719, 62.54151153564453, 117.65096282958984, 243.17625427246094, 6.202230453491211, -7.479118347167969, 59.70004653930664, 78.86764526367188, 58.53143310546875, 104.88741302490234, 101.68980407714844, -99.07026672363281, 57.95848083496094, -77.29512023925781, 80.89787292480469, 140.26602172851562, 163.16079711914062, 74.3213882446289, -9.080101013183594, 93.89701080322266, 35.231597900390625, -37.40953826904297, 84.78053283691406, -53.006202697753906, 228.3108367919922, -34.48457336425781, 189.88308715820312, -51.065521240234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000146.npy"} +{"epoch": 0.2143906020558003, "step": 147, "batch_size": 64, "mean": 61.472869873046875, "std": 91.58397674560547, "min": -147.00863647460938, "p10": -31.1400634765625, "median": 46.413923263549805, "p90": 196.75840454101566, "max": 298.83465576171875, "pos_frac": 0.75, "sample": [106.273681640625, -64.73309326171875, 40.647796630859375, 235.45269775390625, -31.350181579589844, -9.250823974609375, 98.10391235351562, 97.60576629638672, 50.078147888183594, -41.83655548095703, 29.293981552124023, 78.15398406982422, 59.74059295654297, 126.34884643554688, 26.575130462646484, 201.54324340820312, 116.72430419921875, 18.24538230895996, -5.226470947265625, 185.59378051757812, 160.6193389892578, -24.68201446533203, 174.8195343017578, 22.70201301574707, 278.262451171875, 46.93669891357422, 202.3181915283203, 108.13908386230469, -13.340547561645508, 45.18476104736328, 29.820636749267578, -135.1792449951172, 4.784738540649414, 115.65617370605469, 108.1939697265625, -28.04913330078125, 62.45189666748047, 298.83465576171875, 77.06625366210938, 21.425495147705078, 183.15423583984375, 14.2540283203125, -16.13017463684082, 15.8836669921875, 33.38584899902344, 59.762474060058594, -11.2623291015625, 84.44491577148438, 65.23963165283203, -30.64978790283203, 19.0911865234375, 52.6548957824707, 20.82217025756836, 175.5537567138672, -23.44232940673828, 59.37899398803711, 244.72097778320312, -35.27948760986328, 32.97312927246094, -147.00863647460938, 213.60675048828125, -58.75856018066406, 62.0281982421875, 45.89114761352539], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000147.npy"} +{"epoch": 0.21585903083700442, "step": 148, "batch_size": 64, "mean": 52.11481475830078, "std": 83.58572387695312, "min": -105.47306823730469, "p10": -29.133935546874998, "median": 35.203460693359375, "p90": 145.26886901855468, "max": 346.486572265625, "pos_frac": 0.796875, "sample": [-7.633319854736328, 52.36592102050781, 5.81268310546875, 81.67587280273438, -61.180625915527344, 100.69750213623047, 78.15132141113281, 40.400508880615234, -41.127708435058594, 199.43385314941406, 18.088829040527344, 144.72030639648438, 20.186124801635742, -16.58635711669922, 150.40652465820312, 251.7200927734375, 3.6423683166503906, 15.48333740234375, -39.985626220703125, -70.46244812011719, 134.5639190673828, 128.90499877929688, -10.085474014282227, 89.10348510742188, 96.29489135742188, 41.34965515136719, 61.592010498046875, 49.75773239135742, 108.00242614746094, 0.40062713623046875, 145.50396728515625, 33.584564208984375, 35.814476013183594, 68.35136413574219, -30.53900909423828, 48.101078033447266, 31.82970428466797, 17.284133911132812, 111.31517791748047, 57.09524154663086, 134.79122924804688, 34.592445373535156, -83.0631103515625, 46.795326232910156, 38.04930877685547, 22.456321716308594, 183.55413818359375, 50.18971252441406, -4.366752624511719, -25.855430603027344, -105.47306823730469, 38.174591064453125, 9.228139877319336, 7.483488082885742, 33.8612060546875, 6.79716682434082, 33.319190979003906, 346.486572265625, 321.360107421875, 7.67283821105957, -1.2001209259033203, 2.2793045043945312, 87.77691650390625, 6.404384613037109], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000148.npy"} +{"epoch": 0.2173274596182085, "step": 149, "batch_size": 64, "mean": 65.15960693359375, "std": 82.16009521484375, "min": -186.63270568847656, "p10": -8.316867828369139, "median": 58.25334358215332, "p90": 152.20992889404297, "max": 307.31597900390625, "pos_frac": 0.828125, "sample": [114.63289642333984, 65.216796875, 206.88504028320312, 59.085018157958984, 107.68561553955078, 68.91970825195312, 75.50936889648438, -12.136369705200195, -19.22760009765625, -9.297992706298828, 22.17793083190918, -5.616569519042969, 59.39565658569336, 44.30908203125, 59.714996337890625, -64.7286376953125, 90.66218566894531, 93.66558837890625, 45.057220458984375, 133.43734741210938, 110.34022521972656, 174.80490112304688, 113.90082550048828, 29.51166343688965, 14.606847763061523, 98.3896484375, 49.077606201171875, 125.87764739990234, 76.10614013671875, 131.718505859375, 63.13451385498047, -1.9330997467041016, 307.31597900390625, 37.402488708496094, 152.86996459960938, 25.376754760742188, 224.328857421875, 66.5268325805664, 29.06047821044922, 108.53719329833984, 24.07168960571289, 75.73924255371094, 11.692689895629883, 44.904327392578125, -1.3366317749023438, 54.62268829345703, 14.081775665283203, 57.421669006347656, -14.90167236328125, -186.63270568847656, 96.49658203125, 12.82379150390625, 2.3873748779296875, 150.6698455810547, 11.917448043823242, 278.5148620605469, 16.06282615661621, 41.68231964111328, 140.7079315185547, 266.544921875, 6.760978698730469, -6.027576446533203, 86.18138122558594, -86.4720687866211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000149.npy"} +{"epoch": 0.21879588839941264, "step": 150, "batch_size": 64, "mean": 63.47180938720703, "std": 94.86284637451172, "min": -83.58927917480469, "p10": -21.89505920410156, "median": 38.54088020324707, "p90": 170.87386016845704, "max": 418.6424560546875, "pos_frac": 0.8125, "sample": [293.2186279296875, 322.04901123046875, 18.32830047607422, 30.680130004882812, 60.61286926269531, -40.030067443847656, 185.7330322265625, 38.250892639160156, -79.83841705322266, 72.06806182861328, 418.6424560546875, -6.580015182495117, -18.317481994628906, 18.172149658203125, -8.380661010742188, 130.4802703857422, 19.358001708984375, 50.611610412597656, 48.5509033203125, 72.4264144897461, 38.830867767333984, 62.04033660888672, 148.80520629882812, 26.73956298828125, 108.84466552734375, 87.5064926147461, 191.4807586669922, 130.8261260986328, 15.486099243164062, 69.89532470703125, 14.348136901855469, -48.974647521972656, 84.1410140991211, 173.3190460205078, 67.95111083984375, 7.794502258300781, 165.16842651367188, 21.888593673706055, 143.05474853515625, -23.428306579589844, 92.77201843261719, -40.607215881347656, -40.7701416015625, 5.937225341796875, 118.48992156982422, 3.7037391662597656, 26.867149353027344, 63.97875213623047, 15.214500427246094, 39.224090576171875, 56.14131164550781, 69.24310302734375, 64.43132019042969, 19.097137451171875, 33.001853942871094, 349.72564697265625, 13.207128524780273, 16.510812759399414, 9.943628311157227, 32.0328369140625, 97.25977325439453, -10.909744262695312, -83.58927917480469, -0.46396636962890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000150.npy"} +{"epoch": 0.22026431718061673, "step": 151, "batch_size": 64, "mean": 63.88605499267578, "std": 79.71385955810547, "min": -146.40765380859375, "p10": -27.99817848205566, "median": 55.997718811035156, "p90": 181.56038208007814, "max": 249.3121337890625, "pos_frac": 0.828125, "sample": [-146.40765380859375, 221.0986785888672, 44.24147033691406, -2.3449325561523438, 40.25122833251953, 56.64768981933594, 65.78724670410156, 112.63809204101562, 71.70445251464844, 28.265243530273438, 180.78091430664062, -42.693359375, -29.061180114746094, 105.89137268066406, 55.410919189453125, -43.792118072509766, -6.115879058837891, 41.98082733154297, 57.22975158691406, 50.536537170410156, 231.54156494140625, 56.67561721801758, 143.67149353027344, 55.545753479003906, -52.479225158691406, 177.72247314453125, 24.950424194335938, 5.885040283203125, -25.517841339111328, 23.271194458007812, 37.06462097167969, 12.927129745483398, 19.89072036743164, 9.612321853637695, 109.51519775390625, 181.89443969726562, -62.728485107421875, 63.91344451904297, 105.54256439208984, 98.92013549804688, 8.090383529663086, 59.18054962158203, 96.27629089355469, 81.31095886230469, 12.141525268554688, 193.4599609375, 104.65089416503906, 56.449684143066406, 120.22158813476562, -6.009025573730469, 210.4626922607422, 9.323097229003906, 3.3878040313720703, 139.52749633789062, 14.629440307617188, 14.182975769042969, 201.35842895507812, 111.24303436279297, 249.3121337890625, 175.8504638671875, -50.27668762207031, 75.93717956542969, 22.719528198242188, 105.40901184082031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000151.npy"} +{"epoch": 0.22173274596182085, "step": 152, "batch_size": 64, "mean": 77.40455627441406, "std": 94.28097534179688, "min": -89.18795776367188, "p10": -37.7243927001953, "median": 60.74921989440918, "p90": 214.30006561279302, "max": 314.8253173828125, "pos_frac": 0.796875, "sample": [30.776901245117188, 53.87877655029297, 161.41387939453125, 174.63194274902344, 74.84439086914062, 204.499755859375, 60.41489791870117, 140.22576904296875, 314.8253173828125, 35.72113037109375, 153.04824829101562, 129.4169158935547, 37.45321273803711, -66.89177703857422, -3.9911651611328125, 227.72113037109375, 139.85702514648438, 49.674713134765625, 67.10342407226562, 62.22206115722656, 56.78160095214844, 52.66063690185547, -46.80510711669922, 73.33633422851562, 2.241374969482422, 309.70257568359375, 129.66513061523438, 224.70899963378906, 43.22874450683594, 21.227256774902344, 65.16950988769531, -53.242095947265625, 22.424983978271484, -7.6202392578125, 2.269502639770508, 61.08354187011719, 18.55425262451172, 193.12896728515625, 31.925819396972656, 45.86907958984375, 280.0369567871094, -24.687965393066406, 54.24091339111328, 203.0565185546875, 67.93863677978516, -30.52899169921875, 239.37709045410156, 33.15251159667969, -74.59317779541016, 107.92375183105469, 5.064542770385742, 142.24757385253906, 88.20010375976562, 71.04888916015625, 218.5001983642578, -44.19940948486328, 160.29373168945312, -89.18795776367188, -6.875312805175781, 116.30381774902344, -40.808135986328125, -29.841575622558594, 133.4400634765625, 80.63113403320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000152.npy"} +{"epoch": 0.22320117474302498, "step": 153, "batch_size": 64, "mean": 65.91687774658203, "std": 100.3318862915039, "min": -175.88458251953125, "p10": -34.90672378540039, "median": 53.84783935546875, "p90": 194.97297363281254, "max": 410.70330810546875, "pos_frac": 0.75, "sample": [84.38493347167969, -1.9478912353515625, 9.605255126953125, -5.471824645996094, 21.307720184326172, -7.979949951171875, 27.889244079589844, 163.66452026367188, -38.5904541015625, 184.2993621826172, 92.33993530273438, 9.030708312988281, -39.874698638916016, 81.26820373535156, 241.2956085205078, -67.51117706298828, 76.54588317871094, 16.729272842407227, 55.04595947265625, 103.18819427490234, 52.64971923828125, 410.70330810546875, 38.18329620361328, 63.540565490722656, 81.6961669921875, 183.0355224609375, -27.61243438720703, -41.634483337402344, 199.54737854003906, 202.46621704101562, 140.1328887939453, 39.231285095214844, 236.197998046875, 6.241159439086914, 2.0634899139404297, 1.1024017333984375, 165.26580810546875, 94.30821990966797, -33.96818161010742, -35.308956146240234, 122.52481079101562, 14.06695556640625, 12.639297485351562, 263.57208251953125, 21.870948791503906, -15.595504760742188, -59.794830322265625, 63.978912353515625, 8.274940490722656, -175.88458251953125, 89.04908752441406, 353.1943359375, 74.91964721679688, -31.09807586669922, 67.59424591064453, -4.1974639892578125, 95.97315216064453, 44.00959777832031, 132.370849609375, 134.8035888671875, 75.61236572265625, -20.315467834472656, 62.37866973876953, 105.67245483398438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000153.npy"} +{"epoch": 0.22466960352422907, "step": 154, "batch_size": 64, "mean": 63.05192947387695, "std": 105.05284881591797, "min": -200.57623291015625, "p10": -53.813185119628905, "median": 54.552215576171875, "p90": 191.7267272949219, "max": 298.9565734863281, "pos_frac": 0.65625, "sample": [-10.916601181030273, 55.067726135253906, -200.57623291015625, 23.273767471313477, 298.9565734863281, 54.036705017089844, -9.270109176635742, 146.48724365234375, 175.90521240234375, -53.83151626586914, 92.21576690673828, 230.9051055908203, -86.74845886230469, 124.38713073730469, 117.05819702148438, 56.77096939086914, 2.3943614959716797, 20.671630859375, 36.09945297241211, -4.192560195922852, 109.94111633300781, 58.22068405151367, -5.649333953857422, 63.39921951293945, -28.18609619140625, 185.85733032226562, -53.77041244506836, 206.6873779296875, 151.16116333007812, -88.31611633300781, -33.362709045410156, 178.95208740234375, -80.09441375732422, 169.99005126953125, -49.87754821777344, 196.31900024414062, 294.3086853027344, 173.10128784179688, -4.125823974609375, 176.06040954589844, 192.89271545410156, 114.67868041992188, 18.049724578857422, 81.26406860351562, 189.00608825683594, -12.525543212890625, -16.982620239257812, -4.418039321899414, -91.08818817138672, 113.88459777832031, 155.16249084472656, -21.471923828125, 3.1725597381591797, 29.3994140625, 6.5364227294921875, -77.23643493652344, 108.30978393554688, 139.28109741210938, 119.54877471923828, -52.179443359375, -8.10821533203125, 1.1225872039794922, 81.15821838378906, 276.5565185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000154.npy"} +{"epoch": 0.2261380323054332, "step": 155, "batch_size": 64, "mean": 63.900962829589844, "std": 94.6701431274414, "min": -210.8380584716797, "p10": -22.153705787658687, "median": 61.44193649291992, "p90": 167.96864624023445, "max": 358.04290771484375, "pos_frac": 0.75, "sample": [-7.8691864013671875, 91.06450653076172, 55.9879150390625, -6.16845703125, 94.57026672363281, 14.619964599609375, 78.7343521118164, -26.187545776367188, 75.88190460205078, 22.130020141601562, -7.59417724609375, 62.114524841308594, 101.1413345336914, -79.031005859375, 107.26961517333984, 73.24211883544922, -24.20156478881836, 47.10749053955078, 60.76934814453125, 0.515289306640625, 76.72052001953125, 66.00920104980469, 42.99165344238281, 358.04290771484375, 185.91749572753906, 8.756940841674805, 135.45758056640625, 104.4605941772461, 9.271621704101562, -119.56999206542969, 22.845394134521484, -2.2329559326171875, 23.037322998046875, 91.615478515625, 189.06143188476562, -210.8380584716797, 276.49822998046875, 44.505287170410156, 129.55165100097656, -16.85985565185547, 1.1400718688964844, 93.57225799560547, 149.266845703125, 43.214874267578125, 283.4810485839844, 94.83193969726562, 283.436767578125, 147.93707275390625, -76.14779663085938, 97.438232421875, -7.6178131103515625, 175.98370361328125, -10.87143325805664, 110.13443756103516, 88.3212890625, -48.60723114013672, 59.82440185546875, 130.68771362304688, 122.9209213256836, 10.95907974243164, 137.65309143066406, -2.480978012084961, -17.375368118286133, 72.61936950683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000155.npy"} +{"epoch": 0.2276064610866373, "step": 156, "batch_size": 64, "mean": 36.46794128417969, "std": 107.2270736694336, "min": -277.05517578125, "p10": -103.93064880371092, "median": 25.446046829223633, "p90": 135.71790161132813, "max": 284.664794921875, "pos_frac": 0.734375, "sample": [128.1805419921875, 57.89234161376953, 284.664794921875, 249.5410614013672, 171.35276794433594, 111.26934814453125, -196.51467895507812, 2.27923583984375, -89.675048828125, 20.856307983398438, -199.9597930908203, 132.0955810546875, 95.1369857788086, -23.5479736328125, -63.327640533447266, -138.01748657226562, 38.75115966796875, 108.12985229492188, 255.3667755126953, 75.82579040527344, 20.0850830078125, 123.28793334960938, 48.3961181640625, 21.067745208740234, 4.292764663696289, -7.415580749511719, 15.749122619628906, 29.84221649169922, 250.14300537109375, -18.933883666992188, 242.40289306640625, 137.27032470703125, 120.22373962402344, 66.63674926757812, 9.508209228515625, 7.790914535522461, 73.31468963623047, -119.83920288085938, -277.05517578125, -78.78369903564453, 81.54358673095703, -52.118797302246094, 3.2586517333984375, 127.08143615722656, 10.650527954101562, -110.04019165039062, 81.51676940917969, 120.27155303955078, 77.59602355957031, 10.815652847290039, -3.6816883087158203, 77.01018524169922, 58.98326873779297, 53.558815002441406, -111.6755599975586, -46.09684753417969, 10.573989868164062, 25.051162719726562, 7.1846160888671875, -1.1096954345703125, 25.840930938720703, 74.97174835205078, 6.622859954833984, 117.85519409179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000156.npy"} +{"epoch": 0.2290748898678414, "step": 157, "batch_size": 64, "mean": 81.8030776977539, "std": 105.34732055664062, "min": -299.86138916015625, "p10": -28.51651439666748, "median": 72.6314468383789, "p90": 201.25445861816408, "max": 380.6551513671875, "pos_frac": 0.828125, "sample": [159.73434448242188, 166.65628051757812, 87.87677764892578, 194.62791442871094, 131.31402587890625, 9.337890625, 75.99356079101562, 150.0084228515625, 3.5249099731445312, 157.4693603515625, 57.19856643676758, 216.19046020507812, 50.48583984375, 87.88044738769531, 223.14404296875, 204.0944061279297, 149.60198974609375, 103.77110290527344, 41.44544982910156, 160.58181762695312, 13.300941467285156, 36.715576171875, 69.26933288574219, -29.354312896728516, 112.06503295898438, 252.53443908691406, -4.509178161621094, 31.246925354003906, -2.521392822265625, 118.89097595214844, -299.86138916015625, -37.510169982910156, 27.180740356445312, 50.439109802246094, 125.40913391113281, 80.70724487304688, 250.15206909179688, 131.64102172851562, 104.84870910644531, -31.47247886657715, 26.656940460205078, 283.30303955078125, 35.057044982910156, 59.80146789550781, 9.121173858642578, 177.48463439941406, 107.47868347167969, -26.5616512298584, 178.38568115234375, 124.89376831054688, 183.79623413085938, 52.95591735839844, -54.81620407104492, 21.4095458984375, 183.64443969726562, 152.19570922851562, 380.6551513671875, 55.348487854003906, 50.29319763183594, -140.94403076171875, 17.721466064453125, -64.97724914550781, 13.027223587036133, -20.643341064453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000157.npy"} +{"epoch": 0.2305433186490455, "step": 158, "batch_size": 64, "mean": 87.10411071777344, "std": 131.95655822753906, "min": -169.78067016601562, "p10": -43.38369026184081, "median": 51.796875, "p90": 277.84151916503913, "max": 486.9445495605469, "pos_frac": 0.71875, "sample": [9.666500091552734, 11.120046615600586, 44.471336364746094, 52.04387664794922, 51.30345916748047, -29.031402587890625, 32.16090393066406, 83.51569366455078, 37.34776306152344, -46.59078598022461, 213.67990112304688, -113.91900634765625, 253.5044708251953, 80.47962188720703, -12.810134887695312, -55.46552276611328, -15.618772506713867, 109.20399475097656, -8.740652084350586, 15.416641235351562, 229.62835693359375, -35.90046691894531, 284.047119140625, -169.78067016601562, 302.15069580078125, 73.52234649658203, 141.56475830078125, 20.47418785095215, 35.29100036621094, 11.063236236572266, 82.70840454101562, 87.37667083740234, -18.730506896972656, 150.95916748046875, -54.18984603881836, -1.9693756103515625, 336.77716064453125, 102.29254150390625, -116.47994232177734, 334.5081787109375, 417.9927978515625, -5.3508148193359375, 66.14728546142578, 486.9445495605469, 21.6571044921875, 12.857856750488281, -49.20014572143555, 119.07144165039062, 232.397705078125, 109.32158660888672, 380.00506591796875, 166.5319366455078, 98.17533874511719, 172.6070098876953, -16.275480270385742, 175.95291137695312, 100.36601257324219, -8.852701187133789, 25.530006408691406, 102.9439926147461, 263.3617858886719, 51.54987335205078, 164.07037353515625, -20.1932315826416], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000158.npy"} +{"epoch": 0.23201174743024963, "step": 159, "batch_size": 64, "mean": 59.67416000366211, "std": 105.94503784179688, "min": -208.2112274169922, "p10": -58.72546310424805, "median": 59.50743865966797, "p90": 192.63998107910157, "max": 314.3858947753906, "pos_frac": 0.75, "sample": [9.703369140625, 44.78986358642578, 141.8675994873047, -46.92625427246094, 251.23971557617188, 140.33413696289062, -208.2112274169922, -39.079402923583984, 125.16041564941406, 166.84039306640625, 91.33285522460938, -58.12882995605469, 14.266168594360352, -26.786712646484375, 88.95091247558594, -69.61035919189453, -122.65121459960938, 54.3068733215332, 188.63717651367188, 142.44961547851562, 314.3858947753906, 7.740978240966797, -44.87959289550781, 19.294021606445312, 28.678199768066406, -74.45045471191406, 20.18359375, -53.468963623046875, 87.0811767578125, 54.58961486816406, -20.79163360595703, 194.35546875, 88.74645233154297, 10.083026885986328, 120.31710052490234, -21.547903060913086, 58.42442321777344, -98.61346435546875, -4.138786315917969, 230.29898071289062, 146.23043823242188, 221.7841796875, 61.738441467285156, 71.83148193359375, 1.5124435424804688, 277.026611328125, 12.763605117797852, 1.2852458953857422, 73.62732696533203, 60.5904541015625, 135.24908447265625, 29.048667907714844, -58.981163024902344, -164.83334350585938, 69.4139175415039, 96.31523132324219, 97.35086822509766, 103.07821655273438, 28.279626846313477, 303.7242126464844, 187.67230224609375, 64.06515502929688, 70.02175903320312, 125.57794189453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000159.npy"} +{"epoch": 0.23348017621145375, "step": 160, "batch_size": 64, "mean": 79.51667785644531, "std": 129.3326873779297, "min": -150.59056091308594, "p10": -68.55548477172852, "median": 63.48670196533203, "p90": 241.99942626953126, "max": 464.6781005859375, "pos_frac": 0.703125, "sample": [106.22396087646484, -11.49165153503418, 181.75457763671875, 154.34605407714844, -12.186748504638672, -70.43228149414062, 188.81643676757812, 186.8223419189453, 169.04339599609375, 12.455547332763672, -80.19326782226562, -8.343093872070312, 138.93276977539062, 239.87033081054688, 119.00350189208984, -72.12677001953125, 39.24528121948242, -36.39170837402344, -142.29908752441406, 228.53919982910156, 284.26141357421875, -116.61308288574219, 242.91189575195312, -64.03617095947266, 141.96319580078125, 44.82960510253906, 6.555610656738281, -14.661422729492188, 464.6781005859375, 62.43214416503906, 15.376861572265625, 64.541259765625, -30.52239990234375, 387.0714111328125, 1.85296630859375, 74.0106201171875, 107.2820816040039, 163.42701721191406, 372.132080078125, 304.18402099609375, 32.67637634277344, 217.436767578125, -150.59056091308594, 203.57470703125, 94.3259048461914, 16.202850341796875, -27.403108596801758, 21.048917770385742, 111.57186126708984, 114.24870300292969, -36.996238708496094, 32.473838806152344, 112.38554382324219, -64.1762924194336, 91.74282836914062, 80.62432861328125, -9.81059455871582, 0.00299835205078125, -97.6690902709961, 154.6090087890625, 28.644447326660156, 251.82122802734375, -43.84051513671875, 112.89747619628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000160.npy"} +{"epoch": 0.23494860499265785, "step": 161, "batch_size": 64, "mean": 69.5224380493164, "std": 103.48494720458984, "min": -135.74172973632812, "p10": -48.67658233642578, "median": 49.45800971984863, "p90": 232.22120208740242, "max": 318.586669921875, "pos_frac": 0.75, "sample": [74.75606536865234, 195.85931396484375, -49.815521240234375, 131.62477111816406, 298.0411376953125, -135.74172973632812, -52.994781494140625, -66.93475341796875, 96.50230407714844, -43.66332244873047, 45.343963623046875, 93.77864837646484, 78.4654541015625, -70.47478485107422, 4.279514312744141, 118.04408264160156, 318.586669921875, 82.63313293457031, 141.1278076171875, 265.69415283203125, 15.880683898925781, 56.64133834838867, -7.304389953613281, -34.423439025878906, 37.77128601074219, -10.403018951416016, 127.77301788330078, 38.43645477294922, 277.53826904296875, 297.8089904785156, 121.91380310058594, -32.92125701904297, -0.7756614685058594, -28.27303695678711, 241.32635498046875, 117.79076385498047, 33.678802490234375, -46.01905822753906, 15.108566284179688, 80.44622802734375, -11.171493530273438, 24.28314971923828, 11.415931701660156, 32.66229248046875, 17.82190704345703, 27.592681884765625, 206.07461547851562, 73.39569854736328, 15.929603576660156, 141.61190795898438, 210.97584533691406, -105.55693054199219, 73.32206726074219, 168.79513549804688, 46.898155212402344, 41.399688720703125, 32.516239166259766, 126.49815368652344, 89.53501892089844, -89.19725036621094, 52.01786422729492, 252.9334716796875, 112.57861328125, 69.99686431884766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000161.npy"} +{"epoch": 0.23641703377386197, "step": 162, "batch_size": 64, "mean": 69.31608581542969, "std": 110.30030059814453, "min": -180.29393005371094, "p10": -41.98650283813476, "median": 60.63106918334961, "p90": 199.01107482910157, "max": 443.7065734863281, "pos_frac": 0.75, "sample": [-0.5768814086914062, 53.44204330444336, 104.45840454101562, 241.64341735839844, 56.29864501953125, 219.68771362304688, 29.902311325073242, 135.3688201904297, 59.315345764160156, -7.967077255249023, -20.331165313720703, 195.83050537109375, 69.8980941772461, -105.06688690185547, -39.08941650390625, -18.8963623046875, 160.02960205078125, 163.55404663085938, -4.877166748046875, 122.53025817871094, 82.36397552490234, 27.745216369628906, 52.23005676269531, 7.055206298828125, 21.078405380249023, 200.37417602539062, 130.69046020507812, -32.40690612792969, -180.29393005371094, 145.3821563720703, 103.16130828857422, -112.08192443847656, -14.761064529418945, 19.366735458374023, 147.44784545898438, 69.21757507324219, 231.17807006835938, 443.7065734863281, 64.03225708007812, 123.60039520263672, 85.47437286376953, 93.99647521972656, 156.0597381591797, 167.69480895996094, 144.48182678222656, 4.2658233642578125, -43.228111267089844, 0.29537200927734375, 90.49801635742188, 43.82158660888672, -89.26911926269531, 164.06201171875, 0.5667572021484375, -93.72576141357422, 384.9212646484375, 23.488876342773438, -113.5337142944336, 29.600296020507812, 76.85614013671875, 82.91742706298828, 61.94679260253906, -32.79052734375, 38.587738037109375, 215.0006103515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000162.npy"} +{"epoch": 0.23788546255506607, "step": 163, "batch_size": 64, "mean": 66.97450256347656, "std": 109.43936157226562, "min": -161.67919921875, "p10": -49.3012710571289, "median": 61.109683990478516, "p90": 253.3556823730469, "max": 347.054931640625, "pos_frac": 0.671875, "sample": [-12.139579772949219, -7.2708892822265625, 262.69366455078125, 347.054931640625, -26.960575103759766, -120.53945922851562, 98.15253448486328, -8.905471801757812, -8.764907836914062, 78.30412292480469, 130.6109619140625, -35.521034240722656, 103.51956939697266, 316.50048828125, 107.63117218017578, 18.673187255859375, 94.78533172607422, -18.498580932617188, 15.467079162597656, -62.49695587158203, 55.80207824707031, 157.17462158203125, 105.29507446289062, -37.94422149658203, 107.43460083007812, -1.5083770751953125, 121.97189331054688, -161.67919921875, -66.88643646240234, 106.24849700927734, 257.708740234375, 13.592113494873047, 136.40130615234375, 12.551763534545898, 70.1473617553711, -10.544239044189453, -52.80757141113281, 298.38067626953125, 127.6543197631836, 70.34092712402344, -71.354248046875, 33.13164520263672, -41.119903564453125, 243.19854736328125, 43.71012496948242, 104.93582153320312, -32.490028381347656, 302.29833984375, 148.3749237060547, 30.0048885345459, 61.00282287597656, 109.02130126953125, 61.24980163574219, 175.47665405273438, 61.21654510498047, -24.621170043945312, 81.4727554321289, 14.88250732421875, -95.17464447021484, 1.8885326385498047, 258.2104187011719, 91.28469848632812, 158.6983642578125, -10.559957504272461], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000163.npy"} +{"epoch": 0.2393538913362702, "step": 164, "batch_size": 64, "mean": 82.75531005859375, "std": 104.64041137695312, "min": -180.34097290039062, "p10": -21.13095531463623, "median": 68.37448120117188, "p90": 199.94809875488284, "max": 429.48455810546875, "pos_frac": 0.828125, "sample": [-137.98902893066406, 165.11090087890625, 164.32028198242188, 164.821533203125, 225.39797973632812, 53.61469268798828, 79.59831237792969, 129.63511657714844, 68.93611145019531, 76.81366729736328, 243.54449462890625, 49.24464416503906, 21.103715896606445, 50.603515625, -62.62690734863281, 128.9029541015625, 63.68816375732422, 65.11331939697266, -22.612581253051758, 67.73079681396484, 5.000089645385742, 133.0472412109375, 73.38480377197266, 194.14334106445312, 32.48271942138672, 150.52313232421875, -23.485912322998047, -88.88236236572266, 150.27880859375, 15.425775527954102, 45.44017028808594, -17.59851837158203, 149.34861755371094, 181.76348876953125, 139.33547973632812, 113.46266174316406, 55.377769470214844, 103.61851501464844, 31.17333221435547, 164.71424865722656, 39.06262969970703, 88.2462158203125, 67.81285095214844, 33.134490966796875, 45.21246337890625, 0.909820556640625, 72.90556335449219, 33.37346649169922, -180.34097290039062, 138.29417419433594, 76.72605895996094, 38.490142822265625, 334.7912292480469, 6.820343017578125, 342.12432861328125, 202.43585205078125, -1.3624420166015625, 216.0386962890625, 80.90385437011719, -55.66508483886719, -17.673828125, 104.83963012695312, 429.48455810546875, -3.7295684814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000164.npy"} +{"epoch": 0.24082232011747431, "step": 165, "batch_size": 64, "mean": 54.09429931640625, "std": 100.5995864868164, "min": -158.5150604248047, "p10": -76.75446701049805, "median": 39.637046813964844, "p90": 203.2449981689454, "max": 282.136962890625, "pos_frac": 0.703125, "sample": [92.30752563476562, 90.24571990966797, -87.13752746582031, 18.97637939453125, -93.54248046875, -2.1565628051757812, 78.1643295288086, 112.76142883300781, -14.655929565429688, -81.49610137939453, 130.85159301757812, -148.17080688476562, -75.02986145019531, 37.67186737060547, 33.80711364746094, -95.1075210571289, -8.146671295166016, -48.344757080078125, 154.817138671875, 154.48062133789062, 82.24021911621094, 24.623626708984375, 16.118961334228516, 282.136962890625, -158.5150604248047, 39.14385986328125, -18.17518424987793, -46.91563415527344, -1.642364501953125, 138.29473876953125, -77.49358367919922, 17.660734176635742, 210.05517578125, 182.89651489257812, 49.300689697265625, -25.17346954345703, 79.8660659790039, 60.3240966796875, 77.02750396728516, 4.18522834777832, 40.13023376464844, 63.840003967285156, 157.4480743408203, 78.627197265625, 217.38864135742188, -18.512969970703125, 96.89723205566406, 60.6700439453125, 7.405860900878906, -70.38616943359375, 31.534423828125, 227.3531494140625, 26.27660369873047, -42.879737854003906, 60.996219635009766, 267.147216796875, 28.715118408203125, 266.16851806640625, 223.21978759765625, 121.68754577636719, 117.31953430175781, 13.820182800292969, 113.55911254882812, 187.35458374023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000165.npy"} +{"epoch": 0.2422907488986784, "step": 166, "batch_size": 64, "mean": 99.35612487792969, "std": 140.33229064941406, "min": -92.65542602539062, "p10": -33.83179512023925, "median": 67.42060089111328, "p90": 273.2290679931642, "max": 574.2780151367188, "pos_frac": 0.796875, "sample": [453.5870361328125, 310.77484130859375, 32.235931396484375, 487.10357666015625, -37.62836456298828, 5.559429168701172, 19.91610336303711, 70.22097778320312, 171.70025634765625, 46.07806396484375, 126.59861755371094, -11.018295288085938, 10.873613357543945, 160.7397003173828, -65.97222900390625, 83.20497131347656, 159.80599975585938, 13.508331298828125, -8.513744354248047, 193.06710815429688, 3.0469608306884766, 574.2780151367188, 43.7295036315918, -50.79738998413086, -50.68121337890625, -50.879825592041016, 41.84727478027344, 146.51693725585938, -92.65542602539062, 239.68740844726562, 287.60406494140625, 469.3785705566406, 110.94709014892578, 71.57246398925781, 27.226852416992188, 368.8623046875, 11.628864288330078, 203.79385375976562, 26.5184326171875, 157.2769012451172, -19.333194732666016, 87.64891052246094, 18.458053588867188, 11.616127014160156, 168.68862915039062, 131.90354919433594, 225.5569610595703, -7.404956817626953, 77.05779266357422, 169.2545166015625, 31.67612075805664, 83.94569396972656, -62.218841552734375, 64.62022399902344, -6.669036865234375, -24.973133087158203, 113.51447296142578, 1.732208251953125, 119.61203002929688, 5.626861572265625, 93.47830200195312, 18.840856552124023, 206.81436157226562, 88.60189056396484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000166.npy"} +{"epoch": 0.24375917767988253, "step": 167, "batch_size": 64, "mean": 105.8395767211914, "std": 117.45293426513672, "min": -140.01513671875, "p10": -18.0773681640625, "median": 90.33937454223633, "p90": 260.4908264160157, "max": 431.74078369140625, "pos_frac": 0.828125, "sample": [103.54214477539062, 63.604373931884766, 146.00888061523438, 212.1926727294922, 111.94784545898438, 125.34742736816406, 95.74310302734375, 69.53184509277344, 98.54399108886719, 43.86616134643555, 431.74078369140625, 186.3111572265625, 71.85477447509766, -17.257781982421875, 23.21392822265625, 74.20515441894531, 83.56482696533203, -59.37084197998047, 131.93313598632812, 185.12753295898438, 23.93572425842285, 40.58750915527344, 79.21315002441406, 58.45716857910156, 105.42430114746094, -140.01513671875, 278.5299072265625, 246.53372192382812, -18.428619384765625, 173.0789794921875, -11.458833694458008, 107.09854125976562, 78.95458984375, 70.29521942138672, 376.535888671875, 175.73333740234375, 270.1706848144531, 84.9356460571289, 206.0802459716797, -126.86300659179688, 232.9504852294922, 266.4724426269531, 334.9249572753906, 149.56399536132812, 408.25177001953125, -14.178970336914062, 181.84288024902344, 149.2381591796875, 219.734619140625, 175.47970581054688, 104.37153625488281, 13.001205444335938, -2.3576488494873047, 31.932723999023438, 66.02880096435547, 136.64114379882812, 105.1851577758789, 58.744850158691406, 22.060012817382812, -100.50450897216797, 24.277759552001953, 14.374399185180664, -44.94239807128906, -19.810272216796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000167.npy"} +{"epoch": 0.24522760646108663, "step": 168, "batch_size": 64, "mean": 76.53134155273438, "std": 105.47482299804688, "min": -85.82365417480469, "p10": -51.78319282531738, "median": 54.31381034851074, "p90": 198.0081970214844, "max": 427.7503662109375, "pos_frac": 0.796875, "sample": [16.631210327148438, 112.81270599365234, 27.252967834472656, 20.061603546142578, 5.661623001098633, 33.37119674682617, 138.20468139648438, -47.32015609741211, 19.04141616821289, 11.744300842285156, -85.82365417480469, 189.3270721435547, 427.7503662109375, 94.99845886230469, -53.6959228515625, 195.7357177734375, 180.37637329101562, 238.41734313964844, 198.98211669921875, 48.83885955810547, 219.10684204101562, 273.06622314453125, 178.77212524414062, 175.00125122070312, -46.588722229003906, -57.79411315917969, -17.563560485839844, 128.42799377441406, 148.7082977294922, 137.30960083007812, 84.78739929199219, 27.23992919921875, -63.32215118408203, 99.03366088867188, 38.82093811035156, 116.73513793945312, -70.76715087890625, -10.645687103271484, -31.976154327392578, 30.416061401367188, 29.028261184692383, 89.56267547607422, 65.21279907226562, 94.77159118652344, 114.32249450683594, 69.50200653076172, 27.741300582885742, -61.20899200439453, 268.5356750488281, 56.62177276611328, 159.9774932861328, 22.17560386657715, 27.161142349243164, 26.523208618164062, 127.04895782470703, 59.38484191894531, 28.25459098815918, -83.38253021240234, 52.0058479309082, -32.178436279296875, 378.4818420410156, 16.60137176513672, 94.09054565429688, 136.63589477539062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000168.npy"} +{"epoch": 0.24669603524229075, "step": 169, "batch_size": 64, "mean": 66.98680114746094, "std": 130.0533447265625, "min": -276.7462158203125, "p10": -67.96896743774413, "median": 43.72690773010254, "p90": 221.20569610595706, "max": 563.3004150390625, "pos_frac": 0.75, "sample": [76.28128814697266, 86.99462890625, 4.952724456787109, 17.578529357910156, 215.97166442871094, 112.80123138427734, 18.830490112304688, 159.43190002441406, 90.59236907958984, 38.00875473022461, -0.8367195129394531, 44.527915954589844, -276.7462158203125, 1.314706802368164, 58.24154281616211, -24.818710327148438, 28.230804443359375, -84.65406799316406, 86.48318481445312, 165.82989501953125, 149.34901428222656, -55.557708740234375, 65.5216064453125, -88.46073913574219, 208.51512145996094, 223.4488525390625, 68.10806274414062, 5.7250518798828125, -16.893814086914062, 30.068214416503906, 215.04144287109375, -7.967567443847656, 19.59752082824707, 70.1307601928711, 91.57955932617188, 128.32516479492188, 253.27706909179688, 25.91548728942871, 66.81643676757812, 230.97227478027344, 5.442205429077148, 388.2625732421875, -124.41178131103516, 92.11700439453125, 3.7693252563476562, 54.941322326660156, 29.811447143554688, -73.28807830810547, 337.54559326171875, -16.16912078857422, 563.3004150390625, -3.9211273193359375, 93.28447723388672, 15.143287658691406, 287.48779296875, -9.5174560546875, 82.91331481933594, 18.730422973632812, 42.925899505615234, -25.39525604248047, -122.93299865722656, 92.42362976074219, 196.0880889892578, -143.92306518554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000169.npy"} +{"epoch": 0.24816446402349487, "step": 170, "batch_size": 64, "mean": 79.06198120117188, "std": 106.79735565185547, "min": -225.87057495117188, "p10": -32.792518615722656, "median": 64.7021255493164, "p90": 219.3711151123047, "max": 327.4330139160156, "pos_frac": 0.828125, "sample": [187.76934814453125, -136.03488159179688, 140.53826904296875, 82.0031509399414, 217.99920654296875, 80.44866943359375, 121.60330200195312, 12.619163513183594, 142.69635009765625, 5.2996368408203125, 91.69849395751953, -2.5084190368652344, 74.23194885253906, -37.69898986816406, 9.59501838684082, -49.05662536621094, 115.15007781982422, 234.84414672851562, 17.499435424804688, 304.93218994140625, 4.935020446777344, -21.161001205444336, 176.82261657714844, 48.3819694519043, 2.1078414916992188, 18.24140167236328, -225.87057495117188, -32.39026641845703, 69.94903564453125, 197.94692993164062, 70.17637634277344, 126.78636169433594, 55.260074615478516, 76.29341125488281, 178.92825317382812, -50.280738830566406, 313.1411437988281, 6.115394592285156, 10.625608444213867, 327.4330139160156, 74.8183364868164, -95.9468994140625, 58.19780349731445, 121.36819458007812, 202.38232421875, -32.96491241455078, 26.52397346496582, 1.389841079711914, 38.55199432373047, 59.45521545410156, 89.42154693603516, 45.647544860839844, 234.72488403320312, 42.940731048583984, 133.59674072265625, 54.864776611328125, 186.5785369873047, 219.95907592773438, -18.083667755126953, 191.15762329101562, 232.24740600585938, 12.712594985961914, 193.30288696289062, 20.049114227294922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000170.npy"} +{"epoch": 0.24963289280469897, "step": 171, "batch_size": 64, "mean": 63.538963317871094, "std": 133.1363983154297, "min": -267.56365966796875, "p10": -89.32990493774413, "median": 59.53072738647461, "p90": 213.01500244140627, "max": 581.0468139648438, "pos_frac": 0.703125, "sample": [-82.10015869140625, 222.3173065185547, 16.65361785888672, 92.63883972167969, -102.77147674560547, -49.475341796875, 6.640838623046875, 106.72259521484375, -78.56488037109375, 135.82302856445312, 12.327411651611328, 155.60372924804688, 85.97807312011719, 110.23811340332031, 52.59520721435547, -77.10575103759766, 98.70606994628906, -99.51478576660156, 110.04275512695312, -6.794654846191406, 205.4569091796875, 99.30841064453125, 51.955780029296875, 87.5998306274414, 326.1554870605469, 68.42472839355469, -92.7939224243164, 581.0468139648438, -92.4283676147461, 48.84678649902344, 67.95701599121094, -267.56365966796875, 420.45977783203125, 142.83706665039062, 85.69297790527344, -119.84259033203125, 208.57809448242188, 104.66943359375, 11.300491333007812, 300.20159912109375, 3.55645751953125, 67.26126861572266, 214.91653442382812, -25.25611686706543, -94.33118438720703, 13.81536865234375, 66.46624755859375, 86.53480529785156, -59.20852279663086, 104.05705261230469, -25.43260955810547, -57.92237854003906, 135.2867431640625, 42.76349639892578, 79.37380981445312, -11.010560989379883, 34.09979248046875, 155.21939086914062, -20.866783142089844, -9.452056884765625, 88.22936248779297, 16.86541748046875, 265.27935791015625, 48.42551803588867], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000171.npy"} +{"epoch": 0.2511013215859031, "step": 172, "batch_size": 64, "mean": 67.64393615722656, "std": 111.52900695800781, "min": -197.2257080078125, "p10": -69.25734634399413, "median": 73.9396858215332, "p90": 193.76623382568363, "max": 459.6619873046875, "pos_frac": 0.703125, "sample": [-41.272926330566406, 101.99439239501953, -106.01052856445312, 90.34697723388672, 36.34820556640625, -46.600669860839844, 148.54995727539062, 100.84759521484375, 74.70928192138672, -74.6277084350586, -123.74559020996094, 51.75046920776367, 50.11870574951172, -78.87652587890625, 149.73623657226562, 88.28390502929688, 20.207012176513672, 69.11410522460938, -46.986080169677734, 67.61255645751953, -32.664283752441406, 95.67498779296875, 184.7940673828125, 145.237548828125, 20.055091857910156, 73.17008972167969, 230.66848754882812, 196.6287078857422, -75.62979125976562, 247.6201171875, -27.933609008789062, 111.30281829833984, 101.31836700439453, 91.29217529296875, 119.34722900390625, 303.8686218261719, 459.6619873046875, 43.22925567626953, 119.39376068115234, 31.543468475341797, -19.259536743164062, -44.79151153564453, 77.51162719726562, 78.16468048095703, 187.08712768554688, -25.869415283203125, 13.345273971557617, 23.350698471069336, -30.338394165039062, 170.55740356445312, 158.03135681152344, -4.66632080078125, -56.72650146484375, 103.17256164550781, 107.8042221069336, -197.2257080078125, 66.88897705078125, 100.41046142578125, 230.69769287109375, 93.52978515625, -17.220252990722656, -78.66239166259766, 242.10928344726562, 181.23251342773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000172.npy"} +{"epoch": 0.2525697503671072, "step": 173, "batch_size": 64, "mean": 66.7756118774414, "std": 96.24974822998047, "min": -184.03781127929688, "p10": -25.491563796997063, "median": 50.05641174316406, "p90": 205.71159973144538, "max": 348.30926513671875, "pos_frac": 0.765625, "sample": [45.11476135253906, 75.19837188720703, 36.5914306640625, -7.931755065917969, -2.297191619873047, 43.58564758300781, 0.024593353271484375, 136.25914001464844, 117.38591003417969, 9.360122680664062, -18.613525390625, -41.697601318359375, 15.95869255065918, -3.8788375854492188, 82.7706298828125, 263.76397705078125, 69.49715423583984, 56.91330337524414, 67.85598754882812, 189.95034790039062, 114.61184692382812, 83.29229736328125, 81.94723510742188, -53.48011016845703, 51.410606384277344, 26.286582946777344, 348.30926513671875, 28.62944793701172, 157.58349609375, 212.21881103515625, 64.96754455566406, 20.389211654663086, -63.3280029296875, 111.89086151123047, 103.74281311035156, -19.152633666992188, 119.53844451904297, 9.30267333984375, -2.1822376251220703, 232.09628295898438, 65.43788146972656, 190.52810668945312, -28.208248138427734, 39.897727966308594, -8.813194274902344, 16.16824722290039, 235.13363647460938, 116.24940490722656, -46.75178527832031, -2.829833984375, 93.33280181884766, 48.70221710205078, 129.20155334472656, -68.50390625, -184.03781127929688, 330.30218505859375, 35.771202087402344, 5.300439834594727, 93.69256591796875, 19.88113021850586, 270.0771179199219, 59.84516906738281, 56.28541946411133, 43.0914306640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000173.npy"} +{"epoch": 0.2540381791483113, "step": 174, "batch_size": 64, "mean": 79.48760986328125, "std": 119.0953598022461, "min": -164.3258514404297, "p10": -48.7787841796875, "median": 59.65779113769531, "p90": 257.36902923583995, "max": 461.0216979980469, "pos_frac": 0.765625, "sample": [21.364578247070312, -15.7557373046875, -49.28166961669922, -92.85350036621094, -2.8642120361328125, 7.253776550292969, -12.978748321533203, 9.900609970092773, 113.74384307861328, 200.60606384277344, 232.2491912841797, -0.5658035278320312, 66.54679107666016, 18.28502655029297, 59.013145446777344, -39.874183654785156, 125.11979675292969, 167.71958923339844, 143.6991729736328, 268.1346740722656, 54.76708984375, 33.1930046081543, 60.30243682861328, -16.329206466674805, 327.1883850097656, -84.05783081054688, 133.9375, 296.93603515625, 28.1910400390625, 93.58056640625, 11.439725875854492, -14.212108612060547, 62.486839294433594, -80.50129699707031, 137.7008056640625, -47.605384826660156, 162.70864868164062, 107.0009994506836, -125.78549194335938, 274.43487548828125, 313.49017333984375, 332.7065124511719, 32.94176483154297, 125.58355712890625, 15.940774917602539, -164.3258514404297, 3.609567642211914, 129.9034423828125, 122.13652038574219, 140.1182861328125, 83.15013122558594, 48.05348205566406, 22.822471618652344, 140.23919677734375, -92.64582061767578, 187.7132110595703, 148.91770935058594, 96.4306640625, 46.74121856689453, 13.198558807373047, 461.0216979980469, 119.36456298828125, 92.19581604003906, 33.060447692871094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000174.npy"} +{"epoch": 0.2555066079295154, "step": 175, "batch_size": 64, "mean": 109.42562866210938, "std": 109.26695251464844, "min": -90.48745727539062, "p10": -9.744813346862792, "median": 86.40789413452148, "p90": 243.76709289550783, "max": 436.1126708984375, "pos_frac": 0.84375, "sample": [22.33150291442871, 37.329010009765625, 58.883323669433594, -90.48745727539062, 86.45988464355469, 165.57101440429688, 229.40179443359375, 86.35590362548828, 23.35663604736328, 147.22447204589844, 309.42681884765625, -69.95317077636719, -10.454559326171875, 311.8235778808594, 114.42219543457031, 31.185272216796875, 214.8741455078125, 232.2630157470703, 235.6063232421875, 156.35025024414062, 185.78512573242188, -65.7910385131836, 195.5602264404297, 436.1126708984375, 42.50151062011719, 135.88461303710938, 317.76519775390625, 47.378684997558594, 139.40182495117188, 81.32405853271484, -11.152885437011719, 55.90423583984375, 308.2157897949219, 19.02200698852539, 8.542655944824219, 35.45262908935547, -2.7260475158691406, 263.0758361816406, 8.004400253295898, 82.6883773803711, 129.23072814941406, 117.89404296875, 123.66313934326172, 208.31967163085938, -0.45645713806152344, 220.47817993164062, 238.00091552734375, 188.7317657470703, -26.617969512939453, 107.9677963256836, 177.41131591796875, 91.18515014648438, 86.26856994628906, 154.85574340820312, 41.649444580078125, 70.31889343261719, 144.51039123535156, -8.088739395141602, 43.531700134277344, 246.23831176757812, -53.054054260253906, 68.63007354736328, 0.2984466552734375, 57.353424072265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000175.npy"} +{"epoch": 0.25697503671071953, "step": 176, "batch_size": 64, "mean": 91.84109497070312, "std": 120.14725494384766, "min": -64.30500793457031, "p10": -32.21166458129883, "median": 64.30959129333496, "p90": 227.912223815918, "max": 499.69915771484375, "pos_frac": 0.78125, "sample": [14.668914794921875, 28.336196899414062, 100.04417419433594, 222.7571258544922, 62.1881217956543, 29.352558135986328, 12.127899169921875, 330.4628601074219, 17.795364379882812, -27.7113037109375, -31.699134826660156, 476.4566955566406, 17.450632095336914, -64.30500793457031, 230.50494384765625, 69.9774169921875, 31.651275634765625, 217.7361297607422, 177.89068603515625, -3.5072479248046875, -9.094314575195312, 71.96814727783203, 143.18101501464844, -47.54920959472656, 230.12155151367188, 2.274026870727539, 29.869043350219727, 197.68663024902344, 182.7978515625, 7.188941955566406, 119.68049621582031, -62.210906982421875, 152.07009887695312, -47.52818298339844, 37.532470703125, 169.8167724609375, -46.09184646606445, 79.9388198852539, 181.36361694335938, 134.9381866455078, 191.83731079101562, 0.4934520721435547, -44.17301940917969, -2.714130401611328, 148.66961669921875, 139.6537322998047, 132.00491333007812, 69.0013198852539, 136.18795776367188, 288.0315246582031, 34.64685821533203, -32.43132019042969, 20.997482299804688, 17.37147331237793, 138.1483612060547, 66.43106079101562, -18.411392211914062, 15.400203704833984, 134.27101135253906, -0.23562240600585938, 164.8306121826172, 499.69915771484375, 16.822601318359375, 321.16583251953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000176.npy"} +{"epoch": 0.25844346549192365, "step": 177, "batch_size": 64, "mean": 90.87483215332031, "std": 120.2770767211914, "min": -109.20533752441406, "p10": -25.93174209594726, "median": 63.101768493652344, "p90": 252.2091842651368, "max": 581.9981689453125, "pos_frac": 0.75, "sample": [167.57162475585938, 47.427154541015625, 235.1686553955078, 125.0119400024414, 66.58100891113281, 127.68229675292969, -109.20533752441406, 92.07853698730469, 191.71270751953125, 170.091064453125, 259.51226806640625, 43.303504943847656, 89.01506042480469, 264.5694580078125, 129.8927001953125, 78.82460021972656, 21.16693878173828, 364.2300720214844, 29.676780700683594, 178.4788818359375, 59.622528076171875, 175.81126403808594, -76.05218505859375, 150.1978759765625, -3.3647899627685547, 43.678466796875, 203.26470947265625, -22.31195831298828, -2.91461181640625, 1.630950927734375, 55.427574157714844, 289.83843994140625, 103.19046020507812, -19.469207763671875, 11.788787841796875, 83.16752624511719, -19.970489501953125, 260.604736328125, 0.2106037139892578, -30.199003219604492, -83.82044982910156, 159.12213134765625, -4.711330413818359, 51.30829620361328, -83.03480529785156, 156.35325622558594, 130.8352508544922, 15.99947738647461, -8.274116516113281, -5.14996337890625, 203.3424530029297, 9.141969680786133, 84.39218139648438, -13.901744842529297, 185.23468017578125, 38.062313079833984, 37.91639709472656, -28.72919273376465, 163.50625610351562, 46.97553253173828, 581.9981689453125, 289.506103515625, 80.45809173583984, -27.483078002929688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000177.npy"} +{"epoch": 0.2599118942731278, "step": 178, "batch_size": 64, "mean": 114.88732147216797, "std": 143.4801788330078, "min": -147.6109619140625, "p10": -35.21310348510741, "median": 93.71886825561523, "p90": 321.5360229492189, "max": 559.1182861328125, "pos_frac": 0.796875, "sample": [49.2655143737793, 23.514617919921875, 158.10269165039062, -9.164093017578125, 139.56536865234375, 93.37212371826172, 112.6077651977539, 38.640769958496094, 18.302745819091797, 297.38739013671875, 0.13994598388671875, 110.61434936523438, 279.79241943359375, 110.7291259765625, 50.694183349609375, 162.57086181640625, 405.7373046875, 116.96560668945312, 559.1182861328125, -56.78218078613281, 210.34410095214844, 60.858299255371094, 13.483245849609375, 68.3922348022461, -10.113100051879883, 182.05555725097656, 381.4115295410156, -51.26287078857422, 53.8096923828125, 0.7902908325195312, -75.72508239746094, 94.06561279296875, 223.72540283203125, 23.606124877929688, 127.12963104248047, 192.1561737060547, 135.9356689453125, -96.49232482910156, 68.6381607055664, 268.6238708496094, 32.3109245300293, 117.778076171875, -147.6109619140625, 445.1012878417969, -10.193199157714844, 159.53317260742188, 117.12025451660156, 161.7412109375, -39.44965362548828, 455.7757263183594, 34.24114990234375, 142.4251708984375, 351.1546325683594, -4.089227676391602, 21.3603515625, 221.90220642089844, 93.02197265625, -25.32781982421875, 64.93873596191406, 221.46566772460938, 331.88543701171875, -90.82954406738281, 178.9384765625, -13.01287841796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000178.npy"} +{"epoch": 0.26138032305433184, "step": 179, "batch_size": 64, "mean": 81.46770477294922, "std": 141.28077697753906, "min": -272.66778564453125, "p10": -82.86427307128905, "median": 77.42520523071289, "p90": 302.96589355468757, "max": 385.9840393066406, "pos_frac": 0.6875, "sample": [120.25371551513672, 162.09130859375, 181.32681274414062, 385.9840393066406, 63.69664764404297, -94.83082580566406, 185.1058807373047, 24.727489471435547, -11.242843627929688, 14.915075302124023, 100.76564025878906, 59.90873718261719, 49.09803771972656, 92.67662811279297, -22.12848663330078, 102.39552307128906, -75.33480834960938, 312.0858459472656, -55.11982345581055, -51.77105712890625, 77.15879821777344, -86.0911865234375, 361.1108703613281, 22.95293617248535, 230.35939025878906, 126.3782958984375, 163.1620635986328, 313.79107666015625, 169.64796447753906, 30.9879150390625, 82.33650207519531, 283.5009460449219, 344.36077880859375, 97.16519165039062, 77.15926361083984, 130.5341796875, -38.220947265625, -27.533069610595703, -0.7141494750976562, 86.1415023803711, 85.5258560180664, -35.545372009277344, 77.69114685058594, -93.67530822753906, 130.13189697265625, 345.1254577636719, -96.46349334716797, 251.8174591064453, 114.34737396240234, -133.26116943359375, -52.81262969970703, 239.70741271972656, 33.46925354003906, 249.863037109375, 311.3080139160156, 216.7528533935547, -160.08245849609375, 164.76138305664062, -60.372901916503906, -73.48336791992188, -272.66778564453125, 16.153594970703125, 23.54474449157715, -56.693443298339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000179.npy"} +{"epoch": 0.26284875183553597, "step": 180, "batch_size": 64, "mean": 104.57914733886719, "std": 126.15655517578125, "min": -201.7624053955078, "p10": -28.3197919845581, "median": 87.63663101196289, "p90": 266.47852783203126, "max": 503.383056640625, "pos_frac": 0.828125, "sample": [98.84490203857422, 164.32369995117188, -74.60459899902344, 1.7825279235839844, -65.58895111083984, 503.383056640625, -24.55135726928711, 68.75520324707031, 104.33428955078125, 59.77769470214844, 7.4621429443359375, 234.527587890625, -201.7624053955078, 137.96957397460938, 185.49070739746094, 121.05511474609375, 85.81575012207031, 85.89289855957031, 105.9726791381836, 64.20988464355469, 169.28012084960938, -146.3801727294922, 69.86088562011719, 296.11163330078125, 75.48905181884766, 231.85797119140625, 158.29867553710938, 4.6359100341796875, 40.20457458496094, 54.64998245239258, 280.22052001953125, 3.8637847900390625, 256.8854675292969, 170.77691650390625, 79.00582122802734, -4.921285629272461, 75.6901626586914, 207.51638793945312, -23.011337280273438, 178.84268188476562, 87.3144760131836, 20.352943420410156, 252.28854370117188, 69.92730712890625, -130.11959838867188, 87.95878601074219, 1.3458251953125, 267.9324951171875, 236.9935760498047, 92.4801254272461, 139.07664489746094, 263.0859375, -29.93483543395996, 338.28546142578125, 94.02945709228516, -23.612762451171875, 194.21792602539062, 158.3773193359375, 28.709632873535156, -73.10138702392578, 268.8966369628906, 286.51678466796875, 29.16637420654297, 190.90969848632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000180.npy"} +{"epoch": 0.2643171806167401, "step": 181, "batch_size": 64, "mean": 64.92919921875, "std": 116.48666381835938, "min": -205.09262084960938, "p10": -57.13630599975586, "median": 45.409767150878906, "p90": 216.84354705810554, "max": 389.651611328125, "pos_frac": 0.765625, "sample": [130.88206481933594, -60.29564666748047, 15.298131942749023, 67.02491760253906, 112.5565414428711, -3.3814620971679688, 31.109607696533203, 39.594932556152344, 86.13037872314453, 121.67543029785156, -3.2736549377441406, 17.308061599731445, 5.378969192504883, 16.66657257080078, -155.13568115234375, 224.70486450195312, 116.23652648925781, -27.164093017578125, 76.29621887207031, -16.20230484008789, 29.431095123291016, -57.174888610839844, -17.690750122070312, 226.05897521972656, -80.69680786132812, 85.98391723632812, 131.38125610351562, 275.68524169921875, 64.93537902832031, 53.085296630859375, 131.09405517578125, 84.02144622802734, -204.61390686035156, 45.984107971191406, 23.814987182617188, 84.38644409179688, 13.163938522338867, 191.12376403808594, 137.86058044433594, -57.04627990722656, 16.719207763671875, -205.09262084960938, 224.66920471191406, -23.204486846923828, 353.74688720703125, 168.72679138183594, -158.41661071777344, 52.33698272705078, -32.281211853027344, 135.46112060546875, 198.58367919921875, 389.651611328125, 150.58486938476562, 36.4952507019043, 133.1604461669922, 6.736347198486328, 4.588489532470703, 1.1865158081054688, 293.12603759765625, 40.266117095947266, 30.009986877441406, 185.0833740234375, 152.29693603515625, 44.835426330566406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000181.npy"} +{"epoch": 0.2657856093979442, "step": 182, "batch_size": 64, "mean": 111.99694061279297, "std": 134.331787109375, "min": -242.88052368164062, "p10": -53.79853019714354, "median": 108.58872604370117, "p90": 273.9595672607422, "max": 380.5345153808594, "pos_frac": 0.84375, "sample": [293.32989501953125, 80.25300598144531, 324.05133056640625, 194.68698120117188, 88.05012512207031, 223.0124969482422, 241.26266479492188, 271.6553649902344, 350.064208984375, 72.626953125, 132.8455352783203, 48.77267837524414, 29.097986221313477, -17.94253921508789, 266.41082763671875, -242.88052368164062, 204.48663330078125, 66.21145629882812, 4.957927703857422, 48.58514404296875, 371.94720458984375, 1.1632862091064453, 257.6125793457031, 120.56913757324219, 153.07833862304688, -59.765281677246094, 250.22027587890625, 97.13929748535156, 18.310672760009766, 71.06484985351562, 147.64639282226562, 254.31588745117188, -107.34807586669922, -119.8609848022461, 164.88375854492188, 237.08856201171875, 44.787200927734375, 150.85488891601562, 215.73883056640625, 227.69468688964844, -64.92350006103516, 77.3632583618164, 33.48332214355469, 127.84091186523438, 149.45359802246094, 178.42984008789062, 181.13348388671875, 120.03815460205078, -128.24957275390625, 6.638126373291016, 380.5345153808594, 144.3177490234375, 51.05604553222656, 5.605623245239258, -31.25762939453125, 6.500299453735352, -129.87173461914062, 74.9781265258789, 274.94708251953125, 15.985084533691406, 345.1695556640625, -39.8761100769043, 49.98692321777344, 161.8409423828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000182.npy"} +{"epoch": 0.26725403817914833, "step": 183, "batch_size": 64, "mean": 60.72479248046875, "std": 109.3719253540039, "min": -162.70738220214844, "p10": -77.47502822875977, "median": 58.51387596130371, "p90": 189.0671264648438, "max": 342.7542724609375, "pos_frac": 0.75, "sample": [14.163162231445312, 71.00765991210938, -5.507925033569336, 60.220542907714844, 13.557964324951172, 173.34178161621094, 152.5071563720703, 142.6646728515625, 46.9490966796875, 52.867095947265625, -78.19435119628906, 138.49334716796875, 61.33308792114258, 18.19151496887207, 28.628448486328125, 83.3087387084961, 339.4774169921875, 248.35150146484375, 142.13096618652344, 28.844762802124023, 68.8838882446289, 120.00038146972656, 300.69122314453125, 99.44305419921875, -9.562095642089844, 4.858959197998047, 78.88905334472656, 56.80720901489258, 0.946075439453125, -162.70738220214844, 171.06967163085938, 78.08122253417969, -54.618408203125, 13.354835510253906, 177.42877197265625, 62.235198974609375, 46.73927307128906, 95.7229995727539, 133.98040771484375, 227.780517578125, -129.5953369140625, -44.429222106933594, 342.7542724609375, -75.7966079711914, 29.741981506347656, -31.188232421875, 79.62989044189453, -98.7939453125, -108.87140655517578, -108.33941650390625, 160.23663330078125, 108.25221252441406, 201.19337463378906, 194.05499267578125, 87.97293090820312, 51.849632263183594, 27.066322326660156, -64.54717254638672, 137.28118896484375, 84.4029541015625, -153.22320556640625, -62.872066497802734, 19.540206909179688, -2.29473876953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000183.npy"} +{"epoch": 0.2687224669603524, "step": 184, "batch_size": 64, "mean": 64.11538696289062, "std": 111.73335266113281, "min": -155.009521484375, "p10": -48.047241592407225, "median": 33.33728790283203, "p90": 210.97258605957035, "max": 339.360107421875, "pos_frac": 0.734375, "sample": [29.62158203125, -108.7529525756836, 66.96404266357422, 78.05182647705078, 198.04762268066406, 186.321533203125, 10.445571899414062, 14.5850830078125, 156.57644653320312, -18.220359802246094, 313.2129821777344, 339.360107421875, 61.21385192871094, 214.9520263671875, -43.85851287841797, 191.73300170898438, -0.2216644287109375, 221.5567626953125, 98.6557846069336, 272.07781982421875, -25.712322235107422, 305.1702575683594, -89.45449829101562, 16.853118896484375, 42.532840728759766, 33.53015899658203, 42.54774475097656, 9.897560119628906, 89.76220703125, 3.295766830444336, 42.304931640625, 96.97454833984375, 145.10275268554688, 198.37477111816406, -89.74955749511719, 201.68722534179688, 15.21075439453125, -25.49810028076172, -38.44758224487305, 23.530628204345703, 70.76488494873047, -18.853607177734375, 30.35726547241211, 6.656335830688477, 15.388816833496094, 197.8193817138672, 69.84645080566406, 55.58481216430664, -53.24169921875, -131.7093505859375, -49.842411041259766, 27.620527267456055, -1.1857795715332031, 6.5552215576171875, -155.009521484375, 34.68019104003906, -35.99617004394531, 175.8530731201172, 33.14441680908203, 103.77957916259766, 10.766487121582031, -1.5858993530273438, 99.83961486816406, 331.9163513183594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000184.npy"} +{"epoch": 0.2701908957415565, "step": 185, "batch_size": 64, "mean": 50.61490249633789, "std": 120.40911102294922, "min": -192.69537353515625, "p10": -64.02278327941893, "median": 26.461185455322266, "p90": 199.8696685791016, "max": 424.2530212402344, "pos_frac": 0.640625, "sample": [-161.91232299804688, -67.57341003417969, -17.317827224731445, 86.20513916015625, 75.99507141113281, 17.833284378051758, -6.573137283325195, 77.97447204589844, 399.1315612792969, 2.5436477661132812, 16.328243255615234, 110.5897445678711, 106.12306213378906, -8.777732849121094, 252.83102416992188, 37.823577880859375, -11.006391525268555, 38.300132751464844, 136.8306427001953, -101.19668579101562, -10.101421356201172, 34.7949104309082, 21.817859649658203, 71.38829040527344, 78.85708618164062, -6.9322662353515625, -117.54539489746094, 21.873382568359375, 87.07062530517578, -133.91571044921875, 34.4224853515625, 20.793697357177734, 94.63394165039062, 89.20399475097656, 25.496826171875, 366.779541015625, -54.09711456298828, 33.736412048339844, 189.59335327148438, -43.5293083190918, 33.91062545776367, 5.26905632019043, -18.56000518798828, -8.485000610351562, -23.404245376586914, -3.357858657836914, 320.62530517578125, 138.29530334472656, 149.9291229248047, 0.3011035919189453, 424.2530212402344, -29.653602600097656, 27.42554473876953, 67.08351135253906, -16.33348846435547, 204.2738037109375, 35.7999267578125, 125.76190185546875, -192.69537353515625, 259.40679931640625, -75.87100982666016, 91.43122100830078, -8.807205200195312, -55.73798751831055], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000185.npy"} +{"epoch": 0.27165932452276065, "step": 186, "batch_size": 64, "mean": 95.4560317993164, "std": 116.65131378173828, "min": -190.415771484375, "p10": -22.139906311035155, "median": 74.6743278503418, "p90": 231.5572906494141, "max": 450.62042236328125, "pos_frac": 0.8125, "sample": [-190.415771484375, 128.7786102294922, 312.27349853515625, 150.2666015625, 145.48269653320312, -1.59747314453125, 75.82711791992188, 48.764522552490234, 207.76882934570312, 153.92723083496094, 65.95904541015625, 219.38955688476562, 1.6890945434570312, 81.21255493164062, 36.788761138916016, -19.81247329711914, 88.8565673828125, -115.78681945800781, 40.7735710144043, 41.986053466796875, 295.61126708984375, 43.66659927368164, -5.535575866699219, 172.71826171875, 41.148948669433594, 72.88615417480469, 98.03573608398438, 143.89401245117188, 315.91131591796875, 107.09835815429688, 178.32772827148438, -25.016626358032227, 154.303955078125, 66.34648132324219, 30.062177658081055, 124.95954895019531, 388.40789794921875, 236.77203369140625, 48.67390823364258, 2.8561363220214844, 156.03680419921875, 28.097394943237305, 73.52153778076172, 62.71746826171875, -63.44647216796875, 393.5738525390625, -21.200424194335938, 69.36734771728516, -22.54254150390625, -45.80027770996094, 450.62042236328125, 154.8907470703125, 68.31507873535156, 142.212158203125, 2.23095703125, 143.877685546875, 93.13914489746094, -26.249794006347656, 2.6298370361328125, 119.08729553222656, 145.59426879882812, 133.39889526367188, -7.286285400390625, 93.14035034179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000186.npy"} +{"epoch": 0.27312775330396477, "step": 187, "batch_size": 64, "mean": 79.94140625, "std": 107.06732177734375, "min": -149.99734497070312, "p10": -37.361049270629884, "median": 62.117462158203125, "p90": 214.85820922851565, "max": 388.050048828125, "pos_frac": 0.78125, "sample": [-149.99734497070312, 205.84686279296875, -20.929906845092773, 59.45492935180664, -79.84890747070312, 227.45147705078125, 83.83729553222656, 162.24310302734375, 54.569854736328125, 40.365726470947266, 74.54664611816406, 67.65164947509766, 334.85821533203125, 79.61650848388672, 124.06935119628906, 211.04815673828125, 161.99746704101562, 66.78236389160156, 117.60392761230469, -46.64931869506836, 55.75440216064453, 35.54100799560547, 388.050048828125, -37.987701416015625, 151.71116638183594, 57.75205993652344, 239.91473388671875, -34.27097702026367, 188.81175231933594, 41.47349548339844, -49.183441162109375, 86.4629135131836, 18.31884765625, -60.44500732421875, -35.898860931396484, 147.6285400390625, -35.40904998779297, 3.013212203979492, -8.029319763183594, 209.13580322265625, 148.4507598876953, 347.21258544921875, 97.23143768310547, 45.97815704345703, 63.94842529296875, 37.317649841308594, 40.22310256958008, 45.52659606933594, -125.74922943115234, 40.41803741455078, 104.3748550415039, -10.296073913574219, 156.90773010253906, 92.84622192382812, 76.37952423095703, 195.1916961669922, 89.80725860595703, 0.9036178588867188, 60.2864990234375, 227.73670959472656, 216.4910888671875, 36.757102966308594, -24.73000144958496, 16.174705505371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000187.npy"} +{"epoch": 0.2745961820851689, "step": 188, "batch_size": 64, "mean": 98.32379150390625, "std": 120.28564453125, "min": -119.6957778930664, "p10": -16.91444931030273, "median": 71.38194274902344, "p90": 285.94708251953125, "max": 499.34722900390625, "pos_frac": 0.796875, "sample": [308.90313720703125, 123.31417083740234, 242.12684631347656, 70.88313293457031, 71.06078338623047, -3.0700836181640625, -31.217178344726562, -111.80365753173828, 293.92620849609375, 79.77482604980469, 52.0694580078125, -29.349082946777344, 118.5275650024414, 77.1514892578125, 286.96221923828125, 253.09877014160156, 90.7980728149414, -23.237403869628906, 53.56404113769531, 7.432947158813477, -1.8438568115234375, 28.742347717285156, 41.670989990234375, 141.70346069335938, 70.4390640258789, 25.596580505371094, 15.606353759765625, -5.338748931884766, 319.4116516113281, 49.482025146484375, 368.79998779296875, -119.6957778930664, 283.57843017578125, 107.49839782714844, 156.66079711914062, 118.51773071289062, 65.51632690429688, 188.2677001953125, -10.12158203125, 188.25726318359375, 167.44483947753906, 25.067092895507812, -86.95040893554688, -18.999181747436523, 190.7163848876953, 39.982643127441406, 217.03091430664062, 31.03976058959961, 5.9429779052734375, 332.994873046875, 90.74385833740234, 167.63720703125, 56.38624954223633, -11.364383697509766, 165.86813354492188, 81.71516418457031, 102.80866241455078, 499.34722900390625, 94.90118408203125, 71.7031021118164, 76.401123046875, -12.050073623657227, 23.14276885986328, 17.54669761657715], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000188.npy"} +{"epoch": 0.27606461086637296, "step": 189, "batch_size": 64, "mean": 79.6611328125, "std": 133.67152404785156, "min": -169.3841552734375, "p10": -65.78843688964844, "median": 60.62401580810547, "p90": 196.64479980468752, "max": 566.9457397460938, "pos_frac": 0.78125, "sample": [40.35785675048828, 175.57749938964844, -125.38148498535156, 344.5438232421875, -67.56684875488281, -61.63880920410156, 45.201438903808594, 6.528373718261719, 2.356222152709961, 34.4205322265625, 151.90162658691406, 148.9072265625, 148.46163940429688, -11.026336669921875, 162.92715454101562, 45.866058349609375, 189.5015869140625, 45.43012237548828, -169.3841552734375, 6.097267150878906, -82.2009048461914, 25.929170608520508, 81.34687805175781, 177.29925537109375, 142.22901916503906, 83.88600158691406, -43.94932556152344, 57.43368148803711, 36.95906448364258, 182.80517578125, 4.876789093017578, 20.518789291381836, 94.54798889160156, 482.15234375, 60.972320556640625, -25.90544891357422, 22.69525146484375, 213.8051300048828, 85.83155822753906, 10.997611999511719, -166.7821502685547, 123.25841522216797, -57.68292236328125, 122.96092987060547, 62.5509147644043, 331.55621337890625, 176.1103973388672, 566.9457397460938, -21.50792694091797, 123.3465576171875, 172.04867553710938, 30.566024780273438, 80.2962646484375, 151.35389709472656, 110.76641845703125, 199.7061767578125, 255.13351440429688, 94.95535278320312, -3.0740432739257812, -169.04595947265625, 60.27571105957031, -81.15860748291016, 27.46612548828125, 162.9556884765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000189.npy"} +{"epoch": 0.2775330396475771, "step": 190, "batch_size": 64, "mean": 95.53968811035156, "std": 140.0072479248047, "min": -214.4184112548828, "p10": -64.4300651550293, "median": 65.91733169555664, "p90": 301.8294555664063, "max": 422.9517517089844, "pos_frac": 0.796875, "sample": [-3.785552978515625, 112.06848907470703, 189.53005981445312, 163.58596801757812, 17.14914321899414, -88.25518035888672, 249.46408081054688, 44.541419982910156, 12.372480392456055, 363.40960693359375, 82.95599365234375, 64.59896087646484, 137.83042907714844, 21.24591827392578, 367.38458251953125, 145.24447631835938, 110.19837951660156, 78.95747375488281, 195.71536254882812, 23.388198852539062, 3.5635547637939453, 265.39984130859375, -214.4184112548828, 64.11595916748047, 272.115478515625, -20.579376220703125, 61.65740966796875, 328.4510498046875, 154.77781677246094, 54.19038391113281, -16.370407104492188, 303.62890625, 20.78522491455078, -3.2158069610595703, 138.59158325195312, 74.88728332519531, -95.58951568603516, -65.63909912109375, -151.00909423828125, 14.691970825195312, 109.26773834228516, -61.608985900878906, 110.23400115966797, 53.09571838378906, -27.890344619750977, -92.39080810546875, 103.95149230957031, 297.6307373046875, 50.346160888671875, 54.38562774658203, -190.5675811767578, 386.1904296875, 124.92135620117188, 23.3666934967041, 208.17845153808594, 105.6469497680664, 67.23570251464844, 385.90521240234375, 4.88878059387207, 422.9517517089844, 203.5194091796875, 223.12098693847656, 33.69071578979492, 40.834808349609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000190.npy"} +{"epoch": 0.2790014684287812, "step": 191, "batch_size": 64, "mean": 137.9830322265625, "std": 152.693603515625, "min": -146.19528198242188, "p10": -10.3233959197998, "median": 114.20819091796875, "p90": 326.99763183593757, "max": 572.3182373046875, "pos_frac": 0.859375, "sample": [488.5576477050781, 109.07638549804688, 572.3182373046875, 30.541702270507812, 298.937744140625, 71.99418640136719, 143.17601013183594, 268.32403564453125, 15.922346115112305, 361.43865966796875, 101.7724609375, 171.07168579101562, -11.949417114257812, 271.7674865722656, 293.70257568359375, 31.660316467285156, 36.23535919189453, 81.01471710205078, 27.899063110351562, 69.5641860961914, 261.2974853515625, 53.278900146484375, 43.1488037109375, 294.414306640625, 125.47022247314453, 8.951217651367188, 8.603309631347656, 543.2888793945312, 188.1042938232422, -21.317298889160156, 228.67803955078125, -75.64543151855469, 17.39134979248047, -48.512454986572266, 142.19464111328125, 176.380126953125, 182.15272521972656, 147.8889923095703, 333.16656494140625, 31.402128219604492, 8.30459976196289, 251.61056518554688, 225.1480712890625, -146.19528198242188, 67.46253967285156, 508.2746887207031, 36.374412536621094, -65.12306213378906, 119.33999633789062, 95.49516296386719, 184.14598083496094, 312.60345458984375, 36.084007263183594, -6.529346466064453, 124.3748779296875, 173.9681854248047, -73.33721160888672, 121.12127685546875, -3.5466766357421875, 126.53583526611328, 363.2738037109375, 34.59343719482422, 88.31802368164062, 175.25494384765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000191.npy"} +{"epoch": 0.28046989720998533, "step": 192, "batch_size": 64, "mean": 91.70048522949219, "std": 148.4692840576172, "min": -260.8277893066406, "p10": -50.542318725585936, "median": 67.80596542358398, "p90": 316.1694702148438, "max": 548.2846069335938, "pos_frac": 0.734375, "sample": [-49.702491760253906, -144.0408935546875, 41.803977966308594, 173.09434509277344, 129.52383422851562, -37.592979431152344, -50.902244567871094, -2.0516223907470703, 32.80694580078125, -260.8277893066406, 25.826202392578125, 97.07647705078125, -36.3262939453125, 230.14820861816406, 18.216659545898438, 39.17643737792969, -71.57546997070312, 101.11634063720703, -43.137542724609375, 174.55357360839844, -119.1482925415039, 418.8036193847656, -48.587894439697266, 76.92755889892578, -16.140945434570312, 321.49560546875, 358.28631591796875, 377.3251647949219, 153.62814331054688, 356.833251953125, 95.77035522460938, 77.31504821777344, 548.2846069335938, 119.6451416015625, 181.3440704345703, 28.651643753051758, 117.67234802246094, 216.0067596435547, 273.0760803222656, 60.84670639038086, 47.71647644042969, 303.7418212890625, 7.380075454711914, 40.43788146972656, 133.75413513183594, 59.946754455566406, 59.494712829589844, -172.0224151611328, 196.36209106445312, 70.82486724853516, 82.68022918701172, -9.237579345703125, -44.506736755371094, -67.62775421142578, 16.764495849609375, 229.29937744140625, 71.47702026367188, -33.94688415527344, 38.208099365234375, 207.79238891601562, 140.80661010742188, 121.96989440917969, 64.78706359863281, 337.5074768066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000192.npy"} +{"epoch": 0.28193832599118945, "step": 193, "batch_size": 64, "mean": 52.65251159667969, "std": 141.3990020751953, "min": -413.68243408203125, "p10": -87.26598739624022, "median": 52.867305755615234, "p90": 228.6952011108399, "max": 369.922119140625, "pos_frac": 0.65625, "sample": [115.22686767578125, -26.95525360107422, 29.480918884277344, 186.73541259765625, -411.18603515625, 96.04673767089844, 63.096763610839844, 52.21155548095703, -413.68243408203125, 301.2044372558594, 64.94950103759766, -11.399175643920898, 199.019287109375, -34.3800163269043, -24.520889282226562, 94.03555297851562, 105.09532928466797, 126.3165283203125, -2.2920303344726562, 232.1187744140625, 2.966062545776367, -20.38568878173828, 13.76430892944336, 151.345947265625, 27.28511619567871, 134.45925903320312, -58.281490325927734, -44.85358428955078, 3.6917877197265625, -59.13874816894531, -26.088396072387695, 220.7068634033203, -98.22467041015625, -94.6559829711914, 242.36508178710938, -125.40914154052734, 179.478759765625, -92.86284637451172, 19.062501907348633, 53.52305603027344, 10.153289794921875, 139.73162841796875, 272.77880859375, 163.28082275390625, 116.6553955078125, -135.31370544433594, 315.74530029296875, 81.4584732055664, 125.55973052978516, 124.17110443115234, -64.07080078125, 96.64480590820312, -1.4830131530761719, -74.20664978027344, -47.70213317871094, 143.02044677734375, 369.922119140625, 127.77665710449219, -66.82020568847656, 21.338699340820312, 282.173828125, 76.99785614013672, 14.96466064453125, 107.11365509033203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000193.npy"} +{"epoch": 0.2834067547723935, "step": 194, "batch_size": 64, "mean": 86.26107788085938, "std": 99.67212677001953, "min": -103.73922729492188, "p10": -49.88416595458984, "median": 68.35005187988281, "p90": 242.6201629638672, "max": 305.6581726074219, "pos_frac": 0.8125, "sample": [138.03121948242188, 21.336936950683594, 32.37614440917969, 11.35888671875, 120.37139892578125, 246.72567749023438, 103.61843872070312, 165.07476806640625, -18.271011352539062, 19.930030822753906, 29.90403938293457, 246.84182739257812, -67.65086364746094, -58.06719970703125, 43.100074768066406, 62.55790710449219, 212.36605834960938, 55.47406005859375, 70.20050048828125, 129.92520141601562, 35.55289077758789, 81.76053619384766, 12.755069732666016, 7.054973602294922, 118.3114242553711, -79.83699035644531, -4.191095352172852, 226.29013061523438, 63.565773010253906, 79.55570983886719, 246.1024932861328, 131.8894805908203, 239.70228576660156, 173.09896850585938, 25.12602996826172, 282.72613525390625, 62.93989562988281, 26.305707931518555, 143.6127471923828, 46.3291015625, 189.85789489746094, -75.250244140625, 45.141632080078125, -15.238533020019531, 104.21980285644531, 164.38876342773438, -2.3935508728027344, -103.73922729492188, 37.32182312011719, -53.03656005859375, 109.89227294921875, -64.2438735961914, 63.80525207519531, 243.8706817626953, 74.99534606933594, 305.6581726074219, 133.0102081298828, 66.49960327148438, 286.6011962890625, 93.45247650146484, 175.094970703125, 183.6859130859375, 115.78829956054688, -42.52857971191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000194.npy"} +{"epoch": 0.28487518355359764, "step": 195, "batch_size": 64, "mean": 93.17719268798828, "std": 118.43743896484375, "min": -127.9245834350586, "p10": -49.713039779663085, "median": 76.71570205688477, "p90": 246.85775756835943, "max": 399.26861572265625, "pos_frac": 0.78125, "sample": [82.37439727783203, -23.65960693359375, 222.9453582763672, 42.67995834350586, 71.35479736328125, 399.26861572265625, 68.59284210205078, 328.31060791015625, 214.300537109375, -7.9368133544921875, -4.261871337890625, 47.47523498535156, -6.260414123535156, -99.59117126464844, 258.38726806640625, 113.74983215332031, 50.54437255859375, 226.2075958251953, -42.95024871826172, 329.30023193359375, -49.741207122802734, 106.39717864990234, -101.66058349609375, 37.348297119140625, 96.53948974609375, 106.19959259033203, 233.25125122070312, 299.3463134765625, 152.53819274902344, 128.8384246826172, 148.00942993164062, -80.74251556396484, 76.29264831542969, 186.09249877929688, 1.207061767578125, 152.18199157714844, -30.626028060913086, 11.724327087402344, 166.89279174804688, 206.5001678466797, -127.9245834350586, 252.68911743164062, 43.53482437133789, 73.27291107177734, 389.3971862792969, 141.2830810546875, 77.13875579833984, 58.15618896484375, -52.753501892089844, 10.235937118530273, 159.79794311523438, -69.70980834960938, 221.68524169921875, 107.52577209472656, 24.977828979492188, 80.827880859375, 29.62622833251953, 54.39667510986328, 17.664915084838867, 84.11314392089844, -49.647315979003906, 55.18954849243164, 151.46595764160156, 112.97561645507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000195.npy"} +{"epoch": 0.28634361233480177, "step": 196, "batch_size": 64, "mean": 105.90283203125, "std": 143.84640502929688, "min": -223.51126098632812, "p10": -56.199887084960935, "median": 79.71743392944336, "p90": 309.5011413574219, "max": 451.0350341796875, "pos_frac": 0.78125, "sample": [189.772705078125, 202.3842010498047, 311.91064453125, 215.86778259277344, -57.917091369628906, 285.6785888671875, 303.87896728515625, 434.3759765625, 232.32186889648438, 69.14894104003906, 8.71109390258789, 51.87787628173828, 283.83868408203125, 194.5666961669922, 49.044708251953125, 10.719423294067383, -13.88290786743164, 88.39756774902344, -8.404190063476562, 45.944244384765625, 436.44097900390625, -16.45022201538086, 61.67677307128906, 36.18263626098633, 128.7584991455078, 128.552001953125, 124.79638671875, 139.16928100585938, 46.397552490234375, -11.483112335205078, 114.61679077148438, 77.53973388671875, 434.24945068359375, 451.0350341796875, -18.67572784423828, 181.57020568847656, 84.46211242675781, 59.97224426269531, 109.36207580566406, 83.31765747070312, 63.084434509277344, 56.367889404296875, 219.007568359375, -2.3175601959228516, 132.74917602539062, 30.57944107055664, 81.89513397216797, -157.10968017578125, 66.3693618774414, -223.51126098632812, -67.5281982421875, 156.61764526367188, 90.173095703125, -62.83774185180664, 318.5107421875, -52.193077087402344, -133.27850341796875, 66.19371032714844, 0.736236572265625, 128.61398315429688, 263.1473083496094, 322.230712890625, -75.3564453125, 5.911455154418945], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000196.npy"} +{"epoch": 0.2878120411160059, "step": 197, "batch_size": 64, "mean": 81.5740737915039, "std": 113.25190734863281, "min": -117.93977355957031, "p10": -38.90371398925781, "median": 52.21286392211914, "p90": 235.64595489501954, "max": 401.7397155761719, "pos_frac": 0.796875, "sample": [59.071983337402344, 332.5087585449219, 137.44398498535156, -20.7607421875, 160.83139038085938, 7.0533599853515625, 19.073226928710938, -64.6995849609375, 64.02617645263672, 401.7397155761719, 354.4153137207031, 237.5862274169922, 66.76575469970703, 123.76785278320312, -97.78943634033203, -0.7020034790039062, 128.22323608398438, 279.40814208984375, 27.82598114013672, 37.665470123291016, 103.54721069335938, 213.0468292236328, 42.38712692260742, -5.51641845703125, 58.02722930908203, 8.64605712890625, -103.49820709228516, 130.07333374023438, 75.70764923095703, 91.91349792480469, 259.183349609375, 144.8836212158203, 35.11604309082031, 231.11865234375, 2.829254150390625, 191.9445037841797, -41.291015625, 0.0666046142578125, -75.61239624023438, -117.93977355957031, 33.29755401611328, 119.61483001708984, -0.42635536193847656, 14.130912780761719, 26.72231101989746, 78.87226867675781, 309.69378662109375, 202.37918090820312, 26.181472778320312, 188.92495727539062, -12.254610061645508, 45.373226165771484, 89.21149444580078, 22.68677520751953, 19.05156707763672, 120.45052337646484, 169.23159790039062, 77.15380859375, 202.19699096679688, -33.333343505859375, -64.50146484375, 21.722312927246094, 19.874420166015625, 46.39849853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000197.npy"} +{"epoch": 0.28928046989721, "step": 198, "batch_size": 64, "mean": 103.49455261230469, "std": 142.75363159179688, "min": -234.0670166015625, "p10": -32.193214607238765, "median": 86.87192153930664, "p90": 229.39146575927737, "max": 590.58251953125, "pos_frac": 0.828125, "sample": [-120.88996124267578, 95.15122985839844, -34.03771209716797, 526.42138671875, 118.95263671875, 203.260498046875, 326.5648498535156, 55.051509857177734, 142.2070770263672, 48.064048767089844, 119.30078125, 163.70428466796875, -142.39804077148438, 6.894508361816406, 2.61395263671875, 231.6037139892578, 143.74444580078125, 119.77157592773438, 79.71826171875, 87.54479217529297, -27.889387130737305, -38.15623474121094, 34.82025909423828, 157.61256408691406, 11.738348007202148, 69.64600372314453, -73.20614624023438, 195.16134643554688, 86.19905090332031, 82.99617004394531, 90.40877532958984, 116.8322982788086, 138.464111328125, 72.96327209472656, 129.0074005126953, 14.16732406616211, 75.39399719238281, 149.47006225585938, 153.81814575195312, 224.22955322265625, -234.0670166015625, 532.65771484375, 95.51323699951172, 63.25476837158203, -3.1615676879882812, 12.958999633789062, 238.40292358398438, -39.545066833496094, 3.3646621704101562, 74.86094665527344, 36.368953704833984, 91.03408813476562, 85.13897705078125, 136.35911560058594, -6.681901931762695, 144.27745056152344, 152.64266967773438, 206.16656494140625, 11.94144058227539, 414.8182373046875, -4.9298095703125, 590.58251953125, 48.35340881347656, 136.41880798339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000198.npy"} +{"epoch": 0.2907488986784141, "step": 199, "batch_size": 64, "mean": 104.01869201660156, "std": 139.33583068847656, "min": -165.69708251953125, "p10": -55.58073844909667, "median": 86.64188385009766, "p90": 282.2914459228516, "max": 454.783447265625, "pos_frac": 0.78125, "sample": [205.46685791015625, 170.12962341308594, -134.09664916992188, -8.631866455078125, 272.6278381347656, 75.30664825439453, -60.62152099609375, 203.0434112548828, -164.46240234375, 114.83397674560547, -0.9009323120117188, -77.85214233398438, -165.69708251953125, 213.31593322753906, 205.82589721679688, 65.89447784423828, 14.109764099121094, 137.2200927734375, 384.0369873046875, 45.561119079589844, 87.07476806640625, 167.5951690673828, 375.20220947265625, 299.84674072265625, -93.01840209960938, 46.09864807128906, -36.509864807128906, 122.64983367919922, 208.68154907226562, 25.2308349609375, 35.09919738769531, 249.65664672851562, 454.783447265625, 79.93601989746094, 76.8988265991211, 197.50860595703125, -22.340087890625, 113.80888366699219, 33.919219970703125, 95.92762756347656, 63.07008361816406, 275.6991882324219, 135.23556518554688, 34.05379867553711, 285.11669921875, 123.4804458618164, 71.1168212890625, -41.23439025878906, -43.818912506103516, -164.1507110595703, 264.3172302246094, 80.97822570800781, 103.3712158203125, 86.20899963378906, 347.96820068359375, 177.2297821044922, 5.09703254699707, 92.73546600341797, 55.31016540527344, 88.48362731933594, 48.60826110839844, 152.79074096679688, 422.9185791015625, -20.549713134765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000199.npy"} +{"epoch": 0.2922173274596182, "step": 200, "batch_size": 64, "mean": 102.84646606445312, "std": 155.4116668701172, "min": -290.94134521484375, "p10": -45.29775428771971, "median": 84.64266586303711, "p90": 326.2521484375, "max": 531.9830322265625, "pos_frac": 0.78125, "sample": [231.9246826171875, 105.23777770996094, 59.67341232299805, 387.5226135253906, 65.29183959960938, 150.30398559570312, 15.5325927734375, -1.7749481201171875, 407.79681396484375, -92.30795288085938, 322.9635925292969, 239.98248291015625, 100.61031341552734, 3.1141319274902344, 85.23886108398438, 322.42498779296875, 216.3351287841797, 390.5396728515625, 5.4694976806640625, 327.6615295410156, 66.55996704101562, 0.46549034118652344, 161.09486389160156, 216.4155731201172, -25.61697769165039, 71.94158172607422, 182.75369262695312, -192.53509521484375, 531.9830322265625, 87.73102569580078, 103.51786804199219, -28.506595611572266, -5.1153411865234375, 73.9285888671875, 52.236663818359375, -28.21998405456543, -290.94134521484375, 5.001373291015625, 98.05519104003906, 350.0564270019531, 4.856351852416992, 89.9665756225586, 56.61650085449219, -12.062171936035156, 4.312564849853516, -158.95367431640625, 84.04647064208984, -52.49396514892578, 295.8688659667969, -27.356985092163086, 7.32305908203125, 414.57232666015625, 43.813568115234375, 27.67424774169922, 98.47581481933594, 253.73402404785156, 169.24758911132812, 159.14801025390625, 105.60479736328125, 167.42306518554688, -119.13604736328125, 140.27537536621094, -68.94148254394531, 123.81201934814453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000200.npy"} +{"epoch": 0.2936857562408223, "step": 201, "batch_size": 64, "mean": 101.25636291503906, "std": 143.0900115966797, "min": -281.8214416503906, "p10": -46.71069183349608, "median": 81.652587890625, "p90": 278.0167175292969, "max": 479.517822265625, "pos_frac": 0.8125, "sample": [252.13214111328125, 17.920286178588867, 212.99974060058594, -222.94656372070312, 48.598106384277344, -90.6611328125, 11.363895416259766, 14.739633560180664, -26.618881225585938, 85.60223388671875, 3.754425048828125, 27.126739501953125, 35.849212646484375, -22.068832397460938, 466.9206237792969, 106.66580963134766, 62.80314636230469, 99.34493255615234, 26.447242736816406, 232.94662475585938, 341.33331298828125, 129.29052734375, 10.442657470703125, 93.81657409667969, 219.16073608398438, 280.6267395019531, -78.57797241210938, 213.1425018310547, 191.05258178710938, -13.211698532104492, 73.45756530761719, 120.60137939453125, 126.69152069091797, 120.95077514648438, -32.821380615234375, -69.27418518066406, 176.160888671875, 71.6688003540039, 51.075645446777344, 479.517822265625, 104.4541015625, -22.37126922607422, 166.96484375, 77.70294189453125, 47.05467224121094, 166.18260192871094, 152.53146362304688, 245.18275451660156, 386.8250427246094, 76.56082153320312, 87.1095962524414, -68.43130493164062, 144.8993682861328, 71.2364501953125, -281.8214416503906, 295.05572509765625, 410.0599670410156, 139.64442443847656, 12.985834121704102, -52.66325378417969, 271.9266662597656, 50.20360565185547, 11.474853515625, 139.6148681640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000201.npy"} +{"epoch": 0.29515418502202645, "step": 202, "batch_size": 64, "mean": 71.93101501464844, "std": 142.24163818359375, "min": -211.66818237304688, "p10": -99.36132812499997, "median": 60.20570373535156, "p90": 256.66062622070314, "max": 526.6281127929688, "pos_frac": 0.703125, "sample": [526.6281127929688, -49.309329986572266, 106.27149200439453, 311.93505859375, 2.9747791290283203, -53.753868103027344, 212.54522705078125, -52.05113220214844, 123.39051818847656, 92.39949035644531, 49.89034652709961, -195.7517852783203, 173.23056030273438, 244.27850341796875, 208.02078247070312, 50.79832458496094, -199.77114868164062, 47.186614990234375, 103.8427963256836, 330.6583251953125, 113.5085678100586, 287.55609130859375, -133.1424560546875, 191.40582275390625, 24.269546508789062, 30.107131958007812, 401.897705078125, -17.169164657592773, 104.79502868652344, 139.42166137695312, 102.43626403808594, 124.25283813476562, 253.74014282226562, -26.722091674804688, -37.55438232421875, -115.255126953125, 80.14552307128906, 59.7470703125, 9.741950988769531, 225.84765625, 105.18539428710938, -8.33953857421875, 22.540103912353516, 262.6190490722656, 156.31246948242188, 111.77021789550781, 78.20578002929688, -211.66818237304688, 60.664337158203125, 94.00946044921875, 14.910770416259766, 43.78880310058594, -44.70585632324219, -69.3332748413086, 38.07171630859375, -110.83000183105469, 82.35084533691406, -72.60108947753906, -47.16397476196289, 53.570899963378906, 257.9122619628906, -134.71575927734375, 90.05972290039062, -21.473270416259766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000202.npy"} +{"epoch": 0.2966226138032305, "step": 203, "batch_size": 64, "mean": 88.74185180664062, "std": 122.0002670288086, "min": -183.9979248046875, "p10": -64.21826019287109, "median": 71.45519256591797, "p90": 253.94359436035163, "max": 447.37249755859375, "pos_frac": 0.796875, "sample": [18.271747589111328, 133.57167053222656, 160.84982299804688, 66.21399688720703, -28.046428680419922, 286.0398254394531, -73.96928405761719, 135.84573364257812, 28.27666473388672, 117.8863525390625, 1.9560775756835938, 104.36136627197266, 105.22808074951172, 164.81655883789062, -108.01438903808594, 185.3732452392578, 129.8240966796875, -24.460189819335938, 191.14895629882812, 144.4223175048828, 264.6802673339844, 412.63018798828125, -74.71954345703125, 115.25592041015625, 129.6944580078125, 38.996055603027344, 447.37249755859375, 13.917709350585938, -84.72935485839844, 143.5240936279297, 0.7860641479492188, 168.90298461914062, 192.94802856445312, 261.86566162109375, 31.212646484375, 177.00289916992188, 82.04227447509766, 105.3673324584961, 75.01214599609375, 123.81525421142578, 31.323867797851562, 62.98786163330078, 45.38671875, 235.45877075195312, -54.96100616455078, -183.9979248046875, -59.61054992675781, 67.89823913574219, -26.133264541625977, 9.803058624267578, 165.3621826171875, 13.708183288574219, 25.90578842163086, 47.00249481201172, -66.1929931640625, -76.72109985351562, 141.40621948242188, 59.12322998046875, 267.0324401855469, 327.2779846191406, -6.952180862426758, 230.55174255371094, 35.22985076904297, 23.415206909179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000203.npy"} +{"epoch": 0.29809104258443464, "step": 204, "batch_size": 64, "mean": 81.89997863769531, "std": 143.6380615234375, "min": -212.22216796875, "p10": -69.88979339599607, "median": 64.21564483642578, "p90": 304.6741271972658, "max": 475.9053955078125, "pos_frac": 0.703125, "sample": [219.25267028808594, 74.47735595703125, 43.474849700927734, 41.36686706542969, 121.7256851196289, -13.138229370117188, 221.09075927734375, -102.57477569580078, 91.04943084716797, 20.460474014282227, -41.56081771850586, 22.068082809448242, 14.201484680175781, 85.10277557373047, 55.700439453125, -38.029823303222656, -212.22216796875, 170.2845458984375, 51.553192138671875, 7.632097244262695, 262.82464599609375, -127.47061920166016, -27.838333129882812, -6.216327667236328, 70.86343383789062, -15.24893569946289, 71.90664672851562, 150.9446563720703, 152.5467071533203, -79.40974426269531, 397.67657470703125, -21.22077178955078, 62.4879150390625, 28.507299423217773, -46.15620422363281, 146.47915649414062, 49.25193786621094, 153.36868286132812, 428.247802734375, -199.50514221191406, 322.609619140625, 165.7071533203125, 163.25033569335938, 73.73530578613281, 94.84844970703125, -13.257570266723633, 42.10755157470703, 222.2102508544922, 332.1716613769531, -3.65057373046875, 84.2003173828125, 163.47317504882812, -121.43740844726562, 65.94337463378906, 475.9053955078125, 370.5435791015625, -29.071027755737305, 147.16139221191406, 23.597810745239258, 351.9154968261719, -119.53973388671875, 82.09140014648438, 110.80523681640625, -47.67657470703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000204.npy"} +{"epoch": 0.29955947136563876, "step": 205, "batch_size": 64, "mean": 108.63500213623047, "std": 140.55197143554688, "min": -252.134765625, "p10": -67.90677413940429, "median": 111.54692077636719, "p90": 275.973129272461, "max": 397.0291442871094, "pos_frac": 0.78125, "sample": [40.172950744628906, 133.660888671875, 221.05963134765625, 376.73529052734375, 114.91358184814453, 5.956827163696289, -8.789819717407227, 28.159423828125, 10.443649291992188, 228.08804321289062, 260.1221923828125, -89.07861328125, -99.65460205078125, 93.30618286132812, -15.099884033203125, 78.11692810058594, 16.869775772094727, 368.3751525878906, 232.07057189941406, 27.232601165771484, 102.90897369384766, 100.73199462890625, -140.55059814453125, 171.0452423095703, 304.6904296875, 242.01791381835938, 193.58694458007812, -2.8393592834472656, -67.0300064086914, 209.9935302734375, -44.89011764526367, 397.0291442871094, 127.61929321289062, 83.75403594970703, 37.786399841308594, 124.28410339355469, 391.0887145996094, 263.8450012207031, 120.62025451660156, 134.68711853027344, -207.78363037109375, 140.88002014160156, 120.28424835205078, 237.6821746826172, 150.62033081054688, -68.28253173828125, 146.4514923095703, 86.12165832519531, 281.1708984375, 80.75817108154297, 250.4630584716797, 196.13319396972656, 171.38327026367188, 71.12355041503906, 152.4199981689453, 350.2114562988281, 108.18025970458984, -12.413368225097656, 22.512222290039062, -252.134765625, -75.42219543457031, 219.170166015625, -31.273223876953125, 41.344207763671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000205.npy"} +{"epoch": 0.3010279001468429, "step": 206, "batch_size": 64, "mean": 117.12614440917969, "std": 127.1107177734375, "min": -239.77175903320312, "p10": -35.52505664825438, "median": 106.53827285766602, "p90": 290.371258544922, "max": 406.49346923828125, "pos_frac": 0.84375, "sample": [52.57080078125, 184.94659423828125, 86.17326354980469, 122.87895202636719, 406.49346923828125, 303.2294921875, 82.25331115722656, 50.995567321777344, 217.11412048339844, 258.67681884765625, 55.190887451171875, 132.93150329589844, 19.97505760192871, 106.21595001220703, -239.77175903320312, 67.55877685546875, 77.27410888671875, 162.4696502685547, 60.175148010253906, -19.674283981323242, 215.2197265625, -42.31824493408203, 234.487548828125, 351.1875305175781, 124.57766723632812, 192.54635620117188, 28.34101104736328, 202.57896423339844, -92.91252136230469, -107.14009857177734, 192.0812530517578, 310.482177734375, 211.76321411132812, 221.4788055419922, 4.919221878051758, 250.07424926757812, 74.43360137939453, -66.8997802734375, 158.80441284179688, -135.69947814941406, 48.03826141357422, 214.48513793945312, 11.717884063720703, 167.56842041015625, 86.09580993652344, -12.421127319335938, 118.0368881225586, 84.04656219482422, 191.6936798095703, 93.69924926757812, -60.953834533691406, 6.410453796386719, 101.58556365966797, 184.49893188476562, 355.0924072265625, 260.36871337890625, -4.034433364868164, 118.421630859375, 106.860595703125, 316.1591491699219, 66.36444854736328, 21.765037536621094, 308.9014892578125, 195.98928833007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000206.npy"} +{"epoch": 0.302496328928047, "step": 207, "batch_size": 64, "mean": 134.46798706054688, "std": 155.47544860839844, "min": -134.90017700195312, "p10": -28.983270263671873, "median": 89.51468658447266, "p90": 314.5174499511719, "max": 586.1400756835938, "pos_frac": 0.84375, "sample": [20.64529037475586, -134.90017700195312, 62.11328887939453, 98.21537780761719, 181.89547729492188, 46.83312225341797, 317.874267578125, -30.126731872558594, -15.913528442382812, 12.337387084960938, 89.88482666015625, 77.12053680419922, 299.4517517089844, -9.103912353515625, 133.91940307617188, 156.9066619873047, 53.95643615722656, 90.97453308105469, 34.784854888916016, 36.48834991455078, 50.84144973754883, 167.91680908203125, 29.44007110595703, 254.68186950683594, 28.106918334960938, -62.628326416015625, 306.68487548828125, 61.1783447265625, 31.27730369567871, -55.99091720581055, 282.0596618652344, 268.49957275390625, -57.77497100830078, 0.7751312255859375, 30.76410675048828, 260.58148193359375, 35.91157531738281, 230.7598114013672, 153.6181640625, 245.7086639404297, 261.1004333496094, 124.58509826660156, 109.73771667480469, 132.5054931640625, 262.81121826171875, 249.09567260742188, 84.91121673583984, 385.1063232421875, 505.5529479980469, 280.30999755859375, 46.686004638671875, 438.06207275390625, -48.100975036621094, 45.69001007080078, 586.1400756835938, 42.316162109375, 89.14454650878906, 99.17733764648438, 86.1826171875, 522.6964111328125, -84.45548248291016, 195.13400268554688, 432.1093444824219, -26.31519317626953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000207.npy"} +{"epoch": 0.3039647577092511, "step": 208, "batch_size": 64, "mean": 106.39715576171875, "std": 156.0309600830078, "min": -335.8766174316406, "p10": -41.075463104248044, "median": 77.94448852539062, "p90": 324.5382110595704, "max": 441.44024658203125, "pos_frac": 0.765625, "sample": [-208.6074676513672, 144.7502899169922, -153.2298583984375, 297.357177734375, 23.94806671142578, -20.347736358642578, 378.5699462890625, 30.95991325378418, 111.97469329833984, 249.5965576171875, 244.75318908691406, 55.489173889160156, 79.91171264648438, 166.4991912841797, 207.18612670898438, 59.227928161621094, 441.44024658203125, 19.629486083984375, 429.48272705078125, 137.31600952148438, -6.756378173828125, 32.01696014404297, 245.44314575195312, 174.54837036132812, 75.97726440429688, 27.85858917236328, 52.579345703125, -3.407684326171875, -119.54948425292969, 280.64764404296875, -142.5906982421875, -7.870700836181641, 49.28053665161133, 106.97516632080078, 138.91461181640625, 300.76788330078125, 279.4596252441406, 39.74522018432617, -12.338279724121094, 240.44522094726562, -41.980743408203125, 69.70906066894531, 26.07275390625, 340.4796142578125, 141.4678955078125, 337.63189697265625, 98.9030990600586, -335.8766174316406, -38.96314239501953, 142.81509399414062, 15.61566162109375, 126.21183776855469, 334.7254943847656, 99.87339782714844, 48.90306091308594, -132.27676391601562, 20.204132080078125, 380.1390075683594, 183.37374877929688, 289.11773681640625, 202.56622314453125, 70.494384765625, -1.0854339599609375, -16.757415771484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000208.npy"} +{"epoch": 0.3054331864904552, "step": 209, "batch_size": 64, "mean": 93.75180053710938, "std": 150.7190704345703, "min": -327.6315612792969, "p10": -83.88482818603515, "median": 90.0728988647461, "p90": 311.834701538086, "max": 427.9144287109375, "pos_frac": 0.71875, "sample": [334.42462158203125, 80.84768676757812, 175.1008758544922, -44.67296600341797, 366.00152587890625, 394.760009765625, 318.9383239746094, 112.45735168457031, 125.60291290283203, 102.67095947265625, -66.71406555175781, 94.56529998779297, 165.16860961914062, 98.53389739990234, 164.01632690429688, 142.09909057617188, 162.56227111816406, -163.25546264648438, -87.25576782226562, 57.930076599121094, 250.35540771484375, 224.89779663085938, -3.8870792388916016, -94.943359375, 50.226165771484375, -46.2999267578125, -13.931665420532227, 6.800010681152344, 102.95179748535156, 151.1984405517578, 116.85260009765625, 85.58049774169922, -123.08866119384766, -28.956607818603516, 60.969886779785156, 66.86407470703125, -76.01930236816406, -34.225215911865234, 374.8949279785156, 295.25958251953125, 49.723236083984375, 427.9144287109375, 9.816452026367188, -37.01775360107422, -26.24700927734375, 41.822425842285156, 51.62776184082031, 75.20115661621094, 256.40936279296875, 137.9072723388672, 141.86517333984375, -37.010826110839844, -127.02273559570312, -327.6315612792969, 180.82489013671875, 104.29367065429688, 31.61099624633789, 52.402374267578125, 133.44479370117188, -149.64041137695312, 343.00286865234375, 292.5667419433594, 208.0614013671875, 266.9099426269531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000209.npy"} +{"epoch": 0.3069016152716593, "step": 210, "batch_size": 64, "mean": 108.5168228149414, "std": 148.2569580078125, "min": -227.63223266601562, "p10": -30.519593811035154, "median": 82.30493927001953, "p90": 321.5768676757814, "max": 486.8726806640625, "pos_frac": 0.78125, "sample": [8.6988525390625, 15.524835586547852, 132.27294921875, 68.8065185546875, -32.01722717285156, -89.28440856933594, 186.55789184570312, -25.010149002075195, 141.71969604492188, -36.078102111816406, 193.9903564453125, 20.19879913330078, 31.941509246826172, 115.5357894897461, 271.64447021484375, 145.53829956054688, 0.9132537841796875, 73.94319152832031, 211.10202026367188, 15.152484893798828, 290.08074951171875, 90.66668701171875, 31.864686965942383, 232.6087646484375, 262.770263671875, 6.096029281616211, 15.951789855957031, 333.0264892578125, 189.24517822265625, 370.5184326171875, 126.29805755615234, 254.0996551513672, 113.06046295166016, 292.359130859375, -134.74769592285156, -25.59100341796875, -82.04290771484375, -26.748214721679688, 29.27855110168457, 11.296001434326172, 138.52186584472656, 165.82525634765625, 353.7724609375, 113.63375854492188, -6.263235092163086, -27.025115966796875, 437.6940002441406, -12.327743530273438, 128.84254455566406, 156.01333618164062, 276.26483154296875, 486.8726806640625, 294.861083984375, 48.066123962402344, -67.42282104492188, 93.87229919433594, -22.47655487060547, 0.6482582092285156, 11.182661056518555, 353.78582763671875, 5.72442626953125, 17.164512634277344, -227.63223266601562, 394.2364501953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000210.npy"} +{"epoch": 0.30837004405286345, "step": 211, "batch_size": 64, "mean": 92.89525604248047, "std": 165.66407775878906, "min": -225.66122436523438, "p10": -67.400138092041, "median": 62.42278861999512, "p90": 265.9051513671875, "max": 568.8287353515625, "pos_frac": 0.703125, "sample": [127.50043487548828, -59.36231994628906, 246.14154052734375, 98.4365463256836, 62.49691390991211, 164.43861389160156, -45.88714599609375, 112.6797866821289, 538.2449340820312, -225.66122436523438, -36.48863983154297, 58.69966125488281, 26.975658416748047, 11.9259033203125, 67.46075439453125, 233.78977966308594, 145.3170928955078, 209.37811279296875, 174.890380859375, -15.685256958007812, -0.9732894897460938, -47.783447265625, 207.63583374023438, 110.12832641601562, -70.84491729736328, -0.1981964111328125, -44.4459228515625, 513.904296875, 39.53920364379883, 568.8287353515625, 469.02716064453125, -46.26002502441406, -135.42800903320312, 143.16893005371094, 244.7734375, 318.67132568359375, -153.89254760742188, 142.16046142578125, 51.5548095703125, -14.673171997070312, 122.39949035644531, 409.395263671875, -34.14356231689453, 69.66670227050781, 10.468854904174805, 51.36393737792969, 72.74978637695312, -88.40556335449219, 14.839282989501953, 206.8876953125, -160.68104553222656, 62.348663330078125, 202.0836944580078, 39.46513366699219, 158.5477294921875, 268.880615234375, 93.7702407836914, 258.96240234375, 18.637496948242188, 131.6300811767578, 33.54970169067383, 16.17340087890625, -12.058090209960938, -191.4202880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000211.npy"} +{"epoch": 0.30983847283406757, "step": 212, "batch_size": 64, "mean": 132.0928955078125, "std": 171.62367248535156, "min": -259.5975341796875, "p10": -64.03234558105467, "median": 134.81912231445312, "p90": 321.32439575195315, "max": 573.6549072265625, "pos_frac": 0.75, "sample": [-259.5975341796875, -89.83760070800781, 321.8402404785156, 261.05670166015625, 53.03966522216797, 244.13436889648438, 112.86944580078125, -70.21427917480469, -38.23109436035156, -1.6778030395507812, 238.5898895263672, 182.41598510742188, -207.8190155029297, -38.73675537109375, -23.579021453857422, 6.357017517089844, 322.3768310546875, 213.92483520507812, 433.170166015625, 244.7359161376953, 115.04745483398438, -110.67900848388672, 31.821643829345703, 93.71479797363281, 148.42666625976562, 173.4327392578125, 224.75241088867188, -80.38389587402344, 320.1207580566406, -10.077554702758789, 129.78433227539062, 179.71749877929688, 573.6549072265625, 173.39857482910156, 53.286705017089844, -231.50172424316406, 316.70526123046875, -49.60783386230469, 132.09243774414062, 566.803955078125, 295.8910217285156, 122.48773193359375, 42.638641357421875, 231.0367431640625, 14.891021728515625, 144.24520874023438, -33.66055679321289, 58.3040657043457, 315.5078125, 48.47957229614258, 144.19015502929688, 365.37286376953125, 398.7359619140625, 262.68310546875, 298.2597351074219, -7.609165191650391, 231.80857849121094, 67.11045837402344, 151.5928955078125, 137.54580688476562, 68.3076171875, 304.54400634765625, -41.12371826171875, 177.3782958984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000212.npy"} +{"epoch": 0.31130690161527164, "step": 213, "batch_size": 64, "mean": 126.10995483398438, "std": 153.3463592529297, "min": -205.50222778320312, "p10": -59.96718139648437, "median": 112.95209884643555, "p90": 344.5236450195313, "max": 440.9504699707031, "pos_frac": 0.796875, "sample": [288.17413330078125, -91.4705810546875, 32.702781677246094, -108.65206146240234, 226.9689178466797, 18.0057373046875, 65.63103485107422, 100.48954772949219, -205.50222778320312, -84.52114868164062, 10.473073959350586, 304.82049560546875, 351.6200866699219, -33.302589416503906, 87.83919525146484, 178.80645751953125, 9.224679946899414, 67.65571594238281, 162.66729736328125, 321.48028564453125, 429.974609375, 312.3218994140625, 48.60558319091797, 412.11810302734375, 166.71888732910156, 278.5880126953125, 378.9471740722656, -16.410228729248047, -4.302894592285156, -15.695110321044922, 182.96078491210938, 125.4146499633789, 201.3338623046875, 125.98796844482422, -56.476837158203125, 15.81475830078125, 175.016357421875, 347.7771911621094, 277.0361328125, 147.628173828125, -8.238521575927734, 193.41380310058594, 336.9320373535156, -61.463043212890625, 203.0135040283203, 62.091705322265625, 169.34329223632812, 440.9504699707031, -138.56341552734375, 196.42166137695312, 258.7027587890625, 301.3243408203125, 349.8289794921875, 47.98768997192383, 9.811294555664062, 5.817584991455078, -127.22249603271484, 77.57522583007812, 96.96216583251953, 63.71723175048828, 23.32311248779297, 25.514419555664062, 165.0974884033203, 142.22596740722656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000213.npy"} +{"epoch": 0.31277533039647576, "step": 214, "batch_size": 64, "mean": 113.76599884033203, "std": 147.74057006835938, "min": -176.06768798828125, "p10": -50.10351638793945, "median": 90.65762329101562, "p90": 293.58701477050784, "max": 516.3482666015625, "pos_frac": 0.78125, "sample": [57.27643585205078, 410.5124206542969, 14.134746551513672, 199.75022888183594, -10.394132614135742, -12.644857406616211, -75.51884460449219, -176.06768798828125, 275.6064453125, -83.44461059570312, 174.19851684570312, 39.900001525878906, 109.50794219970703, -0.7567424774169922, 31.473953247070312, 102.35261535644531, 230.00088500976562, -27.470544815063477, 52.15150451660156, 189.89454650878906, 67.3169937133789, 63.331687927246094, 109.54619598388672, 274.620849609375, 244.60360717773438, 81.00303649902344, 17.31584930419922, 1.906869888305664, 123.404541015625, -45.18782043457031, 100.31221008300781, 46.354042053222656, 6.383110046386719, 340.1068420410156, -72.8541030883789, 516.3482666015625, 344.9813232421875, 7.9580535888671875, 26.2196044921875, 395.7003479003906, 214.0360870361328, 207.32894897460938, 262.16265869140625, 295.742431640625, -141.55946350097656, 510.94482421875, 25.426488876342773, 200.55450439453125, 219.03738403320312, 104.4903793334961, -50.93614959716797, 178.80592346191406, -42.98876190185547, 116.41339111328125, -48.16070556640625, 234.31063842773438, 77.23274230957031, 288.5577087402344, 65.5896987915039, 151.3194580078125, 131.0456085205078, 126.83280944824219, -55.77208709716797, 60.775020599365234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000214.npy"} +{"epoch": 0.3142437591776799, "step": 215, "batch_size": 64, "mean": 118.28256225585938, "std": 137.79710388183594, "min": -149.2458038330078, "p10": -62.97738494873047, "median": 104.36443710327148, "p90": 292.84925842285156, "max": 520.306396484375, "pos_frac": 0.828125, "sample": [67.91455078125, 328.4442443847656, 124.2568359375, 266.98822021484375, -63.336669921875, -95.79232788085938, 55.161781311035156, 121.19031524658203, 140.26036071777344, 97.63774871826172, 130.15147399902344, 44.424049377441406, 1.466684341430664, -30.556188583374023, 414.0084228515625, 257.470703125, 243.64215087890625, 288.8614501953125, 520.306396484375, 165.2356414794922, 103.69068908691406, 407.07440185546875, 110.01807403564453, -91.16679382324219, 73.83401489257812, 197.2139892578125, -68.08460998535156, 66.96826934814453, 192.51666259765625, 252.41726684570312, 23.22352409362793, 69.32170104980469, 50.843780517578125, -5.896902084350586, 158.2902374267578, 95.8266372680664, -121.9457778930664, 149.7149200439453, 196.91445922851562, 254.39410400390625, 351.01019287109375, 105.0381851196289, 195.86013793945312, -16.016952514648438, 127.73100280761719, 50.47125244140625, 48.567420959472656, -62.13905334472656, 43.20717239379883, 203.70132446289062, -149.2458038330078, 152.05816650390625, 388.2032470703125, 32.343360900878906, 82.69110107421875, 97.5428466796875, 294.5583190917969, 11.283004760742188, 205.67323303222656, 33.892799377441406, 106.36557006835938, 45.258758544921875, -85.22869873046875, 114.35242462158203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000215.npy"} +{"epoch": 0.315712187958884, "step": 216, "batch_size": 64, "mean": 117.41053771972656, "std": 167.65435791015625, "min": -285.84002685546875, "p10": -71.28892593383789, "median": 101.4305191040039, "p90": 362.97230224609376, "max": 563.2782592773438, "pos_frac": 0.75, "sample": [185.90841674804688, 410.68658447265625, -72.33698272705078, 142.81549072265625, 202.28228759765625, 42.551177978515625, -183.97972106933594, 157.96499633789062, 215.67202758789062, 18.72602081298828, 563.2782592773438, 98.94740295410156, 111.74465942382812, -71.64564514160156, -16.01529312133789, -13.734466552734375, -90.92755889892578, 206.05178833007812, 34.96384811401367, -175.03549194335938, 2.2858734130859375, 306.798095703125, -21.982940673828125, -16.551727294921875, 111.21463012695312, 150.91285705566406, 49.4320068359375, 191.03797912597656, 121.22178649902344, 7.891935348510742, 103.91363525390625, 197.90863037109375, 25.792129516601562, -54.46379852294922, 384.5423889160156, 365.50604248046875, -65.89588928222656, 310.8004150390625, 40.92616271972656, 80.40518951416016, 55.649803161621094, 264.5892639160156, -4.030050277709961, -285.84002685546875, 58.98048400878906, 184.9562225341797, 239.16177368164062, 396.427490234375, 46.23420715332031, -70.45658111572266, 274.70703125, 40.58729553222656, 115.3460922241211, 170.98947143554688, -46.36627960205078, 332.47052001953125, 237.39852905273438, 334.40972900390625, 396.1694030761719, 377.97247314453125, 20.7998046875, -72.18695831298828, 29.630897521972656, 357.06024169921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000216.npy"} +{"epoch": 0.31718061674008813, "step": 217, "batch_size": 64, "mean": 136.8664093017578, "std": 155.13499450683594, "min": -171.30088806152344, "p10": -16.147467803955074, "median": 92.16363143920898, "p90": 370.6636474609375, "max": 533.6607666015625, "pos_frac": 0.859375, "sample": [140.39236450195312, 137.27224731445312, 91.58841705322266, 132.76907348632812, 63.270240783691406, 50.857139587402344, 68.72396087646484, 233.1149444580078, 75.40677642822266, 92.13397216796875, 54.082427978515625, 372.5784912109375, 145.72946166992188, 366.1956787109375, -128.41119384765625, 3.4463272094726562, 34.65943908691406, 1.275115966796875, 200.5704345703125, 533.6607666015625, -1.5408363342285156, 62.630760192871094, 266.9100036621094, 421.3280944824219, 92.19329071044922, 210.66946411132812, -29.14820098876953, 69.58140563964844, -171.30088806152344, 114.53961181640625, 484.1814880371094, 200.71060180664062, 9.465330123901367, 63.1368408203125, 257.1153564453125, 2.00360107421875, 230.23397827148438, 288.4098205566406, -12.201919555664062, 491.03564453125, 427.91619873046875, -90.2258529663086, 341.033203125, 277.43756103515625, 138.25587463378906, 450.9071350097656, 4.6399993896484375, 97.73739624023438, 70.2313232421875, 63.963531494140625, 70.85609436035156, 164.71817016601562, 260.91595458984375, 1.1201343536376953, 153.341064453125, 75.40911865234375, -41.91832733154297, -17.838417053222656, 25.86913299560547, -35.24894714355469, 128.60801696777344, 206.48728942871094, 245.57347106933594, 20.391469955444336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000217.npy"} +{"epoch": 0.3186490455212922, "step": 218, "batch_size": 64, "mean": 106.43551635742188, "std": 161.29302978515625, "min": -261.2113037109375, "p10": -75.52274780273437, "median": 80.03852081298828, "p90": 330.3433074951172, "max": 555.0560913085938, "pos_frac": 0.75, "sample": [92.43693542480469, 61.472694396972656, 172.76632690429688, 248.7497100830078, 149.165283203125, 30.583263397216797, 331.8722839355469, 77.21000671386719, 10.175912857055664, 72.30821990966797, 156.6066436767578, 51.9366455078125, -18.920989990234375, 138.67697143554688, 451.33984375, 174.9502410888672, 119.38893127441406, -27.347557067871094, 135.87814331054688, 195.54541015625, 37.38056945800781, 57.36046600341797, 4.096279144287109, 236.83560180664062, -57.87312316894531, -261.2113037109375, 326.77569580078125, 97.22271728515625, 186.508544921875, 203.4215545654297, 34.54695129394531, 415.1212158203125, 414.01971435546875, 247.79934692382812, 201.80511474609375, 66.05039978027344, -21.03871726989746, -26.980409622192383, 80.74154663085938, -15.34317398071289, -37.856414794921875, -99.33257293701172, 144.11810302734375, -91.36256408691406, -74.02803802490234, -50.13477325439453, 555.0560913085938, 332.35693359375, 208.7912139892578, -76.16333770751953, -150.57498168945312, 235.00877380371094, 446.69482421875, 12.102930068969727, 47.01688003540039, 79.33549499511719, 158.92152404785156, 105.5516586303711, 44.400978088378906, -175.16766357421875, 79.09717559814453, -148.7694549560547, 303.532470703125, 111.24362182617188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000218.npy"} +{"epoch": 0.3201174743024963, "step": 219, "batch_size": 64, "mean": 92.58406829833984, "std": 174.80233764648438, "min": -423.2983703613281, "p10": -112.45169448852536, "median": 74.52236557006836, "p90": 351.8211486816407, "max": 479.76385498046875, "pos_frac": 0.75, "sample": [458.57196044921875, 156.01193237304688, 7.969358444213867, 334.3529357910156, 5.245248794555664, 429.39776611328125, -423.2983703613281, 16.055688858032227, 35.94122314453125, 315.0036315917969, -11.896499633789062, 99.06269836425781, 206.64505004882812, 449.01214599609375, 479.76385498046875, 169.10202026367188, 60.74559783935547, 257.71661376953125, 112.77218627929688, 103.4595947265625, 273.7991638183594, 36.92295837402344, -123.4122543334961, 1.361083984375, -53.01000213623047, -68.01789093017578, 359.3075256347656, -4.601287841796875, 364.2367248535156, 391.05877685546875, 41.262367248535156, 272.6660461425781, -8.687515258789062, 48.81639099121094, 7.8901519775390625, 173.25108337402344, 114.56038665771484, 17.898902893066406, 114.20867919921875, 30.0029296875, 300.0002136230469, 131.866943359375, 100.374755859375, -175.05184936523438, 83.49797058105469, 161.28109741210938, -174.74169921875, 6.057626724243164, 9.149267196655273, 224.69027709960938, 26.493408203125, -129.7379150390625, -1.1455001831054688, -51.76438522338867, -233.8028106689453, 94.50641632080078, 167.1044921875, 83.79544830322266, -89.07099914550781, -30.771514892578125, 144.9273681640625, 68.08389282226562, 80.9608383178711, -122.47199249267578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000219.npy"} +{"epoch": 0.32158590308370044, "step": 220, "batch_size": 64, "mean": 77.35502624511719, "std": 127.04686737060547, "min": -246.79562377929688, "p10": -70.32778930664062, "median": 74.8817367553711, "p90": 199.86394958496095, "max": 445.5641174316406, "pos_frac": 0.734375, "sample": [166.76779174804688, 49.894630432128906, 70.14093017578125, -19.83245086669922, 41.41022491455078, 326.8800048828125, 360.1448669433594, 10.023326873779297, 147.64869689941406, 155.01295471191406, 124.6735610961914, 278.3518981933594, 136.1800537109375, 144.74508666992188, -219.53598022460938, 107.5230712890625, 169.54519653320312, 43.12725830078125, 135.21054077148438, 196.42372131347656, 68.31260681152344, 23.722986221313477, 4.6035614013671875, -246.79562377929688, -68.0573501586914, 201.3383331298828, -22.506723403930664, 50.956878662109375, 206.6112060546875, 62.863529205322266, -63.36279296875, 160.83045959472656, 127.14790344238281, -21.169998168945312, 215.6049346923828, 115.65265655517578, 445.5641174316406, 151.9996795654297, -5.756843566894531, 110.3586654663086, 14.091312408447266, 49.76286697387695, -0.9748134613037109, 105.47694396972656, -112.23441314697266, 140.07037353515625, 64.43997192382812, 133.5565643310547, -142.6920166015625, 79.5023193359375, 70.26115417480469, -0.6167221069335938, 147.04510498046875, -49.90592956542969, 46.507694244384766, 153.18540954589844, 176.14703369140625, -71.30083465576172, 176.79806518554688, 179.92886352539062, -20.567180633544922, 156.5789794921875, -139.31826782226562, -147.27389526367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000220.npy"} +{"epoch": 0.32305433186490456, "step": 221, "batch_size": 64, "mean": 170.87864685058594, "std": 162.7834014892578, "min": -184.22882080078125, "p10": -17.32612380981445, "median": 174.84353637695312, "p90": 383.0998687744141, "max": 545.7762451171875, "pos_frac": 0.84375, "sample": [77.51978302001953, 111.41458129882812, 250.08973693847656, -5.7684173583984375, 97.86933898925781, 545.7762451171875, 2.5285110473632812, 394.9490966796875, 222.57476806640625, 276.8643798828125, 280.29168701171875, 15.09262466430664, 294.10125732421875, 268.6914978027344, 153.23793029785156, 272.46661376953125, -14.999603271484375, 451.3849182128906, -3.413942337036133, 94.2043228149414, 161.00128173828125, 305.44635009765625, 386.3966369628906, 467.7264404296875, 483.8883056640625, 246.82196044921875, 209.21817016601562, -184.22882080078125, 54.583038330078125, 65.78722381591797, 170.55630493164062, 197.0375213623047, -29.349166870117188, 62.43761444091797, 65.46212768554688, 179.13076782226562, 114.2826156616211, -132.73062133789062, 258.2276611328125, 375.40740966796875, -18.323204040527344, 296.53265380859375, 221.37631225585938, 318.5829162597656, 114.217529296875, 223.65008544921875, 33.644500732421875, 373.8883056640625, 322.80548095703125, 241.525634765625, -37.33176803588867, -108.2240982055664, 6.525413513183594, 258.6244201660156, 47.98731994628906, -76.19184875488281, 9.917093276977539, 133.75314331054688, 47.536869049072266, 202.92462158203125, 303.2375793457031, 90.90605926513672, 485.47552490234375, 201.2147979736328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000221.npy"} +{"epoch": 0.3245227606461087, "step": 222, "batch_size": 64, "mean": 128.28977966308594, "std": 163.05369567871094, "min": -244.22531127929688, "p10": -46.39433975219727, "median": 118.20893859863281, "p90": 355.80759582519545, "max": 565.4016723632812, "pos_frac": 0.78125, "sample": [-46.618385314941406, 438.7974548339844, -5.5164337158203125, -107.60513305664062, -6.162437438964844, -88.05498504638672, -14.932220458984375, 124.4954833984375, 5.912261962890625, 230.39047241210938, 104.75786590576172, 122.25334930419922, 418.2918701171875, 89.56364440917969, 122.5658187866211, 236.22915649414062, -244.22531127929688, 160.25927734375, 258.30316162109375, 174.35617065429688, 155.43251037597656, 113.93173217773438, -211.3231201171875, 325.0828552246094, -125.6063461303711, 267.98126220703125, 500.0406494140625, 91.68099975585938, 42.72949981689453, 15.597427368164062, 164.80068969726562, 22.51409912109375, 159.964599609375, -45.87156677246094, 205.8633575439453, 197.73277282714844, 44.78623962402344, 150.65823364257812, -22.706714630126953, 229.5067138671875, -29.715675354003906, 99.27281188964844, 83.8372573852539, 81.89479064941406, 324.3866271972656, 240.02093505859375, 114.1645278930664, 240.58953857421875, 154.12095642089844, 46.8251953125, -36.43989562988281, 166.73695373535156, 368.975341796875, 68.63592529296875, 416.2417907714844, 208.28060913085938, 124.89115905761719, -109.21076965332031, 218.30419921875, 19.97808074951172, 565.4016723632812, 55.08856201171875, 422.08251953125, 110.32587432861328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000222.npy"} +{"epoch": 0.32599118942731276, "step": 223, "batch_size": 64, "mean": 107.05718994140625, "std": 179.18881225585938, "min": -344.72625732421875, "p10": -113.98109283447265, "median": 109.78412628173828, "p90": 312.3448425292969, "max": 626.3522338867188, "pos_frac": 0.75, "sample": [31.151718139648438, -70.7936019897461, 217.04800415039062, 137.92828369140625, 138.91259765625, 358.576171875, 197.71902465820312, 102.73540496826172, 211.08924865722656, 162.60723876953125, -154.35472106933594, 55.06317138671875, 96.29374694824219, 235.59976196289062, 300.22113037109375, 292.81439208984375, 135.80392456054688, 87.39588928222656, 553.9404907226562, -12.473812103271484, 134.85267639160156, -54.70806121826172, 93.17910766601562, -60.69992446899414, -58.07439422607422, 305.2951965332031, -149.31736755371094, 375.3072509765625, 104.46540069580078, -116.77799224853516, 3.33636474609375, 9.352127075195312, 265.35333251953125, -344.72625732421875, 122.86865234375, 138.58566284179688, -203.8755645751953, 199.68948364257812, 71.70034790039062, 626.3522338867188, 233.26242065429688, -107.45499420166016, 176.85067749023438, 266.2454833984375, -142.139892578125, -60.85208511352539, 284.2252197265625, 331.154541015625, 155.7109832763672, -5.010108947753906, 46.980743408203125, 39.80671691894531, 33.644378662109375, 406.37109375, 26.48644256591797, 27.99968719482422, -12.132116317749023, 10.037221908569336, 143.30545043945312, 129.31121826171875, 115.10285186767578, 164.79501342773438, -266.8430480957031, 315.3661193847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000223.npy"} +{"epoch": 0.3274596182085169, "step": 224, "batch_size": 64, "mean": 124.37174987792969, "std": 186.22703552246094, "min": -333.69287109375, "p10": -74.87051162719726, "median": 70.05747604370117, "p90": 363.2789642333985, "max": 670.1328125, "pos_frac": 0.796875, "sample": [259.4755859375, 2.6950531005859375, 137.14349365234375, 26.682655334472656, -85.17411041259766, 564.3087768554688, 245.2152862548828, 45.259376525878906, 72.6169662475586, -333.69287109375, 46.210914611816406, 183.0914306640625, 151.13441467285156, 155.3034210205078, 450.8253173828125, 88.89932250976562, 54.473289489746094, 227.6190948486328, 192.41644287109375, 670.1328125, 299.779296875, 107.90771484375, 357.4803161621094, 263.5874938964844, -137.0369415283203, -82.54733276367188, 276.1477966308594, 67.49798583984375, 238.79461669921875, -22.972381591796875, 6.2279510498046875, 39.83282470703125, 35.749366760253906, 634.0545654296875, 262.3411560058594, 239.4305419921875, 112.56787872314453, -75.62987518310547, 29.27381706237793, 24.110942840576172, 251.10235595703125, 1.02557373046875, 47.98224639892578, -24.505535125732422, 109.32003784179688, -84.21617889404297, -64.10920715332031, 12.330703735351562, -110.01420593261719, 29.30136489868164, 180.54412841796875, 126.2760009765625, 365.76409912109375, -5.057310104370117, 8.093708038330078, 65.37681579589844, 5.22540283203125, 327.2210998535156, 40.282623291015625, -73.09866333007812, 89.1968002319336, 419.31549072265625, 427.6029968261719, -16.40304946899414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000224.npy"} +{"epoch": 0.328928046989721, "step": 225, "batch_size": 64, "mean": 146.93002319335938, "std": 186.6449737548828, "min": -262.6552429199219, "p10": -93.19714126586912, "median": 146.3915252685547, "p90": 402.68798217773445, "max": 734.08349609375, "pos_frac": 0.71875, "sample": [243.59771728515625, 166.80868530273438, 419.75128173828125, 365.77252197265625, 35.55016326904297, 113.46829986572266, 47.30419921875, -106.23931884765625, -144.38079833984375, 450.121826171875, 366.9530334472656, 277.7754821777344, -149.36268615722656, -72.82903289794922, 284.19085693359375, -44.03705978393555, -111.85689544677734, 21.77931785583496, -9.998088836669922, 68.86585998535156, 110.72354125976562, 143.56007385253906, 235.2613525390625, 168.96163940429688, -9.942506790161133, 145.31573486328125, 28.998138427734375, 172.16241455078125, -19.15450668334961, 256.94415283203125, 323.34356689453125, 177.9611053466797, 227.24842834472656, 102.21161651611328, -45.3378791809082, 471.96844482421875, 449.28564453125, -7.777290344238281, 251.57794189453125, 409.53717041015625, 99.65631103515625, 341.27117919921875, 193.0654754638672, -37.216392517089844, 163.43443298339844, 147.46731567382812, 734.08349609375, 282.5640563964844, 166.2137451171875, 78.60691833496094, 420.893310546875, -5.7135009765625, 235.11422729492188, -101.92633056640625, -103.04130554199219, 386.70654296875, 300.07183837890625, 178.18435668945312, 174.43072509765625, -262.6552429199219, 135.8955078125, 141.214111328125, -48.314491271972656, -32.5687255859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000225.npy"} +{"epoch": 0.3303964757709251, "step": 226, "batch_size": 64, "mean": 118.97482299804688, "std": 174.26934814453125, "min": -208.06114196777344, "p10": -39.074589920043934, "median": 79.1956558227539, "p90": 349.91426086425787, "max": 603.6814575195312, "pos_frac": 0.765625, "sample": [603.6814575195312, -158.21966552734375, -43.29940414428711, 14.127899169921875, -13.33660888671875, 169.58897399902344, -10.19287109375, 66.49118041992188, 174.22616577148438, -14.41103744506836, 133.31851196289062, 10.757369995117188, -7.115959167480469, 37.554176330566406, -185.69833374023438, 91.80735778808594, -110.12730407714844, 354.3565368652344, 337.6134338378906, 30.591026306152344, 339.5489501953125, 364.155029296875, 84.35384368896484, 135.52745056152344, 184.16285705566406, 297.09619140625, 18.233970642089844, -29.216690063476562, 132.05636596679688, 428.7747497558594, 303.26446533203125, -74.23694610595703, 181.85179138183594, 78.21660614013672, 47.27748107910156, 559.087890625, 234.14730834960938, 544.62841796875, 301.4974670410156, -9.266746520996094, -4.215576171875, 143.25784301757812, -208.06114196777344, 157.4666748046875, 45.259891510009766, 124.9937744140625, 4.847877502441406, 5.2266693115234375, 9.177835464477539, 80.1747055053711, -4.5018310546875, 270.766845703125, 108.52310180664062, 199.01535034179688, 49.958656311035156, 122.65577697753906, -89.91360473632812, 6.43739128112793, 504.0299072265625, 60.39800262451172, 43.54663848876953, 135.94003295898438, 35.274261474609375, 211.2565155029297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000226.npy"} +{"epoch": 0.33186490455212925, "step": 227, "batch_size": 64, "mean": 144.17855834960938, "std": 174.31427001953125, "min": -381.69488525390625, "p10": -16.484533309936516, "median": 110.09749603271484, "p90": 368.63389282226564, "max": 609.2084350585938, "pos_frac": 0.84375, "sample": [309.0401611328125, 108.0643310546875, 132.01226806640625, 382.3672180175781, 226.53817749023438, 37.4488525390625, 57.15901184082031, 101.15889739990234, 306.2354736328125, 19.756668090820312, 92.89142608642578, 109.11131286621094, 186.97451782226562, 40.660789489746094, -19.867481231689453, 27.165443420410156, 164.80361938476562, 103.66966247558594, 195.8448028564453, 609.2084350585938, 163.29627990722656, 78.68075561523438, 351.1079406738281, 220.1911163330078, 42.81847381591797, 288.60107421875, 164.32516479492188, 251.3868408203125, 105.00125122070312, 94.50050354003906, 179.66073608398438, 68.55259704589844, 350.42340087890625, 5.876323699951172, -81.54299926757812, -113.91130065917969, 187.62347412109375, 217.15750122070312, 503.20001220703125, 111.08367919921875, 191.05267333984375, 467.1683349609375, -113.45720672607422, 578.5487670898438, -5.453996658325195, 20.55127716064453, 188.69493103027344, 44.982460021972656, 222.10687255859375, 38.263675689697266, 132.50906372070312, 370.9454345703125, 116.009521484375, -8.590988159179688, 7.040239334106445, -51.16487121582031, 363.24029541015625, -381.69488525390625, -8.523712158203125, 89.89949035644531, -59.158531188964844, 23.268762588500977, 131.8353271484375, 491.0788879394531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000227.npy"} +{"epoch": 0.3333333333333333, "step": 228, "batch_size": 64, "mean": 103.1017074584961, "std": 165.70947265625, "min": -267.6720275878906, "p10": -105.69986801147459, "median": 106.22488403320312, "p90": 319.1811767578125, "max": 491.9019775390625, "pos_frac": 0.734375, "sample": [-176.20098876953125, -239.4617919921875, -267.6720275878906, 209.71304321289062, 140.8021240234375, 338.6832275390625, -198.00772094726562, 169.9336395263672, 194.67208862304688, -157.44439697265625, 28.037109375, 290.3927307128906, 491.9019775390625, 324.30670166015625, 463.03179931640625, 328.18231201171875, 354.91790771484375, 175.249755859375, 121.58470153808594, -33.93794250488281, -43.7823486328125, 305.6289367675781, 117.80835723876953, 307.22161865234375, 181.69149780273438, 156.04127502441406, 127.78694152832031, 17.770214080810547, -86.43083953857422, 109.93571472167969, 38.36918640136719, 175.30137634277344, 9.113510131835938, 205.52029418945312, 223.16006469726562, 95.04000091552734, 114.09415435791016, 81.03298950195312, 34.25569152832031, 35.114654541015625, 82.32003021240234, 238.105712890625, -86.50491333007812, -17.45220947265625, 102.51405334472656, 99.60755157470703, 444.8551940917969, 19.92766571044922, 83.60380554199219, 58.377647399902344, 265.9571533203125, -8.416812896728516, -24.0538330078125, 198.93798828125, 234.22142028808594, 163.5211181640625, 206.5637969970703, 111.43157196044922, -113.92627716064453, -24.440277099609375, -19.88990020751953, 24.907791137695312, -154.039794921875, -50.976829528808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000228.npy"} +{"epoch": 0.33480176211453744, "step": 229, "batch_size": 64, "mean": 110.69569396972656, "std": 158.15969848632812, "min": -375.85211181640625, "p10": -40.04754104614257, "median": 97.5522346496582, "p90": 336.72923278808594, "max": 541.8796997070312, "pos_frac": 0.765625, "sample": [-13.071670532226562, 14.316545486450195, -104.029052734375, 406.7159423828125, 53.386898040771484, -172.21017456054688, 109.3499984741211, 179.93682861328125, 71.82183837890625, 214.75735473632812, 264.0787658691406, 342.6813049316406, 98.60060119628906, 348.9560852050781, 90.35951232910156, 96.50386810302734, 228.16275024414062, 107.44689178466797, 36.50258255004883, 334.8208312988281, -97.2099609375, 60.908721923828125, 0.6357231140136719, 218.3274383544922, 140.98585510253906, 44.210296630859375, 541.8796997070312, -156.64215087890625, 50.17811584472656, 239.9111328125, 155.88217163085938, 233.4991455078125, 173.93104553222656, 365.70867919921875, 337.547119140625, 46.280555725097656, 307.348388671875, 28.95136260986328, 115.386962890625, -43.401824951171875, 57.469581604003906, 127.39753723144531, 45.304344177246094, 200.15182495117188, -30.6213436126709, 166.2002716064453, 197.58746337890625, 222.69334411621094, -29.832366943359375, -135.96612548828125, 119.66565704345703, 428.1336669921875, 30.458349227905273, 74.5635986328125, 74.61905670166016, 155.2834930419922, -9.832695007324219, 180.3023681640625, -14.562393188476562, 172.28167724609375, -375.85211181640625, -0.4893035888671875, -11.616928100585938, -32.22087860107422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000229.npy"} +{"epoch": 0.33627019089574156, "step": 230, "batch_size": 64, "mean": 160.3502655029297, "std": 157.3489227294922, "min": -262.7828674316406, "p10": -24.905437278747556, "median": 148.70856475830078, "p90": 396.46502685546875, "max": 502.27984619140625, "pos_frac": 0.84375, "sample": [76.22685241699219, 356.3814392089844, -4.439451217651367, 292.730224609375, 215.55267333984375, 75.65074920654297, -26.348100662231445, 239.8948516845703, 102.91413879394531, 134.65736389160156, 201.93736267089844, 165.54580688476562, 13.300682067871094, 213.637939453125, 28.200241088867188, 113.69534301757812, -51.571571350097656, 75.2376708984375, 93.45907592773438, 135.07403564453125, 53.69012451171875, -89.54042053222656, -21.539222717285156, 199.35397338867188, 139.381591796875, 176.31805419921875, 203.7239990234375, 339.07196044921875, 306.2477111816406, 156.8143310546875, 80.64468383789062, 8.806968688964844, 312.39599609375, 450.1211853027344, 160.5964813232422, -91.7301254272461, 119.62689971923828, 170.32614135742188, 232.57705688476562, 462.56500244140625, -34.087093353271484, -6.2042694091796875, -262.7828674316406, -54.453590393066406, 30.562057495117188, 49.31597137451172, 391.0821533203125, 122.51019287109375, 371.022216796875, 426.2254638671875, 152.4287567138672, 105.96136474609375, 106.45350646972656, 18.750978469848633, 398.77197265625, 258.3953857421875, 478.4866638183594, 187.5776824951172, 411.6588134765625, 198.8474884033203, 217.97471618652344, 144.98837280273438, 225.4612579345703, 502.27984619140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000230.npy"} +{"epoch": 0.3377386196769457, "step": 231, "batch_size": 64, "mean": 114.06205749511719, "std": 151.45257568359375, "min": -136.669921875, "p10": -48.24774894714355, "median": 86.78449249267578, "p90": 331.6016998291017, "max": 645.1551513671875, "pos_frac": 0.8125, "sample": [458.481201171875, 83.19657897949219, -56.96697235107422, 285.437744140625, 19.675498962402344, 78.79273986816406, 340.751708984375, 7.253143310546875, 68.49578094482422, 110.2251968383789, 103.99732971191406, 347.31280517578125, -66.9654541015625, 79.46908569335938, 480.8500061035156, 136.2273712158203, 202.49794006347656, 91.621826171875, 145.54766845703125, 209.20822143554688, 394.6103820800781, 62.70474624633789, 111.32776641845703, 8.008071899414062, 42.87377166748047, -56.87989044189453, 310.2516784667969, -50.54072952270508, 208.5305938720703, 645.1551513671875, 448.3498840332031, 229.495849609375, 122.15681457519531, 26.44258689880371, -136.669921875, 68.61373901367188, 104.685546875, 157.94369506835938, 90.37240600585938, -42.8974609375, -5.524749755859375, 41.88520050048828, 211.2935791015625, 66.32625579833984, -21.92644500732422, 114.8645248413086, -25.574081420898438, -100.5413818359375, 51.20283126831055, 124.9671630859375, 290.81097412109375, 159.7866973876953, 81.01995086669922, 9.004268646240234, -30.865833282470703, 119.98519897460938, -94.00740051269531, 114.58879089355469, 22.95263671875, 17.004241943359375, 10.950836181640625, 138.557373046875, 97.819091796875, 35.74784851074219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000231.npy"} +{"epoch": 0.3392070484581498, "step": 232, "batch_size": 64, "mean": 93.53819274902344, "std": 160.38485717773438, "min": -161.5782012939453, "p10": -71.82919540405273, "median": 54.66155815124512, "p90": 300.40176086425794, "max": 613.5557861328125, "pos_frac": 0.71875, "sample": [175.28543090820312, 110.67205047607422, 8.675317764282227, 42.77204132080078, 17.2567138671875, -138.34852600097656, 313.4955139160156, 253.11294555664062, -31.60662841796875, 251.72442626953125, 43.320716857910156, 441.30059814453125, 82.08092498779297, -70.26142120361328, -25.78582763671875, 58.514854431152344, -36.37799072265625, 188.74716186523438, -104.79377746582031, 68.60660552978516, 148.6934051513672, 587.9395141601562, 67.04336547851562, 85.03260803222656, 133.146484375, -37.556819915771484, 613.5557861328125, -3.9091033935546875, 30.9715576171875, 253.86590576171875, 145.21380615234375, 221.92971801757812, -58.480438232421875, 74.9107666015625, -91.90121459960938, 137.48680114746094, -118.43901824951172, 19.825424194335938, -72.5010986328125, 423.8602294921875, 31.866973876953125, -51.498069763183594, 3.74700927734375, -96.15766906738281, 40.0500373840332, 78.66377258300781, 44.43743896484375, -161.5782012939453, 269.2220458984375, 124.82223510742188, 339.48236083984375, 23.144023895263672, -1.907470703125, 326.92047119140625, 203.64080810546875, -30.383378982543945, 112.76212310791016, 269.84967041015625, -20.103063583374023, 51.37165069580078, 57.15445327758789, 31.10037612915039, 78.58897399902344, 52.168663024902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000232.npy"} +{"epoch": 0.3406754772393539, "step": 233, "batch_size": 64, "mean": 146.382568359375, "std": 211.47802734375, "min": -224.47433471679688, "p10": -67.9090103149414, "median": 100.92501831054688, "p90": 465.07917785644537, "max": 675.9614868164062, "pos_frac": 0.734375, "sample": [13.337663650512695, 12.632991790771484, 480.40081787109375, -33.7633056640625, 191.441650390625, 187.42132568359375, -162.41505432128906, -121.86551666259766, 2.1025848388671875, -177.95326232910156, 113.66529846191406, 28.8157958984375, 166.7107696533203, 260.4913330078125, 252.54293823242188, 675.9614868164062, 119.02509307861328, 76.45792388916016, 525.2583618164062, 335.76776123046875, 46.44273376464844, 159.26109313964844, 81.01298522949219, 550.517333984375, 15.755546569824219, 276.8924560546875, 0.259368896484375, -72.77593994140625, -174.9437255859375, -51.93062210083008, 120.00575256347656, 489.5055847167969, 213.35765075683594, 154.43589782714844, 300.35247802734375, 449.9139709472656, 38.18516540527344, 443.8704833984375, -56.55284118652344, 40.06202697753906, 125.65569305419922, 409.0858154296875, -35.81162643432617, 326.86285400390625, 63.71241760253906, 387.698974609375, -47.07705307006836, -224.47433471679688, -134.26805114746094, -20.678543090820312, 47.30949401855469, -27.07514190673828, 332.58154296875, 88.18473815917969, -47.23077392578125, 202.07020568847656, -22.57632064819336, 471.57855224609375, -38.556217193603516, 598.3289794921875, 378.2581787109375, 53.84504699707031, 309.32275390625, 202.07308959960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000233.npy"} +{"epoch": 0.342143906020558, "step": 234, "batch_size": 64, "mean": 148.38348388671875, "std": 170.77529907226562, "min": -139.5249481201172, "p10": -49.31187133789061, "median": 132.557861328125, "p90": 370.84226074218753, "max": 640.353759765625, "pos_frac": 0.796875, "sample": [-71.80308532714844, 262.98895263671875, 281.28509521484375, 238.1875, 338.6470642089844, 70.63265228271484, 155.7034912109375, 181.75796508789062, 393.5873718261719, -36.29930114746094, 281.2586975097656, 156.90037536621094, -139.5249481201172, 11.909488677978516, 79.8155746459961, 253.25289916992188, 58.05230712890625, -81.09252166748047, 58.251800537109375, 284.6294250488281, 640.353759765625, 140.94146728515625, -12.556894302368164, -33.656028747558594, 66.59551239013672, -23.765335083007812, 177.06634521484375, 363.6763610839844, 5.392723083496094, 349.59161376953125, -25.72974967956543, 157.40032958984375, 430.8734130859375, 103.71405792236328, 487.73846435546875, 104.845458984375, 32.387184143066406, 131.0101318359375, 62.08561325073242, 267.07513427734375, 272.8082580566406, 138.65174865722656, 237.412353515625, 64.70708465576172, 23.52802276611328, 81.72955322265625, -93.6668701171875, 1.8185195922851562, 226.2418212890625, 87.66496276855469, 86.75971221923828, 463.14813232421875, 585.4630126953125, -95.80298614501953, 298.9860534667969, 163.63650512695312, -54.88868713378906, -11.246316909790039, 174.76329040527344, 49.24028015136719, 134.1055908203125, -101.35690307617188, 373.9133605957031, 185.74551391601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000234.npy"} +{"epoch": 0.3436123348017621, "step": 235, "batch_size": 64, "mean": 98.07133483886719, "std": 169.71798706054688, "min": -408.2266845703125, "p10": -74.24182968139648, "median": 106.67793273925781, "p90": 299.18798828125006, "max": 508.32550048828125, "pos_frac": 0.734375, "sample": [32.26353454589844, 227.41030883789062, -408.2266845703125, 10.367095947265625, 161.50320434570312, 508.32550048828125, 165.0023193359375, 311.7880859375, -52.398685455322266, 202.92921447753906, -160.71139526367188, -26.012359619140625, 171.18185424804688, 279.25396728515625, 58.960784912109375, 34.63032531738281, -85.97525024414062, 17.09954071044922, 108.00685119628906, -287.7318420410156, 19.213672637939453, 17.109054565429688, -5.3837738037109375, 70.2997817993164, 334.016845703125, 231.7519073486328, -28.84081268310547, -57.115966796875, 157.31692504882812, 217.59104919433594, 123.60308837890625, 105.34901428222656, 307.73114013671875, -65.28839111328125, 127.40495300292969, 200.06390380859375, -4.051591873168945, 232.55645751953125, 24.205184936523438, 162.97921752929688, 29.969188690185547, 50.72515869140625, 502.5500183105469, 237.10888671875, 178.53465270996094, 159.1005096435547, 167.88824462890625, -39.549095153808594, -160.5421142578125, 140.28448486328125, 474.77374267578125, 259.20867919921875, -134.10400390625, 2.37066650390625, 113.39376831054688, -11.472494125366211, -78.07901763916016, 180.82485961914062, 96.25051879882812, 14.293365478515625, 189.68582153320312, 125.84400939941406, 404.4510192871094, -65.12358093261719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000235.npy"} +{"epoch": 0.34508076358296624, "step": 236, "batch_size": 64, "mean": 155.16970825195312, "std": 142.1499786376953, "min": -97.21076202392578, "p10": -15.90657997131347, "median": 127.36352157592773, "p90": 352.3702087402344, "max": 465.1845397949219, "pos_frac": 0.859375, "sample": [371.7690124511719, 85.56258392333984, 17.310836791992188, 157.80905151367188, 189.07847595214844, 128.2619171142578, 427.6526794433594, 21.97430419921875, 72.47076416015625, 126.00645446777344, 198.123291015625, 350.6183776855469, 97.50897216796875, 193.59701538085938, 308.69671630859375, 334.7117919921875, 56.86289978027344, 260.279541015625, -8.205802917480469, 378.430419921875, 138.54934692382812, -38.89201354980469, 82.02687072753906, 318.0270080566406, 224.29693603515625, 18.244075775146484, 130.70217895507812, 330.0325927734375, 242.11465454101562, 45.449913024902344, 465.1845397949219, 99.78644561767578, -48.71440124511719, 246.35704040527344, -2.6620216369628906, 241.14093017578125, 130.84036254882812, 444.90350341796875, 99.18961334228516, -19.206912994384766, 126.46512603759766, 110.0248794555664, 27.8861141204834, 49.20044708251953, 16.03982925415039, 191.0510711669922, 114.15756225585938, 94.02713775634766, -67.666015625, -97.21076202392578, 147.02728271484375, 450.5398254394531, 352.542236328125, 120.11285400390625, 215.24801635742188, 109.8724365234375, 315.9181213378906, 97.53936767578125, 2.0198211669921875, 190.8413543701172, -65.62326049804688, 351.96881103515625, -28.864307403564453, 191.8539581298828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000236.npy"} +{"epoch": 0.3465491923641703, "step": 237, "batch_size": 64, "mean": 119.95465850830078, "std": 176.00682067871094, "min": -343.18902587890625, "p10": -78.68788375854491, "median": 101.71115493774414, "p90": 362.80506591796893, "max": 681.1165161132812, "pos_frac": 0.765625, "sample": [203.70240783691406, 203.10682678222656, 183.17977905273438, 154.48526000976562, -3.8710174560546875, 101.71092987060547, 95.86713409423828, 147.11502075195312, 37.861385345458984, 166.38943481445312, 63.282691955566406, 30.94195556640625, 391.27423095703125, 147.38674926757812, 220.02566528320312, -71.05809020996094, -106.12687683105469, 141.3370819091797, -80.86676788330078, 100.90149688720703, 141.60012817382812, -108.36344909667969, 384.3065185546875, 681.1165161132812, -84.41557312011719, 403.3546142578125, 312.635009765625, 14.244842529296875, 16.801712036132812, 13.632793426513672, 474.31072998046875, 101.71138000488281, 228.36549377441406, 214.9481658935547, 14.164772033691406, 241.32875061035156, 22.525169372558594, 150.1800537109375, 290.2857666015625, -41.51293182373047, 241.99331665039062, 86.02425384521484, 147.1114501953125, 273.2406921386719, 424.70697021484375, 43.886627197265625, 0.482421875, -5.273780822753906, -343.18902587890625, 277.16094970703125, -45.841468811035156, 80.95927429199219, -179.05160522460938, -43.92576599121094, 191.22552490234375, 283.59771728515625, -73.60382080078125, 451.57183837890625, 7.493125915527344, -116.53028869628906, -48.95233154296875, 122.55176544189453, 68.902587890625, 234.69253540039062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000237.npy"} +{"epoch": 0.34801762114537443, "step": 238, "batch_size": 64, "mean": 119.5045394897461, "std": 177.6407470703125, "min": -203.0545654296875, "p10": -34.43104171752929, "median": 76.63698959350586, "p90": 291.06929626464853, "max": 772.7348022460938, "pos_frac": 0.78125, "sample": [-6.5024261474609375, -27.90979766845703, 4.9058837890625, 139.05064392089844, 20.54556655883789, -6.317161560058594, 134.2519989013672, 252.663818359375, -120.1318359375, 423.17498779296875, 72.51382446289062, 159.82229614257812, 66.84854125976562, 61.08743667602539, -6.810464859008789, 185.12393188476562, 301.0137634277344, -69.90475463867188, 56.10950469970703, 47.968143463134766, 12.243947982788086, 742.4027099609375, 32.98222351074219, 141.30091857910156, -37.225860595703125, 519.509765625, 103.57527160644531, 10.655586242675781, 138.81497192382812, -102.65782165527344, 115.31591796875, 110.61355590820312, 149.98724365234375, 115.47942352294922, 357.0601806640625, 17.20700454711914, 55.89126205444336, 121.26399230957031, 772.7348022460938, 142.9581298828125, -15.403003692626953, 10.03717041015625, 62.726383209228516, -107.54546356201172, -10.332351684570312, 265.0463562011719, 252.71270751953125, 178.086669921875, 63.85155487060547, 461.4979553222656, 217.47720336914062, 12.02044677734375, 195.2230224609375, 228.32699584960938, -203.0545654296875, 267.86553955078125, 208.29119873046875, 38.32763671875, 0.11865997314453125, -17.65521240234375, 157.90878295898438, 156.55377197265625, -62.168609619140625, 80.7601547241211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000238.npy"} +{"epoch": 0.34948604992657856, "step": 239, "batch_size": 64, "mean": 131.09304809570312, "std": 163.37149047851562, "min": -261.03778076171875, "p10": -40.41120529174805, "median": 120.54230117797852, "p90": 365.1321990966797, "max": 479.6341857910156, "pos_frac": 0.796875, "sample": [375.73175048828125, 177.7367401123047, -149.5550079345703, 360.0448303222656, -108.98342895507812, 164.51824951171875, 57.373382568359375, 142.67686462402344, -29.309825897216797, -147.5812530517578, 44.44093704223633, 41.868865966796875, 75.01487731933594, 273.3723449707031, 78.79024505615234, 4.189491271972656, 479.6341857910156, 122.69712829589844, 175.309326171875, 133.080322265625, 206.94322204589844, 193.05540466308594, 245.63304138183594, -61.29513931274414, 21.62969207763672, 38.25679016113281, 183.62474060058594, 376.0406799316406, 238.1562042236328, 106.98637390136719, -6.465490341186523, 7.352203369140625, 329.9210205078125, 434.32574462890625, -187.1276397705078, 57.85846710205078, -38.05187225341797, 334.82525634765625, -9.750045776367188, 369.3348693847656, 30.05301284790039, 351.78057861328125, 327.1694030761719, 145.124755859375, -41.42234802246094, 43.347938537597656, -261.03778076171875, 155.4609375, -37.01559066772461, 146.9720001220703, 146.01156616210938, 276.18218994140625, 301.1116943359375, 66.15383911132812, 445.32427978515625, 211.28671264648438, 367.3125, 79.780517578125, 101.67066955566406, 118.3874740600586, 60.053619384765625, -22.02326202392578, 247.98048400878906, 17.986684799194336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000239.npy"} +{"epoch": 0.3509544787077827, "step": 240, "batch_size": 64, "mean": 109.98140716552734, "std": 180.91506958007812, "min": -237.15078735351562, "p10": -162.76088104248043, "median": 123.6515884399414, "p90": 349.58030090332034, "max": 534.5574951171875, "pos_frac": 0.765625, "sample": [139.39151000976562, 163.59730529785156, 156.99331665039062, 97.1645278930664, 125.04951477050781, 346.87628173828125, 132.5084228515625, 418.85980224609375, -204.42208862304688, -50.45305633544922, 176.3710174560547, 145.3549041748047, -237.15078735351562, 185.57504272460938, 350.7391662597656, 77.93291473388672, -173.31243896484375, 69.09095764160156, 31.230567932128906, 239.84918212890625, 175.87918090820312, 397.30859375, 147.86160278320312, 146.28936767578125, 64.70494079589844, 8.550411224365234, -215.2071533203125, 117.59395599365234, 76.97279357910156, 489.969970703125, 180.9499053955078, 111.23431396484375, 89.25099182128906, 17.51927947998047, -221.7755126953125, 313.5220642089844, 230.8910369873047, 177.70672607421875, -202.00830078125, 73.16027069091797, -59.21926498413086, 281.2469482421875, -138.1405792236328, 46.272823333740234, 534.5574951171875, -103.69094848632812, 273.80316162109375, -30.060762405395508, 252.79354858398438, 175.39996337890625, 122.253662109375, 449.8582763671875, -44.81111145019531, 100.5678939819336, -108.35200500488281, 149.6134033203125, 88.6707763671875, -41.316741943359375, 135.44570922851562, 178.7573699951172, 46.949188232421875, 135.38717651367188, 454.3317565917969, -233.1282501220703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000240.npy"} +{"epoch": 0.3524229074889868, "step": 241, "batch_size": 64, "mean": 172.10183715820312, "std": 201.54823303222656, "min": -232.1362762451172, "p10": -31.772038650512695, "median": 142.94709014892578, "p90": 425.83910217285165, "max": 854.660400390625, "pos_frac": 0.8125, "sample": [96.16447448730469, 84.20851135253906, -197.29278564453125, -17.031702041625977, 50.088958740234375, 243.43218994140625, 257.7747802734375, 434.54541015625, 74.29855346679688, 178.7938232421875, 121.08198547363281, -52.5291748046875, -5.540088653564453, 497.57183837890625, -2.8612070083618164, 163.27578735351562, -32.22593307495117, 337.47216796875, 173.31138610839844, 152.8022003173828, 351.045166015625, 125.13519287109375, 90.30695343017578, 121.85426330566406, 854.660400390625, 405.5243835449219, 201.63162231445312, 134.59103393554688, 579.2998046875, 41.690086364746094, 794.8746948242188, 437.9044189453125, 319.05377197265625, 66.52765655517578, 83.17413330078125, 151.3031463623047, 211.01846313476562, 94.63716125488281, -30.71295166015625, 170.4824981689453, 96.9808120727539, 49.633872985839844, 108.0772705078125, -9.961502075195312, 228.06475830078125, 57.38551330566406, 283.154541015625, -49.90614700317383, 210.81150817871094, 199.568603515625, 71.75601196289062, 44.463600158691406, 241.40109252929688, 493.0579528808594, 288.83935546875, 165.3017578125, -181.59121704101562, 280.23382568359375, 257.8651428222656, 284.23992919921875, -232.1362762451172, 367.6618347167969, 89.43685913085938, -91.16458129882812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000241.npy"} +{"epoch": 0.35389133627019087, "step": 242, "batch_size": 64, "mean": 123.77261352539062, "std": 171.72618103027344, "min": -266.82562255859375, "p10": -44.64950714111327, "median": 93.36262130737305, "p90": 304.6214569091797, "max": 665.9413452148438, "pos_frac": 0.8125, "sample": [-32.863304138183594, 15.013423919677734, 162.23880004882812, 272.06866455078125, 41.25393295288086, -266.82562255859375, 13.579437255859375, -9.222068786621094, 303.764892578125, 133.69818115234375, 21.701704025268555, 243.00547790527344, 621.5587158203125, 23.355573654174805, -25.77735137939453, 75.3346939086914, 120.43248748779297, 139.28640747070312, 57.191650390625, 12.640012741088867, 185.3268280029297, 66.50462341308594, 240.1885223388672, 212.58181762695312, 258.5177001953125, -49.70073699951172, 105.52581024169922, 39.128623962402344, 114.45830535888672, -98.38131713867188, 160.25570678710938, 209.71115112304688, 87.47570037841797, -134.144287109375, 257.3517150878906, 125.53690338134766, 304.9885559082031, 440.8326416015625, 19.018287658691406, 73.48265075683594, 82.0946044921875, 200.5940399169922, -4.9667816162109375, 665.9413452148438, 41.857154846191406, 75.95146942138672, 467.5885009765625, 99.24954223632812, 17.437576293945312, 2.7351856231689453, -108.23897552490234, 436.30517578125, 172.3732147216797, -18.113563537597656, 69.72700500488281, 114.83908081054688, 152.26014709472656, -138.27532958984375, 244.19923400878906, 220.2896728515625, 7.945978164672852, 468.3483581542969, 162.67822265625, -51.46813201904297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000242.npy"} +{"epoch": 0.355359765051395, "step": 243, "batch_size": 64, "mean": 151.74656677246094, "std": 174.80267333984375, "min": -281.2373962402344, "p10": -51.313291168212885, "median": 129.51422882080078, "p90": 377.4388671875, "max": 700.3162841796875, "pos_frac": 0.859375, "sample": [340.46630859375, 18.883220672607422, 80.7927017211914, 452.65252685546875, 191.52273559570312, 103.7191390991211, 127.50529479980469, 174.51409912109375, -79.11592102050781, -94.19864654541016, -54.192169189453125, 85.58718872070312, 231.4882049560547, 46.305686950683594, -131.8547821044922, 26.957427978515625, -89.64056396484375, 336.411865234375, 138.84654235839844, 153.77322387695312, -281.2373962402344, 36.560394287109375, 170.87115478515625, 25.300437927246094, 29.47263526916504, 297.89129638671875, 58.24688720703125, 188.349365234375, 91.02117156982422, 380.46966552734375, 286.7960510253906, 262.2003173828125, -44.595909118652344, 138.74034118652344, -80.265380859375, 370.36700439453125, 260.3088073730469, 513.7442626953125, 333.80517578125, 236.179931640625, 289.41802978515625, 120.1987533569336, 405.32855224609375, 700.3162841796875, 400.18682861328125, 42.364593505859375, 131.52316284179688, 50.72255325317383, 368.19390869140625, -17.153884887695312, 157.77456665039062, 198.18246459960938, 34.3741455078125, 119.11334228515625, 505.3362121582031, 46.39209747314453, 139.5130615234375, 82.50349426269531, 15.573768615722656, 230.10211181640625, 17.465118408203125, 77.11517333984375, 61.68864440917969, 200.89654541015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000243.npy"} +{"epoch": 0.3568281938325991, "step": 244, "batch_size": 64, "mean": 152.10162353515625, "std": 157.76722717285156, "min": -154.99642944335938, "p10": -14.802527618408202, "median": 134.16181182861328, "p90": 375.84345703125007, "max": 563.912109375, "pos_frac": 0.859375, "sample": [87.00822448730469, 13.211524963378906, 112.49960327148438, 149.59909057617188, 17.597667694091797, 54.2895622253418, 435.6525573730469, 132.69500732421875, -58.135345458984375, 5.3062744140625, 178.9972686767578, 207.11001586914062, 5.8408203125, 302.4193115234375, 41.46075439453125, 136.59193420410156, -11.827991485595703, 449.7615051269531, 41.895877838134766, 94.2616958618164, -154.99642944335938, 125.30389404296875, 271.5987548828125, -63.369354248046875, 195.77346801757812, 33.07921600341797, 154.50350952148438, 238.28118896484375, 121.31796264648438, 28.26409912109375, 246.11219787597656, 116.56281280517578, 320.88330078125, 301.2042541503906, 468.7296142578125, 536.6138305664062, 175.6708221435547, 380.8768615722656, 47.82145309448242, 563.912109375, 235.05459594726562, -13.293937683105469, 357.8541259765625, 168.01437377929688, 153.9585723876953, -69.0533676147461, 135.6286163330078, -20.564485549926758, -15.449066162109375, 234.9597930908203, 36.68370056152344, 9.564643859863281, 151.72669982910156, 80.96479797363281, 4.244873046875, 113.58174133300781, 364.0988464355469, 201.572021484375, -72.52430725097656, 138.51107788085938, 87.95576477050781, 444.7708740234375, 203.0986785888672, 298.7674865722656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000244.npy"} +{"epoch": 0.35829662261380324, "step": 245, "batch_size": 64, "mean": 122.78467559814453, "std": 177.676513671875, "min": -430.5545349121094, "p10": -36.12038002014159, "median": 105.30636978149414, "p90": 326.4335388183595, "max": 618.2938842773438, "pos_frac": 0.8125, "sample": [118.41913604736328, 404.84136962890625, 176.79576110839844, 133.90370178222656, 183.9715576171875, 143.32261657714844, 54.71971893310547, -23.855621337890625, 212.6109619140625, 123.71316528320312, 89.73806762695312, 113.08024597167969, 161.2880096435547, 44.798221588134766, 134.49407958984375, 173.81094360351562, 217.18392944335938, 289.43475341796875, 98.59786987304688, 24.161029815673828, 540.0714111328125, 139.7532196044922, -19.95812225341797, 354.94598388671875, 59.60368347167969, -3.5493927001953125, 216.8884735107422, -41.376705169677734, -192.64901733398438, -11.721092224121094, 293.4581298828125, 153.56817626953125, 618.2938842773438, -158.8011016845703, 183.95718383789062, 12.332160949707031, 18.69243621826172, 90.97702026367188, 285.3485107421875, 69.08651733398438, -16.58365249633789, 214.664794921875, 156.9178009033203, -62.2138671875, 66.07549285888672, -430.5545349121094, 276.9249267578125, 285.88543701171875, 25.19928741455078, 213.70457458496094, 340.56585693359375, 20.4779052734375, 584.5487670898438, -89.57450866699219, 1.7106361389160156, 497.54058837890625, 77.18605041503906, 112.0148696899414, 9.151382446289062, -117.342529296875, 65.42269897460938, 61.43080139160156, 67.33722686767578, 13.777862548828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000245.npy"} +{"epoch": 0.35976505139500736, "step": 246, "batch_size": 64, "mean": 125.66952514648438, "std": 183.41656494140625, "min": -237.39915466308594, "p10": -91.32632675170898, "median": 82.90713119506836, "p90": 359.1256530761719, "max": 744.229248046875, "pos_frac": 0.796875, "sample": [188.40670776367188, 354.54595947265625, 129.90371704101562, 84.4279556274414, 408.36138916015625, 209.93983459472656, 744.229248046875, -156.6378173828125, 40.57430648803711, -237.39915466308594, 46.57878875732422, 84.84972381591797, 73.40655517578125, 0.8738269805908203, 169.19651794433594, 361.08837890625, 300.1036376953125, 60.938148498535156, 40.76829528808594, 51.933128356933594, 45.960845947265625, 383.0762023925781, 147.5111846923828, -92.0387191772461, 113.05746459960938, -102.42654418945312, 232.24635314941406, -47.800933837890625, 338.3894958496094, -101.6919174194336, 4.7303009033203125, -25.340599060058594, 114.28260803222656, 107.81596374511719, 315.3775634765625, 32.271690368652344, 18.92915916442871, -14.24410629272461, 3.973602294921875, 225.10049438476562, 383.5530090332031, 113.56766510009766, 38.736427307128906, 225.6210479736328, 81.38630676269531, 154.83389282226562, 77.6246337890625, -89.66407775878906, -0.7698402404785156, 209.4220733642578, -94.66789245605469, 90.38104248046875, 315.4533996582031, 175.7812042236328, 25.16796875, 64.53861999511719, 11.869260787963867, 582.1163330078125, 325.5617370605469, 575.3150634765625, -32.09619903564453, 243.540283203125, -96.28318786621094, 6.591953277587891], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000246.npy"} +{"epoch": 0.36123348017621143, "step": 247, "batch_size": 64, "mean": 177.672119140625, "std": 198.25411987304688, "min": -311.0759582519531, "p10": -25.11617660522458, "median": 152.26233673095703, "p90": 450.8974426269533, "max": 660.6763916015625, "pos_frac": 0.890625, "sample": [-241.1409454345703, 91.35310363769531, -80.08229064941406, 398.2657470703125, 90.22966003417969, 498.39935302734375, 154.6768798828125, 393.93701171875, -59.27777862548828, 342.2962951660156, 376.8852844238281, 501.0763244628906, 211.12380981445312, 285.4226379394531, 54.056793212890625, 342.22161865234375, 472.58978271484375, 44.05823516845703, 348.22760009765625, 310.1246337890625, 165.99942016601562, 35.07000732421875, 26.602127075195312, 113.55577850341797, 14.221939086914062, 268.69842529296875, 28.84918975830078, 77.36964416503906, 91.4234848022461, 111.13424682617188, 233.3101806640625, 346.98370361328125, 64.86640167236328, 159.77728271484375, 41.02305603027344, 397.0753173828125, 514.574462890625, 332.3514404296875, 105.51649475097656, -144.69747924804688, 15.00161361694336, 202.72462463378906, 3.28717041015625, 182.69659423828125, 51.203529357910156, 18.707923889160156, 149.84779357910156, 400.281982421875, 646.435791015625, 90.4975357055664, 43.68122100830078, -115.70260620117188, 93.59281921386719, 660.6763916015625, 217.350830078125, -37.289039611816406, 169.11557006835938, 291.61798095703125, 210.9570770263672, -311.0759582519531, 474.32440185546875, 15.736495971679688, 140.44044494628906, 238.75689697265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000247.npy"} +{"epoch": 0.36270190895741555, "step": 248, "batch_size": 64, "mean": 153.25155639648438, "std": 143.7351837158203, "min": -198.61439514160156, "p10": -2.752876281738279, "median": 139.29306030273438, "p90": 348.92448425292974, "max": 459.1712646484375, "pos_frac": 0.859375, "sample": [320.06195068359375, 388.7419738769531, 99.3051986694336, 181.727294921875, 196.61575317382812, 0.852569580078125, 154.3885955810547, 354.2827453613281, 121.92054748535156, -87.87261962890625, 24.210403442382812, 264.86602783203125, 185.78179931640625, 36.75114440917969, 232.76849365234375, 49.631072998046875, 21.942201614379883, 48.47938537597656, 150.29676818847656, -3.641773223876953, 140.00778198242188, 246.50750732421875, 239.598388671875, 418.8082580566406, 9.86590576171875, -43.938873291015625, 138.57833862304688, 336.421875, 122.41334533691406, 227.66165161132812, 182.71119689941406, -198.61439514160156, 129.53060913085938, 304.61566162109375, 104.03810119628906, 175.0661163330078, 75.67230224609375, 17.458724975585938, -17.219619750976562, 48.17979049682617, 212.4202117919922, 258.9971008300781, 190.94476318359375, 433.319091796875, 104.92271423339844, 100.53083801269531, 433.03387451171875, 313.0353698730469, 45.5955810546875, 459.1712646484375, 9.182319641113281, 167.66806030273438, 327.7327575683594, 256.05499267578125, 264.3916015625, 366.4124755859375, -11.371444702148438, 35.56257629394531, 122.99409484863281, 289.1420593261719, 117.49324798583984, -0.5967826843261719, -0.6787834167480469, -86.33119201660156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000248.npy"} +{"epoch": 0.3641703377386197, "step": 249, "batch_size": 64, "mean": 84.31858825683594, "std": 187.11886596679688, "min": -346.63909912109375, "p10": -145.40407257080076, "median": 72.0584602355957, "p90": 320.5450073242189, "max": 663.2310180664062, "pos_frac": 0.671875, "sample": [-21.515398025512695, 285.6722412109375, -109.73591613769531, -66.43421936035156, 109.3516616821289, -122.86624908447266, 20.907089233398438, 153.8886260986328, 61.55029296875, 382.0148010253906, 32.22509765625, 32.2266845703125, 70.27892303466797, 39.54161834716797, 240.9038543701172, -346.63909912109375, -98.60740661621094, 380.388916015625, 170.16172790527344, -75.50775146484375, 138.70428466796875, 213.3951873779297, -158.239990234375, 100.85298919677734, -31.865432739257812, 356.1627197265625, -151.93260192871094, 269.78564453125, 130.01693725585938, 78.81050109863281, -25.836132049560547, -22.052608489990234, 213.20379638671875, 61.35389709472656, 111.84613800048828, -23.598541259765625, 183.66561889648438, -324.12158203125, 105.73611450195312, 262.80035400390625, 215.40634155273438, -22.122589111328125, 663.2310180664062, -93.98200988769531, -229.62515258789062, -159.34510803222656, 48.920387268066406, 25.11386489868164, 101.30844116210938, 205.34104919433594, 114.97357940673828, 266.1944580078125, -194.93930053710938, 244.90682983398438, 335.490478515625, 267.28326416015625, 73.83799743652344, -130.17083740234375, -52.680015563964844, 217.79153442382812, 62.62610626220703, 405.70306396484375, 393.4813232421875, 11.152305603027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000249.npy"} +{"epoch": 0.3656387665198238, "step": 250, "batch_size": 64, "mean": 156.46585083007812, "std": 164.75741577148438, "min": -176.26544189453125, "p10": -35.59816875457763, "median": 152.68402099609375, "p90": 378.2839111328126, "max": 541.095703125, "pos_frac": 0.796875, "sample": [256.8785095214844, 26.074506759643555, 45.49784851074219, -4.215915679931641, -39.49676513671875, 206.1325225830078, 79.67276763916016, 492.7003173828125, 160.44277954101562, -139.04893493652344, 58.90648651123047, 517.444091796875, 243.16180419921875, 143.58631896972656, 227.4666748046875, 162.3251953125, 27.106651306152344, 139.67041015625, 320.3244323730469, 359.195068359375, 183.76190185546875, -93.7012710571289, -54.064674377441406, 98.02335357666016, 89.5062026977539, 185.86383056640625, 63.569297790527344, -18.438772201538086, 74.13026428222656, -21.8763427734375, -26.50144386291504, 39.17108154296875, 137.1707000732422, 278.8458251953125, -45.77666473388672, 541.095703125, 127.01119995117188, 198.85443115234375, -131.887939453125, -6.750572204589844, 203.96559143066406, 393.45806884765625, 302.5469970703125, 169.17234802246094, 167.94390869140625, 57.13941192626953, 269.15948486328125, 144.92526245117188, 276.58074951171875, 135.1442108154297, 57.20864486694336, -4.8638458251953125, 196.80764770507812, 261.6518249511719, -176.26544189453125, 227.869384765625, 386.46484375, 201.63372802734375, 177.31527709960938, 449.2886047363281, 337.2808837890625, 38.4388427734375, 522.1738891601562, 316.94378662109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000250.npy"} +{"epoch": 0.3671071953010279, "step": 251, "batch_size": 64, "mean": 106.71569061279297, "std": 190.78912353515625, "min": -395.91748046875, "p10": -57.57828140258789, "median": 86.34718322753906, "p90": 348.1887817382813, "max": 702.21240234375, "pos_frac": 0.734375, "sample": [431.6037292480469, 12.004627227783203, 357.01531982421875, -29.68682098388672, 217.67910766601562, -290.504638671875, -395.91748046875, 702.21240234375, 0.9529190063476562, 187.88302612304688, 171.10903930664062, -145.36062622070312, 323.7607116699219, 42.396705627441406, 86.47439575195312, -42.68544006347656, -291.55450439453125, 452.8337097167969, -28.828161239624023, 170.99905395507812, 20.332836151123047, 283.5262145996094, -14.440483093261719, -55.184288024902344, 46.886627197265625, 194.2434539794922, -131.60768127441406, -58.604278564453125, 69.51947021484375, 267.96160888671875, 73.55059814453125, -142.18124389648438, 332.0162353515625, -34.090965270996094, 63.51782989501953, 210.67471313476562, 54.367897033691406, 125.91265106201172, 149.6048126220703, 135.9031524658203, 2.4980316162109375, 209.54693603515625, -29.242990493774414, 279.94708251953125, -3.51287841796875, 360.9638366699219, 214.5795440673828, 185.5936737060547, 88.64069366455078, 137.3358154296875, -17.172216415405273, 14.641586303710938, 582.7741088867188, 86.219970703125, 355.119873046875, 191.81155395507812, -24.68915367126465, 233.5306396484375, 90.98825073242188, 4.424510955810547, 34.35277557373047, 127.98275756835938, 36.386512756347656, 142.7867889404297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000251.npy"} +{"epoch": 0.368575624082232, "step": 252, "batch_size": 64, "mean": 144.67434692382812, "std": 151.98562622070312, "min": -492.47039794921875, "p10": -15.025075721740716, "median": 155.98380279541016, "p90": 329.14268188476564, "max": 485.35552978515625, "pos_frac": 0.859375, "sample": [261.88226318359375, 313.99102783203125, 7.005054473876953, 198.629638671875, 255.41990661621094, 75.3917236328125, 156.36257934570312, 339.014892578125, 87.61029052734375, 135.44711303710938, 229.4307098388672, 1.265960693359375, 177.216796875, 58.524322509765625, 112.30381774902344, 159.9115447998047, 43.71133804321289, 134.39039611816406, 187.42807006835938, -72.1195297241211, 273.44500732421875, 155.6050262451172, 13.994029998779297, 417.2018127441406, 278.1014099121094, -129.92041015625, -20.16084861755371, 111.08029174804688, -17.469039916992188, 148.7529296875, -37.37541580200195, 192.4393310546875, 39.14558410644531, 160.37777709960938, 62.0010986328125, 131.52349853515625, 129.18310546875, 416.926513671875, 368.10784912109375, 331.95703125, 47.650047302246094, 180.4384002685547, 177.0301971435547, 31.303207397460938, 215.96066284179688, 322.57586669921875, 283.41217041015625, 268.0062561035156, 206.02761840820312, 173.3421173095703, -2.1912078857421875, -492.47039794921875, 346.8018798828125, 280.15411376953125, 129.074462890625, 91.72560119628906, -9.322492599487305, 177.9344482421875, 485.35552978515625, -63.306880950927734, 84.1849365234375, 57.506690979003906, 175.15447998046875, 205.07623291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000252.npy"} +{"epoch": 0.3700440528634361, "step": 253, "batch_size": 64, "mean": 142.34326171875, "std": 181.11199951171875, "min": -409.5155944824219, "p10": -71.95600814819335, "median": 130.23860931396484, "p90": 394.96443481445317, "max": 591.736572265625, "pos_frac": 0.796875, "sample": [-64.58843231201172, 20.08556365966797, 287.9301452636719, 11.387985229492188, -6.992340087890625, 130.15057373046875, 227.474609375, 54.632652282714844, 183.91944885253906, 66.38645935058594, 105.87490844726562, 591.736572265625, 220.87884521484375, 59.289756774902344, 241.11575317382812, 116.05209350585938, 384.7509460449219, 91.79954528808594, -75.11354064941406, 103.07131958007812, -12.686979293823242, -409.5155944824219, 229.56341552734375, 399.3416442871094, 416.7218933105469, 10.192289352416992, 452.147216796875, 24.58118438720703, -20.466392517089844, 36.07234573364258, 17.648277282714844, -81.73466491699219, 138.802490234375, 281.11285400390625, 311.6953430175781, 206.42446899414062, 302.47723388671875, -134.44888305664062, 303.97076416015625, 87.83368682861328, 246.5553741455078, 340.59613037109375, 171.72265625, 297.20050048828125, 432.4451904296875, 121.37680053710938, 130.32664489746094, 402.3338928222656, 136.60430908203125, -239.96710205078125, 378.9443359375, 180.36416625976562, -88.37066650390625, 63.51189422607422, -77.336669921875, 90.67083740234375, 164.95664978027344, 228.6575164794922, 210.2644805908203, -34.65771484375, -53.136173248291016, 60.647796630859375, 455.2276916503906, 181.4539794921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000253.npy"} +{"epoch": 0.37151248164464024, "step": 254, "batch_size": 64, "mean": 120.19171142578125, "std": 206.7840118408203, "min": -275.5563659667969, "p10": -125.05795898437499, "median": 92.30457305908203, "p90": 411.04849243164074, "max": 861.0596923828125, "pos_frac": 0.75, "sample": [128.24163818359375, -24.099838256835938, 140.43017578125, 99.94353485107422, 99.55702209472656, 103.63276672363281, 17.883071899414062, -84.32078552246094, 144.04058837890625, 25.901451110839844, 150.91757202148438, 207.1630096435547, 239.38955688476562, 43.920623779296875, -128.1127166748047, -275.5563659667969, 200.75482177734375, -28.351116180419922, 80.267333984375, 201.27597045898438, 8.972415924072266, 215.12234497070312, 523.8284912109375, 316.4114990234375, 368.2648010253906, -175.2191162109375, 426.00665283203125, 289.53314208984375, -12.210685729980469, 3.694314956665039, 79.84993743896484, 197.8444061279297, 42.18523406982422, -207.17242431640625, 23.612689971923828, 193.24209594726562, -55.399330139160156, 441.2327880859375, -131.9722900390625, 54.57429885864258, -29.48387908935547, -208.40020751953125, 62.03216552734375, 465.15045166015625, 9.459985733032227, 219.08120727539062, 5.257047653198242, 376.1461181640625, 52.539390563964844, 173.75396728515625, 861.0596923828125, 159.03871154785156, 85.0521240234375, -12.831062316894531, -176.43988037109375, 571.4072265625, 151.89132690429688, 232.4122314453125, 147.58055114746094, 56.88911437988281, -117.93019104003906, 551.3594970703125, 144.20654296875, -32.241851806640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000254.npy"} +{"epoch": 0.37298091042584436, "step": 255, "batch_size": 64, "mean": 128.564697265625, "std": 184.40765380859375, "min": -419.718994140625, "p10": -64.73961029052734, "median": 91.57090377807617, "p90": 342.8689270019532, "max": 610.95361328125, "pos_frac": 0.765625, "sample": [610.95361328125, -89.42938232421875, 24.957061767578125, 347.64385986328125, 186.3548583984375, -58.132965087890625, -53.615623474121094, -37.37690734863281, -419.718994140625, 48.32883834838867, 295.181396484375, 86.72727966308594, -8.565071105957031, 23.79030990600586, 239.05125427246094, 257.2611999511719, -164.44781494140625, -226.544921875, 153.46823120117188, 288.0411682128906, 277.5509338378906, 31.749162673950195, 80.74132537841797, 14.437366485595703, 78.60795593261719, -60.216270446777344, -128.36898803710938, 382.4629821777344, -21.824748992919922, 193.21873474121094, 212.7825164794922, 231.44866943359375, 81.48724365234375, 317.1583557128906, 366.9393310546875, 316.8148193359375, 262.4499206542969, 371.97674560546875, 331.7274169921875, 540.3192138671875, 51.53382110595703, -13.561115264892578, 273.0704650878906, 134.33822631835938, 27.389598846435547, 210.64210510253906, 66.5526123046875, 242.02264404296875, -114.93629455566406, 325.91864013671875, 426.99554443359375, 57.01520538330078, 135.29864501953125, 52.80933380126953, -66.67818450927734, 138.26901245117188, 13.240089416503906, 96.4145278930664, 37.106651306152344, 150.7699432373047, -47.129310607910156, 66.20059204101562, 302.5494384765625, 276.9180908203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000255.npy"} +{"epoch": 0.3744493392070485, "step": 256, "batch_size": 64, "mean": 138.02523803710938, "std": 178.59033203125, "min": -295.26885986328125, "p10": -60.070536804199214, "median": 117.975830078125, "p90": 348.223226928711, "max": 614.30908203125, "pos_frac": 0.765625, "sample": [93.57368469238281, 554.0758666992188, 113.08545684814453, 259.88330078125, 207.6414794921875, 255.45278930664062, 34.815773010253906, 185.78732299804688, -25.55472183227539, 25.289291381835938, 308.952392578125, 237.0809326171875, 180.52542114257812, 349.25439453125, 263.5935363769531, -139.62428283691406, 52.68197250366211, 173.95191955566406, -295.26885986328125, 36.964290618896484, 274.71636962890625, 112.45169067382812, -145.5374298095703, 286.67279052734375, 60.84370803833008, 275.13140869140625, -68.50562286376953, -62.524658203125, -80.93902587890625, 172.7145538330078, 371.67706298828125, -8.350341796875, 221.26025390625, 91.39752197265625, 63.37388610839844, 122.86620330810547, 387.58551025390625, -54.34425354003906, 49.06846618652344, 50.93675231933594, -196.7401885986328, 31.628265380859375, 29.168487548828125, 3.272003173828125, 179.40023803710938, -36.209598541259766, 207.8677978515625, 122.94567108154297, -30.5902099609375, 268.2147216796875, -38.88340759277344, 112.54684448242188, -7.572059631347656, 201.5146484375, 345.3843994140625, 406.3730773925781, 156.8079071044922, 58.87590408325195, 614.30908203125, 532.1319580078125, 225.83084106445312, 345.8171691894531, -21.851232528686523, 330.7159729003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000256.npy"} +{"epoch": 0.37591776798825255, "step": 257, "batch_size": 64, "mean": 146.7384033203125, "std": 190.81143188476562, "min": -290.0919494628906, "p10": -45.609196090698234, "median": 110.5522575378418, "p90": 444.30572204589845, "max": 672.8951416015625, "pos_frac": 0.78125, "sample": [465.19696044921875, -39.31649398803711, 11.528121948242188, 122.13528442382812, 367.9627685546875, -7.3175811767578125, 232.67092895507812, 444.16094970703125, 243.7921142578125, 157.28732299804688, -15.473012924194336, -48.306068420410156, 100.10610961914062, 306.5118408203125, 74.13329315185547, 23.9111328125, 223.5296630859375, 232.1204376220703, 19.89760971069336, 444.3677673339844, 36.85270690917969, 421.0351867675781, 280.46533203125, -128.6175994873047, 87.69620513916016, 113.0079345703125, -84.73149108886719, 13.629180908203125, -30.224178314208984, 508.7122497558594, 168.1192169189453, 224.9080352783203, 262.7593994140625, 672.8951416015625, 79.04302215576172, -53.80792236328125, 108.0965805053711, 6.724252700805664, 126.0753402709961, -290.0919494628906, 97.44229888916016, 451.0923156738281, 38.214054107666016, 240.45654296875, 202.5486297607422, 220.81341552734375, 70.33122253417969, 3.3286514282226562, 541.65478515625, 251.9122314453125, 243.16641235351562, 321.121337890625, -9.212181091308594, 115.23595428466797, 48.565948486328125, 536.9139404296875, -93.21467590332031, -13.423471450805664, 164.95187377929688, -177.92845153808594, 36.13002014160156, 29.90484619140625, -25.23748779296875, 215.04437255859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000257.npy"} +{"epoch": 0.37738619676945667, "step": 258, "batch_size": 64, "mean": 142.62551879882812, "std": 200.61253356933594, "min": -491.8069763183594, "p10": -62.15648574829101, "median": 131.598388671875, "p90": 374.7330261230469, "max": 697.4442138671875, "pos_frac": 0.796875, "sample": [296.6089172363281, 36.76647186279297, -107.60423278808594, 276.14532470703125, 54.85246276855469, 127.89203643798828, 262.71038818359375, 243.36294555664062, 231.525634765625, -7.596435546875, -36.69689178466797, 2.2198715209960938, -60.02112579345703, 50.27484130859375, 95.49774932861328, -39.84971618652344, -14.929649353027344, 113.8118896484375, 163.99803161621094, -0.6903152465820312, 62.85102844238281, 261.5208435058594, 540.89501953125, 219.3005828857422, 485.6518249511719, 271.38824462890625, 166.67950439453125, 21.77886199951172, 154.03411865234375, 363.7843322753906, 184.83453369140625, 282.06793212890625, 313.5269775390625, 171.1054229736328, -230.24862670898438, 102.25375366210938, 277.3509826660156, -362.4944152832031, 25.257118225097656, 130.59182739257812, 405.98809814453125, 12.072803497314453, -88.58729553222656, 391.74664306640625, 31.082889556884766, -491.8069763183594, 697.4442138671875, 132.60494995117188, -139.86599731445312, 101.6415023803711, 236.35809326171875, -63.07164001464844, 309.22613525390625, 363.64581298828125, 379.4253234863281, 42.563194274902344, 291.86767578125, 437.2114562988281, 146.87635803222656, 269.1217041015625, 75.49031066894531, 111.4172592163086, 269.3918151855469, 75.77970123291016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000258.npy"} +{"epoch": 0.3788546255506608, "step": 259, "batch_size": 64, "mean": 120.01094055175781, "std": 182.5396728515625, "min": -230.28294372558594, "p10": -100.46448287963867, "median": 94.53255462646484, "p90": 358.37385864257817, "max": 550.184326171875, "pos_frac": 0.75, "sample": [209.77745056152344, -17.04542350769043, 137.17916870117188, 190.98216247558594, 17.724788665771484, -97.38491821289062, 320.0017395019531, 432.886474609375, 433.8523254394531, 204.56008911132812, 49.501869201660156, -100.72106170654297, 108.00617980957031, -80.98307037353516, 8.492156982421875, -112.19709777832031, 348.9779357910156, -52.282867431640625, 467.5465087890625, 182.139404296875, 138.43707275390625, 142.4501953125, 81.05892944335938, 71.16629028320312, 309.52850341796875, 23.299087524414062, 57.9635009765625, 40.73307800292969, 550.184326171875, -190.26812744140625, 69.7425537109375, 29.891244888305664, 501.413330078125, 177.2235107421875, 40.345664978027344, -161.8345184326172, 158.36505126953125, 325.0473937988281, -151.01718139648438, 326.04925537109375, -164.24002075195312, 16.71410369873047, 350.9739990234375, -99.86579895019531, 112.98442077636719, 49.3446044921875, 361.54522705078125, 285.9265441894531, -230.28294372558594, 142.0412139892578, -36.450286865234375, 294.1412353515625, 277.93634033203125, -1.08416748046875, 374.17218017578125, -77.3438491821289, 118.81716918945312, 311.946044921875, 144.11923217773438, -45.17675018310547, 60.514827728271484, 34.55172348022461, 151.3675537109375, 57.254493713378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000259.npy"} +{"epoch": 0.3803230543318649, "step": 260, "batch_size": 64, "mean": 162.99392700195312, "std": 183.4407196044922, "min": -182.16232299804688, "p10": -24.095032501220697, "median": 126.67322158813477, "p90": 401.55622558593757, "max": 990.910400390625, "pos_frac": 0.859375, "sample": [233.16192626953125, -41.620140075683594, 39.00043487548828, -104.31278991699219, 4.196615219116211, 159.58416748046875, 124.90055084228516, 182.35533142089844, 329.0870361328125, 56.937408447265625, 65.0716552734375, 458.53460693359375, 118.15135192871094, 105.09367370605469, 146.961669921875, 990.910400390625, 156.9473419189453, 238.83047485351562, 65.32989501953125, 446.0662841796875, -182.16232299804688, 155.2213134765625, -32.882476806640625, 174.8439483642578, -96.81925201416016, -26.84986114501953, 152.79237365722656, -17.667098999023438, 247.42013549804688, 128.44589233398438, 110.87196350097656, 366.89373779296875, -10.117683410644531, 215.71556091308594, 154.8955535888672, 382.47259521484375, 374.9098205566406, 143.68292236328125, 117.65632629394531, 87.98822021484375, 88.00086975097656, 68.45526123046875, 337.405029296875, 112.73693084716797, 146.79904174804688, 40.1453971862793, 44.31480407714844, 83.29345703125, 165.17779541015625, 207.09263610839844, 207.332763671875, 191.2808837890625, 120.77494049072266, 419.73974609375, 24.89978790283203, -27.48798370361328, 60.6295166015625, 409.73492431640625, 22.066329956054688, 598.269287109375, 111.93419647216797, 416.4234924316406, 55.63600158691406, 304.45684814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000260.npy"} +{"epoch": 0.38179148311306904, "step": 261, "batch_size": 64, "mean": 150.76947021484375, "std": 202.81057739257812, "min": -327.8468017578125, "p10": -51.86358985900879, "median": 123.24789428710938, "p90": 404.6301544189453, "max": 886.6013793945312, "pos_frac": 0.78125, "sample": [407.0530700683594, 360.1178894042969, 47.757362365722656, -225.8705291748047, 128.1289520263672, 886.6013793945312, 43.25523376464844, 348.62237548828125, -29.577625274658203, 212.31619262695312, 47.788726806640625, 70.1692886352539, -95.01483154296875, 409.83575439453125, 118.36683654785156, 211.19528198242188, 221.13687133789062, 367.5283508300781, 412.55133056640625, -24.127845764160156, 117.57432556152344, 83.66677856445312, 151.44183349609375, 169.36196899414062, 175.95272827148438, 320.99456787109375, 491.72869873046875, 28.621490478515625, 306.1730041503906, 200.38267517089844, 60.707611083984375, 40.39215850830078, -4.069206237792969, 89.11825561523438, 242.76321411132812, 58.043983459472656, 280.6630859375, 376.3714599609375, 245.6988067626953, 105.25348663330078, 231.61019897460938, -51.671871185302734, 454.0933532714844, 398.9766845703125, -12.694635391235352, 343.50286865234375, -199.45103454589844, 193.38546752929688, -10.320030212402344, 251.77984619140625, 111.02620697021484, 495.43310546875, 7.229957580566406, -327.8468017578125, -187.68634033203125, 114.2120361328125, -172.97509765625, -18.186254501342773, 137.04803466796875, 203.96885681152344, 59.356056213378906, 70.74376678466797, 150.98220825195312, -51.94575500488281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000261.npy"} +{"epoch": 0.3832599118942731, "step": 262, "batch_size": 64, "mean": 123.46087646484375, "std": 169.67532348632812, "min": -250.1248321533203, "p10": -77.14892272949217, "median": 124.75458908081055, "p90": 301.89546203613287, "max": 754.2808837890625, "pos_frac": 0.796875, "sample": [41.29656982421875, 404.8092956542969, 109.38134002685547, 216.30995178222656, -250.1248321533203, 309.5757751464844, 472.3333740234375, 103.18638610839844, 156.23785400390625, 247.75222778320312, -147.7748260498047, -244.66976928710938, 157.99200439453125, 754.2808837890625, 132.91531372070312, -169.47967529296875, 171.29730224609375, 283.9747314453125, -43.41266632080078, 182.58584594726562, 157.8315887451172, 58.789337158203125, -87.23338317871094, 259.9944152832031, 139.49229431152344, -17.015396118164062, 47.928096771240234, 426.9357604980469, 104.20474243164062, 310.7300109863281, 47.629722595214844, 85.67086029052734, 84.30682373046875, -57.95487594604492, 63.73695373535156, 73.2999038696289, 137.2725830078125, -30.574081420898438, 177.59494018554688, 144.48464965820312, 138.8951416015625, 151.33860778808594, 10.015281677246094, 227.1995849609375, 238.83033752441406, 88.6618881225586, 240.21817016601562, 116.59386444091797, -81.6473617553711, -66.6525650024414, 73.78303527832031, 71.83798217773438, 49.91471862792969, 340.8292541503906, -158.40570068359375, -33.72661590576172, 95.7470703125, 161.78411865234375, 263.870849609375, 258.8070983886719, 182.6749267578125, 8.352767944335938, 275.54791259765625, 231.43429565429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000262.npy"} +{"epoch": 0.38472834067547723, "step": 263, "batch_size": 64, "mean": 176.84181213378906, "std": 206.23117065429688, "min": -349.71905517578125, "p10": -55.26733779907226, "median": 160.72258758544922, "p90": 457.2254791259767, "max": 684.4710693359375, "pos_frac": 0.796875, "sample": [316.3098449707031, -349.71905517578125, 164.41107177734375, 121.79576110839844, 361.511962890625, 56.25176239013672, 334.2662353515625, 72.2086181640625, 25.077774047851562, 100.94927215576172, -238.63079833984375, 167.4268035888672, 82.83301544189453, 301.0321044921875, 302.180908203125, 173.9053192138672, 19.11893081665039, 362.6259765625, 96.47612762451172, 165.47434997558594, -3.777261734008789, -67.8909912109375, 50.86634063720703, 315.8717346191406, -27.153247833251953, -72.02581787109375, 278.1826171875, 157.0341033935547, -18.819175720214844, 98.64213562011719, 41.99028015136719, 26.07581901550293, 403.98199462890625, 39.45732879638672, -31.493927001953125, 509.45794677734375, 283.4762268066406, 100.1907958984375, 387.57171630859375, 468.22412109375, 101.11588287353516, 481.45928955078125, 69.86123657226562, 279.722412109375, 337.5019836425781, 165.1357879638672, 476.24505615234375, -57.44557189941406, 238.94134521484375, 276.7911682128906, 377.0596008300781, 431.5619812011719, 663.874267578125, -172.75303649902344, -14.834548950195312, 555.1986083984375, -50.184791564941406, -75.86817169189453, 223.29612731933594, 239.76812744140625, 153.6053009033203, 684.4710693359375, 139.9862518310547, 217.99801635742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000263.npy"} +{"epoch": 0.38619676945668135, "step": 264, "batch_size": 64, "mean": 151.50254821777344, "std": 204.70956420898438, "min": -239.03762817382812, "p10": -77.88779983520507, "median": 124.66874694824219, "p90": 455.6923309326174, "max": 650.49462890625, "pos_frac": 0.796875, "sample": [215.01123046875, 321.53704833984375, 47.395023345947266, 650.49462890625, 101.28860473632812, -230.7722625732422, -151.6864013671875, 401.2326965332031, 83.6036376953125, 480.7540283203125, -52.76593780517578, 45.87389373779297, 104.43505859375, -144.3209228515625, 158.46212768554688, 186.26339721679688, 268.585693359375, 410.1776123046875, 405.0624694824219, 362.36187744140625, 85.42730712890625, 237.31668090820312, 260.593505859375, 178.1504669189453, 62.43890380859375, 39.47206115722656, -78.4226303100586, 11.152633666992188, -5.369071960449219, 197.62245178222656, -76.63986206054688, 18.38666534423828, 57.449913024902344, 576.2228393554688, 3.3828125, 475.1986389160156, 135.0526123046875, 195.51187133789062, 14.762100219726562, 251.15638732910156, 53.8287353515625, 211.0301055908203, 120.21194458007812, 129.12554931640625, -2.3448104858398438, 623.80419921875, 370.2771911621094, -239.03762817382812, 232.27676391601562, -137.37863159179688, 191.81809997558594, 1.6734962463378906, 229.59170532226562, 485.68646240234375, -36.522857666015625, 138.3149871826172, 5.223884582519531, 65.90802001953125, -202.2793731689453, 493.7960510253906, 324.1158142089844, 279.65521240234375, -6.823581695556641, 62.35221862792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000264.npy"} +{"epoch": 0.3876651982378855, "step": 265, "batch_size": 64, "mean": 144.70460510253906, "std": 237.8543701171875, "min": -485.34771728515625, "p10": -83.21995239257811, "median": 135.7381134033203, "p90": 418.0658203125, "max": 811.109130859375, "pos_frac": 0.75, "sample": [138.04232788085938, 408.45428466796875, 351.78106689453125, 226.2429656982422, 84.40462493896484, 151.16201782226562, 344.9509582519531, 19.39008331298828, 626.327392578125, 81.46307373046875, 258.8924865722656, -305.9717102050781, 6.624622344970703, -34.34476089477539, -136.97640991210938, 50.8065071105957, 302.3857727050781, -127.08059692382812, 105.06334686279297, -43.98675537109375, 254.94049072265625, 811.109130859375, -485.34771728515625, 524.8056640625, -71.57296752929688, 197.85195922851562, 184.5784912109375, 460.3172912597656, -71.16677856445312, 761.2927856445312, -260.01416015625, 327.45703125, 62.53593444824219, -54.562557220458984, 70.88662719726562, -88.21151733398438, 7.7374114990234375, -44.732879638671875, 369.9913024902344, -65.42381286621094, 411.2496643066406, 105.91937255859375, 69.84708404541016, 404.9475402832031, -42.427818298339844, 182.45933532714844, 21.758834838867188, 166.4055938720703, -29.490921020507812, 277.1707763671875, 85.29865264892578, 133.9205322265625, 157.18557739257812, 420.9870300292969, 170.12564086914062, 137.55569458007812, 220.20912170410156, 156.51808166503906, 40.031856536865234, -318.07086181640625, 440.7052307128906, 96.94564819335938, 324.77825927734375, 226.96275329589844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000265.npy"} +{"epoch": 0.3891336270190896, "step": 266, "batch_size": 64, "mean": 118.16098022460938, "std": 192.6403350830078, "min": -273.2129211425781, "p10": -95.1062675476074, "median": 77.68882369995117, "p90": 399.7540557861329, "max": 661.5164794921875, "pos_frac": 0.71875, "sample": [-16.503982543945312, 530.0053100585938, 46.56946563720703, 119.66300201416016, 257.8480224609375, 126.90144348144531, -51.39253234863281, 277.86669921875, 75.48895263671875, 419.71661376953125, 126.10737609863281, 316.81878662109375, -45.41117858886719, 275.71575927734375, -101.88902282714844, 58.185997009277344, 292.7753601074219, 91.39407348632812, 202.8673858642578, 65.52422332763672, 135.02613830566406, -267.2952575683594, -58.681007385253906, 371.38214111328125, 171.2335205078125, 384.9444885253906, 157.15272521972656, 9.732154846191406, 47.884193420410156, 406.10101318359375, 465.793212890625, 277.73199462890625, 661.5164794921875, 447.439453125, 216.73654174804688, 71.60736083984375, 62.34751510620117, 234.60821533203125, 16.847145080566406, 90.56966400146484, 65.6600341796875, 150.99697875976562, 71.9382095336914, 27.915807723999023, 163.58505249023438, -2.027996063232422, -273.2129211425781, -245.1839599609375, 257.3680419921875, 16.67742347717285, -136.65719604492188, -79.27983856201172, -25.093063354492188, -104.87750244140625, -56.38232421875, -38.3885498046875, -30.081689834594727, -14.359550476074219, -144.75802612304688, 437.6679992675781, 216.65008544921875, 239.94839477539062, 13.379024505615234, 79.8886947631836], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000266.npy"} +{"epoch": 0.39060205580029367, "step": 267, "batch_size": 64, "mean": 145.4833526611328, "std": 206.0981903076172, "min": -513.745361328125, "p10": -43.87514648437499, "median": 121.05089950561523, "p90": 365.76297607421884, "max": 707.6538696289062, "pos_frac": 0.78125, "sample": [78.78534698486328, 187.10867309570312, 333.36865234375, -11.681720733642578, 86.9190673828125, -20.334135055541992, 273.916259765625, 192.84097290039062, 4.193145751953125, 41.14619827270508, -49.506500244140625, -30.735321044921875, -8.840789794921875, 375.1085510253906, 156.8802490234375, 121.0855941772461, -83.1478500366211, 246.02810668945312, 471.47698974609375, 707.6538696289062, 93.54380798339844, 79.91083526611328, 149.84043884277344, 15.09326171875, -104.46192932128906, 98.31991577148438, 79.0970458984375, 93.71704864501953, 316.5667724609375, 215.82542419433594, 522.2479858398438, 400.1219482421875, 680.628662109375, 66.26968383789062, -5.285373687744141, 273.6857604980469, 33.28498458862305, -62.498146057128906, 10.71969223022461, 193.24020385742188, 284.8956604003906, 17.988983154296875, -6.107206344604492, 611.0889892578125, -513.745361328125, -11.725486755371094, -374.3360595703125, 226.85348510742188, 191.384033203125, 121.01620483398438, 221.9241485595703, 234.7789306640625, 81.09912109375, 311.6982727050781, 106.33765411376953, 167.68801879882812, -50.56645202636719, 343.9566345214844, 76.65857696533203, 130.45098876953125, 322.2140197753906, 165.3987579345703, 285.1777038574219, 144.6710968017578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000267.npy"} +{"epoch": 0.3920704845814978, "step": 268, "batch_size": 64, "mean": 137.439208984375, "std": 203.34576416015625, "min": -324.086181640625, "p10": -61.38204803466797, "median": 125.63863754272461, "p90": 337.3724975585938, "max": 769.6926879882812, "pos_frac": 0.796875, "sample": [2.5926895141601562, 460.5184326171875, 93.06241607666016, -68.77501678466797, 39.71537780761719, 71.71936798095703, 172.53855895996094, 71.62054443359375, 342.60693359375, 101.14947509765625, 186.84869384765625, -292.13604736328125, 150.95407104492188, 43.389617919921875, 273.11480712890625, -196.3675537109375, 107.63457489013672, 43.30946350097656, -323.4454040527344, 133.3691864013672, 156.20635986328125, 769.6926879882812, 51.3604850769043, 159.24273681640625, 77.42549133300781, 258.49530029296875, 460.7711181640625, 500.953857421875, -29.610984802246094, 319.6583251953125, -96.51422882080078, 66.42544555664062, -57.485382080078125, -10.678211212158203, 577.3150024414062, 142.5422821044922, 200.12435913085938, 139.25450134277344, 253.07852172851562, 223.8469696044922, 140.4876251220703, 274.14404296875, 6.554023742675781, -63.05204772949219, 312.6588134765625, 30.32696533203125, 196.06556701660156, 301.19207763671875, 16.668319702148438, 266.3427429199219, 188.14407348632812, 111.52830505371094, 674.48828125, 200.2984619140625, 77.79449462890625, 70.92378234863281, 117.90808868408203, -4.609825134277344, 325.1588134765625, -17.659650802612305, 184.02407836914062, -324.086181640625, -28.249195098876953, 163.5326385498047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000268.npy"} +{"epoch": 0.3935389133627019, "step": 269, "batch_size": 64, "mean": 171.92185974121094, "std": 220.90115356445312, "min": -405.1007995605469, "p10": -82.7112937927246, "median": 174.76653289794922, "p90": 446.49113464355474, "max": 650.0057373046875, "pos_frac": 0.734375, "sample": [154.39817810058594, 317.8697509765625, 157.64697265625, 302.93890380859375, -84.92266082763672, 237.24026489257812, 320.7960205078125, 184.91944885253906, -77.55143737792969, -24.325523376464844, 294.518310546875, -15.313278198242188, 487.1415710449219, 35.190887451171875, 339.02777099609375, 270.38201904296875, 2.4937362670898438, 187.81666564941406, -105.91252136230469, -405.1007995605469, 296.0722961425781, 12.35971450805664, 240.88787841796875, -251.4925537109375, -112.63056945800781, 400.19866943359375, -23.21898651123047, 581.5926513671875, 516.7202758789062, 133.16995239257812, 164.61361694335938, 59.177978515625, 405.597900390625, 293.5154113769531, -19.952255249023438, 332.97662353515625, 425.7549743652344, 92.64800262451172, 313.02471923828125, 23.597412109375, 14.495429992675781, 144.21490478515625, -191.7859649658203, 45.420257568359375, 140.5106658935547, 570.0188598632812, 302.0730285644531, 489.70098876953125, 312.9444274902344, -22.894378662109375, 344.3959045410156, 258.4328918457031, -4.343841552734375, 650.0057373046875, -76.03488159179688, 449.7083435058594, -19.96820068359375, 44.11620330810547, -35.07307052612305, 250.12750244140625, 438.98431396484375, 408.0628967285156, -186.67764282226562, 212.696533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000269.npy"} +{"epoch": 0.39500734214390604, "step": 270, "batch_size": 64, "mean": 127.91445922851562, "std": 197.0740509033203, "min": -389.81378173828125, "p10": -72.47053680419921, "median": 110.54150390625, "p90": 375.9945983886719, "max": 683.304443359375, "pos_frac": 0.796875, "sample": [230.44097900390625, 333.7236328125, 479.51690673828125, -305.3886413574219, -389.81378173828125, -24.685287475585938, 55.350852966308594, 257.7352600097656, 99.7512435913086, 307.0756530761719, 378.8245849609375, -45.508331298828125, 121.3317642211914, 183.58689880371094, 26.063812255859375, 54.345924377441406, 80.80724334716797, 139.5728759765625, -147.02682495117188, 198.86758422851562, 279.9561462402344, 144.1031951904297, -29.100242614746094, 206.1239013671875, 444.12579345703125, 267.42669677734375, 15.897420883178711, 78.8833999633789, 208.68710327148438, 99.04840850830078, -235.48036193847656, 391.97314453125, 683.304443359375, -74.25367736816406, -138.86557006835938, 273.913330078125, 336.13916015625, 40.875762939453125, 81.69517517089844, 3.6075401306152344, 588.199462890625, -199.1700439453125, 187.38998413085938, 6.561748504638672, 146.14651489257812, 125.17784881591797, 51.524566650390625, 206.41000366210938, -68.30987548828125, -10.513774871826172, 206.24722290039062, 336.13726806640625, 91.66064453125, 23.43958854675293, 1.2839813232421875, 159.63442993164062, 14.789108276367188, 60.11283874511719, -31.93659210205078, 369.39129638671875, 152.96937561035156, 151.39797973632812, 90.44184875488281, 414.90716552734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000270.npy"} +{"epoch": 0.3964757709251101, "step": 271, "batch_size": 64, "mean": 155.0682830810547, "std": 175.69522094726562, "min": -124.81519317626953, "p10": -26.984109878540025, "median": 109.7655258178711, "p90": 408.9490203857423, "max": 678.5303344726562, "pos_frac": 0.796875, "sample": [678.5303344726562, 373.6170959472656, 96.78616333007812, -33.59342575073242, 216.43736267089844, -14.511981964111328, 382.7764587402344, 371.3409729003906, 36.59613800048828, -10.746406555175781, 320.2752685546875, -124.27667236328125, 420.16583251953125, -124.81519317626953, 83.81153106689453, -32.329307556152344, 75.78274536132812, -0.06322479248046875, 142.77162170410156, 2.584259033203125, -81.3681411743164, 7.7426910400390625, 463.552490234375, 337.41455078125, 460.3779296875, 367.25311279296875, 65.91368865966797, -41.04881286621094, 68.98885345458984, 116.93695831298828, -7.763240814208984, 227.1538543701172, 144.61199951171875, -8.192512512207031, -46.724395751953125, 130.73399353027344, 58.511810302734375, 22.95299530029297, 328.0540771484375, 109.77949523925781, 12.787605285644531, 496.5714111328125, 66.17765045166016, -5.496721267700195, 95.87828063964844, 35.66108322143555, 109.75155639648438, 122.74961853027344, 325.9971008300781, 194.46884155273438, 462.9224853515625, 263.0326232910156, 89.81372833251953, 25.734146118164062, 457.39959716796875, 22.697242736816406, 211.59481811523438, 344.052001953125, 29.73492431640625, 194.6819305419922, 135.64108276367188, 291.96649169921875, 150.92135620117188, 203.6096954345703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000271.npy"} +{"epoch": 0.39794419970631423, "step": 272, "batch_size": 64, "mean": 193.73599243164062, "std": 186.1200714111328, "min": -178.7349090576172, "p10": -45.670285034179685, "median": 197.97667694091797, "p90": 460.6904479980469, "max": 605.284912109375, "pos_frac": 0.8125, "sample": [288.03460693359375, 198.75244140625, -12.83824348449707, -107.35073852539062, -23.989028930664062, 349.46746826171875, 223.5023956298828, 476.9642333984375, 466.92755126953125, 281.76312255859375, 322.1748352050781, 353.3435974121094, -45.60487365722656, 159.850830078125, -164.04061889648438, 81.46409606933594, 97.06816864013672, 203.39964294433594, 161.81399536132812, 143.32070922851562, -66.48793029785156, 273.54656982421875, 312.5921325683594, 206.1044921875, 316.9849853515625, 92.77291107177734, 0.2643928527832031, 285.2188415527344, 123.0867691040039, 599.0555419921875, 48.24229431152344, 192.56295776367188, 203.9054718017578, 154.28628540039062, 68.86305236816406, 375.5572814941406, 485.1900939941406, -178.7349090576172, 214.78720092773438, 236.45458984375, 605.284912109375, 15.1973876953125, 496.41424560546875, 151.5072021484375, 281.1353759765625, -69.7238998413086, 287.83929443359375, 62.04023361206055, 219.23008728027344, 400.4822692871094, -45.69831848144531, 197.20091247558594, -11.147171020507812, -106.57167053222656, -41.43488311767578, 269.4427490234375, 304.6540832519531, 48.753868103027344, 422.66668701171875, 160.46876525878906, 183.11904907226562, 446.13720703125, 535.5469970703125, 188.28152465820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000272.npy"} +{"epoch": 0.39941262848751835, "step": 273, "batch_size": 64, "mean": 188.71389770507812, "std": 199.29367065429688, "min": -254.57606506347656, "p10": -52.05585632324218, "median": 173.64932250976562, "p90": 445.71768798828145, "max": 673.5010986328125, "pos_frac": 0.796875, "sample": [243.44012451171875, 290.5838623046875, 98.88140869140625, 155.36338806152344, 530.8624267578125, 471.6142578125, 173.58749389648438, -96.08808135986328, 659.5315551757812, -47.21870422363281, 380.3014831542969, 196.40383911132812, 2.17510986328125, 73.01439666748047, -1.5878276824951172, 159.5023193359375, -1.8426399230957031, 259.0570068359375, 187.73934936523438, 359.93084716796875, 233.1846466064453, 52.96697998046875, 70.29840087890625, 156.9845733642578, 98.63056945800781, 60.683326721191406, 344.712646484375, 182.9138946533203, 173.71115112304688, 182.45401000976562, 332.16607666015625, -10.969413757324219, 358.3202819824219, 319.39788818359375, -14.914865493774414, 606.2015380859375, 551.8880004882812, 234.08216857910156, 165.1299285888672, 282.46527099609375, -199.63357543945312, -105.43211364746094, 276.49334716796875, -90.29953002929688, 257.52984619140625, 290.3841552734375, 366.8726806640625, 55.25889587402344, 144.02377319335938, -254.57606506347656, 673.5010986328125, 162.22119140625, 95.10801696777344, -102.21316528320312, -18.02557373046875, 328.8329162597656, 53.97322082519531, 122.90982055664062, 277.0185241699219, -54.12892150878906, 349.8601379394531, 105.9997329711914, 401.9978942871094, 464.4547424316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000273.npy"} +{"epoch": 0.4008810572687225, "step": 274, "batch_size": 64, "mean": 144.90960693359375, "std": 188.8855438232422, "min": -217.79246520996094, "p10": -106.46806640624999, "median": 147.07884216308594, "p90": 430.7862121582033, "max": 566.89892578125, "pos_frac": 0.75, "sample": [90.17487335205078, 87.44947814941406, 35.025299072265625, -156.9467315673828, 215.18190002441406, 160.72323608398438, 214.14903259277344, -35.917335510253906, 116.39495086669922, -3.1107940673828125, -120.19412231445312, 445.97021484375, 340.83636474609375, 227.40859985351562, 285.5811462402344, 160.4265594482422, 33.15776824951172, 460.21307373046875, -217.79246520996094, 264.008056640625, -132.3209991455078, 148.16966247558594, -59.4010009765625, 74.59756469726562, 60.569297790527344, 262.03216552734375, 450.5819396972656, -100.80517578125, 98.79183197021484, 561.51171875, -39.0276985168457, 156.41036987304688, 313.72528076171875, 167.79122924804688, 2.1289615631103516, 190.3794708251953, 463.8938903808594, -10.798248291015625, 198.7481231689453, 92.98112487792969, 293.4228820800781, -2.6534271240234375, 145.98802185058594, 275.3815002441406, 390.54119873046875, -82.08513641357422, 122.32020568847656, 44.19480895996094, 187.02142333984375, 544.23388671875, 286.4640197753906, 59.22364807128906, 8.861358642578125, 152.57119750976562, -173.05239868164062, 311.298828125, -125.43890380859375, -108.89501953125, 566.89892578125, 203.38674926757812, 125.29098510742188, 395.35687255859375, -31.021392822265625, 182.20700073242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000274.npy"} +{"epoch": 0.4023494860499266, "step": 275, "batch_size": 64, "mean": 153.61099243164062, "std": 190.96343994140625, "min": -226.67861938476562, "p10": -76.83283538818358, "median": 151.7874298095703, "p90": 406.15877990722663, "max": 596.7031860351562, "pos_frac": 0.796875, "sample": [186.65272521972656, 55.74253845214844, 307.1148376464844, 12.236812591552734, 1.4790706634521484, -100.4480972290039, 386.8209533691406, 138.2969207763672, 495.7926940917969, -102.92012786865234, 263.0954284667969, 441.3367614746094, 548.6967163085938, 82.75682830810547, 175.72946166992188, 263.29046630859375, -17.531864166259766, 4.9331207275390625, 139.98504638671875, 190.86170959472656, -93.2625503540039, 228.97314453125, 454.6976013183594, -34.72167205810547, 55.475318908691406, -194.4312744140625, 275.93084716796875, 177.34426879882812, 169.61764526367188, 62.58092498779297, -79.64920806884766, 59.68260192871094, 337.14080810546875, 217.46005249023438, 3.967527389526367, 5.222259521484375, -219.7823486328125, -17.016876220703125, -25.5706844329834, 35.50761413574219, 77.23110961914062, 358.78240966796875, -226.67861938476562, 154.8441925048828, 247.47096252441406, 410.86199951171875, 208.75680541992188, 596.7031860351562, 202.97915649414062, 467.02008056640625, 395.1846008300781, -36.46776580810547, 224.91036987304688, 380.6585693359375, 47.47870635986328, 359.15673828125, 148.7306671142578, 355.8638610839844, -70.26129913330078, 200.55374145507812, 41.84516143798828, 305.2115173339844, 57.161376953125, 30.018341064453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000275.npy"} +{"epoch": 0.40381791483113066, "step": 276, "batch_size": 64, "mean": 153.95571899414062, "std": 207.44520568847656, "min": -262.9680480957031, "p10": -68.5997604370117, "median": 157.39041900634766, "p90": 429.074154663086, "max": 831.7674560546875, "pos_frac": 0.8125, "sample": [111.30170440673828, 831.7674560546875, 144.16847229003906, 449.91729736328125, 437.5475158691406, 13.124944686889648, -13.095535278320312, 538.4051513671875, 25.52357292175293, 158.74110412597656, 166.71131896972656, 19.34345245361328, -41.50958251953125, 255.81680297851562, 303.3663635253906, 253.63180541992188, 217.44894409179688, -262.9680480957031, 139.6118621826172, 1.8088417053222656, 38.281673431396484, 352.18841552734375, 194.4160614013672, 335.7856750488281, -76.7470703125, -225.87759399414062, -19.264179229736328, 81.61253356933594, 197.0264892578125, 342.7472839355469, -49.58937072753906, 190.4007568359375, 40.723899841308594, 167.36752319335938, 321.8873291015625, 278.6158447265625, -138.17495727539062, -206.68385314941406, 2.912332534790039, 7.584774017333984, -137.07846069335938, 12.708099365234375, 28.771907806396484, 458.5301818847656, 94.12317657470703, 161.7047119140625, 253.5865020751953, 218.66505432128906, 287.1382141113281, 34.544349670410156, 504.71630859375, -28.206192016601562, 162.19573974609375, 245.25527954101562, 35.11126708984375, 238.8011932373047, 234.2398223876953, 156.03973388671875, 375.9478759765625, 65.21197509765625, -176.07867431640625, 595.843505859375, 36.215240478515625, 409.302978515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000276.npy"} +{"epoch": 0.4052863436123348, "step": 277, "batch_size": 64, "mean": 158.27908325195312, "std": 207.5612030029297, "min": -299.20843505859375, "p10": -106.64529113769531, "median": 149.47364807128906, "p90": 442.4332489013672, "max": 564.89892578125, "pos_frac": 0.796875, "sample": [3.8423118591308594, 313.1016845703125, -110.49794006347656, 174.4444580078125, 35.14046859741211, 315.41754150390625, 375.201171875, 159.17010498046875, -97.65577697753906, 201.53094482421875, 172.87847900390625, -41.39203643798828, 364.0240478515625, -34.45966339111328, 220.7613067626953, 382.10565185546875, 60.078453063964844, 138.64382934570312, 205.37417602539062, 303.60809326171875, -111.0749740600586, 399.0570983886719, 37.58717346191406, 51.444679260253906, -219.3553466796875, -75.23912811279297, 523.0684204101562, 43.15435791015625, -18.292724609375, 167.62387084960938, -299.20843505859375, 527.0547485351562, 166.10964965820312, 298.20599365234375, -214.78073120117188, -186.36431884765625, 564.89892578125, 279.574462890625, 138.58596801757812, -36.25579833984375, 242.62451171875, 21.149200439453125, 537.4544677734375, 33.46525955200195, 12.059234619140625, 112.28714752197266, 227.57528686523438, 363.7116394042969, 436.9892578125, 499.54534912109375, -155.30056762695312, 293.00762939453125, 59.61304473876953, 92.6358871459961, 12.679611206054688, 3.4791603088378906, 279.43402099609375, 139.77719116210938, 444.7663879394531, 481.9825439453125, 104.95957946777344, 181.0447998046875, 105.58988189697266, 422.2189636230469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000277.npy"} +{"epoch": 0.4067547723935389, "step": 278, "batch_size": 64, "mean": 183.9108123779297, "std": 208.61668395996094, "min": -162.496826171875, "p10": -56.50035820007324, "median": 141.71035766601562, "p90": 518.2839721679687, "max": 675.9649047851562, "pos_frac": 0.796875, "sample": [-94.4345703125, 354.9749450683594, -73.77880096435547, 30.69534683227539, 322.2574157714844, 332.61541748046875, 234.80618286132812, 68.82920837402344, 164.37179565429688, 108.0872573852539, 292.3514709472656, 175.22763061523438, 30.637136459350586, -58.12213134765625, 123.09344482421875, 245.89027404785156, -5.669708251953125, 337.4587097167969, 516.046142578125, 10.778678894042969, 0.2273693084716797, 605.597412109375, 401.3319091796875, 201.04702758789062, 279.7080383300781, 308.87042236328125, 132.16781616210938, -25.160133361816406, 181.32223510742188, 533.7365112304688, 493.8556213378906, -70.08372497558594, 111.64370727539062, 21.4351806640625, 587.2288208007812, 519.2430419921875, 42.375465393066406, -42.95098876953125, 151.25289916992188, 196.99603271484375, 98.52689361572266, 431.512939453125, 184.13119506835938, 62.37419128417969, 39.158599853515625, 546.5181884765625, 122.10623931884766, 236.32350158691406, -60.349552154541016, 610.3607177734375, 193.3105926513672, 120.88705444335938, 14.745147705078125, 30.00586700439453, 346.2662048339844, 302.1500244140625, -39.16248321533203, -26.68428611755371, 675.9649047851562, -162.496826171875, 367.58428955078125, 116.85623168945312, -133.04190063476562, -52.71622085571289], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000278.npy"} +{"epoch": 0.40822320117474303, "step": 279, "batch_size": 64, "mean": 180.64385986328125, "std": 241.22213745117188, "min": -390.16357421875, "p10": -47.90715713500976, "median": 143.60595703125, "p90": 502.3202117919922, "max": 917.455322265625, "pos_frac": 0.78125, "sample": [73.35765838623047, 59.62189483642578, 53.2892951965332, 35.929107666015625, -38.37823486328125, -49.98876190185547, 398.4627685546875, 509.8562927246094, -34.12415313720703, -42.620540618896484, 23.37981414794922, 245.95555114746094, 161.896484375, 33.90765380859375, 506.6279602050781, 649.5421142578125, 54.50519943237305, 177.91571044921875, 270.13555908203125, 295.92047119140625, 704.46533203125, 95.01250457763672, 32.50574493408203, -274.259521484375, 41.059722900390625, 83.26604461669922, -15.387664794921875, 205.48373413085938, 86.76248168945312, 579.8175048828125, 118.1973876953125, 41.48204803466797, 235.36068725585938, -32.53905487060547, 262.73004150390625, 468.7352600097656, 250.50064086914062, 917.455322265625, 492.268798828125, 300.50579833984375, -27.694931030273438, -43.050079345703125, -171.6376495361328, -390.16357421875, 152.1464080810547, 465.5022888183594, 95.1434326171875, 267.4342956542969, 363.18756103515625, 135.0655059814453, 398.20806884765625, 319.35479736328125, 80.13626861572266, -117.98391723632812, 506.9062194824219, 208.67837524414062, 424.25115966796875, 278.6318359375, -106.37047576904297, -188.62574768066406, 381.63604736328125, 282.64581298828125, 2.886821746826172, 266.3047180175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000279.npy"} +{"epoch": 0.40969162995594716, "step": 280, "batch_size": 64, "mean": 186.25331115722656, "std": 195.02511596679688, "min": -303.6202087402344, "p10": -65.63065948486327, "median": 174.75436401367188, "p90": 440.5317626953125, "max": 573.4691162109375, "pos_frac": 0.796875, "sample": [133.70372009277344, 313.12774658203125, 121.73624420166016, 237.7592315673828, 32.13887023925781, -43.192420959472656, 377.6420593261719, 489.42547607421875, 499.22283935546875, 36.704620361328125, 219.39573669433594, -22.226524353027344, -86.36180114746094, 373.6611328125, 402.3571472167969, 389.860595703125, 130.7431640625, 272.145263671875, 169.81781005859375, 279.5368957519531, 104.19741821289062, 535.5711059570312, 438.05621337890625, 196.52554321289062, 261.1910095214844, 100.89755249023438, 357.18115234375, 345.24591064453125, -1.64263916015625, 236.83197021484375, 184.94546508789062, 152.27911376953125, 560.4501953125, 156.29891967773438, -52.6446533203125, 151.64476013183594, -176.68109130859375, 388.3701477050781, 217.7863006591797, 83.43470001220703, 219.35153198242188, 112.47473907470703, -103.61857604980469, 573.4691162109375, -71.19609069824219, -303.6202087402344, 139.90261840820312, 503.9786682128906, 179.69091796875, 130.90176391601562, 194.49510192871094, 441.59271240234375, -44.87068176269531, 153.18202209472656, -28.115936279296875, -79.3259506225586, 222.52825927734375, 131.5486297607422, 114.56863403320312, 351.2685546875, 318.4876708984375, -180.7540283203125, 13.064346313476562, 364.07110595703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000280.npy"} +{"epoch": 0.4111600587371512, "step": 281, "batch_size": 64, "mean": 169.2798614501953, "std": 192.94300842285156, "min": -600.8914794921875, "p10": -30.411274337768536, "median": 153.67955780029297, "p90": 408.5805084228516, "max": 563.1823120117188, "pos_frac": 0.84375, "sample": [136.01312255859375, 311.94091796875, 148.25164794921875, 232.33143615722656, 48.85782241821289, 185.37188720703125, 60.07818603515625, 210.10952758789062, 203.0427703857422, 54.230445861816406, 239.046142578125, 435.9656677246094, 281.6234436035156, 81.13184356689453, 244.5526885986328, -99.12237548828125, 197.13943481445312, 563.1823120117188, 55.857940673828125, 159.1074676513672, -12.085479736328125, 398.81884765625, 293.766845703125, 552.8014526367188, 223.65310668945312, 268.0579528808594, 191.96371459960938, 268.805908203125, -38.26518630981445, 543.38623046875, 363.2693176269531, 110.24288940429688, 189.47030639648438, 399.9371337890625, 200.0406494140625, -600.8914794921875, 65.05552673339844, 93.25663757324219, 94.07015228271484, 50.601654052734375, 492.08843994140625, 20.142379760742188, -3.7266693115234375, 316.43377685546875, 140.64117431640625, 248.2052001953125, 49.02607727050781, 127.13447570800781, 506.19232177734375, 87.12952423095703, -83.13973236083984, 137.38095092773438, -136.42576599121094, 90.21963500976562, -59.742950439453125, -2.4308547973632812, -58.09336853027344, 400.3551025390625, 235.63987731933594, 65.5416488647461, 137.02597045898438, 299.85589599609375, 7.683897018432617, 412.1056823730469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000281.npy"} +{"epoch": 0.41262848751835535, "step": 282, "batch_size": 64, "mean": 129.77859497070312, "std": 213.11900329589844, "min": -215.1263427734375, "p10": -111.02870483398436, "median": 113.74681091308594, "p90": 351.6037017822266, "max": 1146.318603515625, "pos_frac": 0.75, "sample": [432.5049743652344, 147.73851013183594, 72.85404968261719, -209.92404174804688, 185.62303161621094, 270.8675231933594, 84.6458740234375, 41.74650573730469, 148.4884033203125, 356.0662841796875, 268.3674621582031, 170.097900390625, 316.6797790527344, 87.41155242919922, -26.092147827148438, 240.26397705078125, 1146.318603515625, 162.69595336914062, 224.374755859375, 116.89459228515625, 149.52410888671875, 207.90505981445312, 28.64260482788086, -134.2603759765625, 571.288818359375, 378.754150390625, 96.38145446777344, 159.37782287597656, 157.01333618164062, 520.6638793945312, 311.58056640625, 194.9647979736328, 304.27734375, -18.012863159179688, 12.804973602294922, 122.51547241210938, -117.06938171386719, -105.33992767333984, 65.61767578125, -113.35403442382812, -45.60577392578125, 42.451812744140625, 138.85691833496094, 131.09756469726562, 219.21205139160156, -14.632179260253906, -105.60293579101562, 386.1100158691406, 206.36822509765625, 110.59902954101562, -52.822364807128906, -59.26106262207031, 26.030807495117188, 51.44894790649414, 97.05641174316406, -175.47344970703125, 286.9488525390625, 5.39137077331543, -120.8541488647461, 341.1910095214844, -215.1263427734375, 36.31339645385742, -55.94261932373047, 41.17564392089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000282.npy"} +{"epoch": 0.41409691629955947, "step": 283, "batch_size": 64, "mean": 190.0657196044922, "std": 202.4391326904297, "min": -438.3199462890625, "p10": -40.5438907623291, "median": 180.05704498291016, "p90": 470.91626586914066, "max": 629.000732421875, "pos_frac": 0.828125, "sample": [-108.00448608398438, -71.07347869873047, 212.9485321044922, 34.304283142089844, 310.82470703125, 321.46954345703125, -38.947509765625, 257.1757507324219, 371.6214904785156, 71.8259048461914, 67.74840545654297, 155.8154754638672, -41.22805404663086, 91.70497131347656, 110.55431365966797, 173.6273193359375, -438.3199462890625, 45.78580856323242, 309.124267578125, -61.581336975097656, 494.8941345214844, 446.9156188964844, 176.8542938232422, 305.10528564453125, 26.809154510498047, 134.65765380859375, -57.93333435058594, 467.480224609375, 362.43865966796875, 510.4625244140625, 43.07708740234375, 293.74603271484375, -129.2599334716797, 200.1275634765625, 472.38885498046875, 45.42268371582031, 58.985565185546875, -25.505603790283203, 89.35026550292969, -28.548952102661133, 480.35791015625, 207.0680694580078, 314.163818359375, 205.8447265625, 282.3589172363281, 183.25979614257812, 92.90351104736328, 191.40478515625, 426.9664306640625, 269.7940673828125, 579.9229736328125, 31.948387145996094, 377.50665283203125, 153.57211303710938, 66.21778106689453, 305.7341613769531, 565.5855712890625, 213.26954650878906, 312.45245361328125, 629.000732421875, 435.174560546875, 108.76532745361328, 102.0114517211914, -33.92152404785156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000283.npy"} +{"epoch": 0.4155653450807636, "step": 284, "batch_size": 64, "mean": 184.56295776367188, "std": 199.60781860351562, "min": -205.73837280273438, "p10": -33.03383140563963, "median": 163.32579040527344, "p90": 484.10634460449245, "max": 727.7727661132812, "pos_frac": 0.859375, "sample": [-89.77310180664062, 135.98825073242188, 325.4407958984375, 38.6646728515625, 121.98090362548828, 72.34381866455078, 370.97210693359375, 17.62731170654297, 268.4465637207031, 422.56494140625, 367.087158203125, -38.595890045166016, 510.4812316894531, 3.3359012603759766, 279.61968994140625, 20.283679962158203, 41.84593963623047, 241.54141235351562, 282.39434814453125, 517.6112060546875, 56.7120361328125, 668.6159057617188, 170.04049682617188, 289.3481140136719, 158.76358032226562, -11.261886596679688, 168.31907653808594, 84.334228515625, 267.400390625, -20.055694580078125, 22.875457763671875, 727.7727661132812, 173.82762145996094, -45.756065368652344, 152.7130126953125, 100.15179443359375, -79.4003677368164, 79.8955307006836, 228.69467163085938, 725.6181640625, 76.74479675292969, 200.46817016601562, 192.44638061523438, 72.08871459960938, -86.68118286132812, -205.73837280273438, 271.3328857421875, 312.53277587890625, 123.27207946777344, 266.5393371582031, 527.6796264648438, 167.88800048828125, 313.4736633300781, 205.79588317871094, 51.05769348144531, 15.105335235595703, -51.34709930419922, 8.678642272949219, 355.08148193359375, 245.4727325439453, 38.185829162597656, 513.08935546875, 22.20610809326172, 348.1865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000284.npy"} +{"epoch": 0.4170337738619677, "step": 285, "batch_size": 64, "mean": 165.91085815429688, "std": 198.38706970214844, "min": -507.76763916015625, "p10": -55.93999404907223, "median": 156.27227783203125, "p90": 387.75461425781253, "max": 598.307861328125, "pos_frac": 0.859375, "sample": [377.88916015625, 260.1072082519531, 45.69364929199219, 391.982666015625, 135.0843505859375, 122.6170654296875, 177.85208129882812, 59.285987854003906, 147.28379821777344, 598.307861328125, 279.2724609375, 238.3492889404297, 320.1449890136719, 146.4853515625, 161.6254425048828, 575.1755981445312, 26.55975341796875, 120.18142700195312, 146.40643310546875, 247.24658203125, 451.0813903808594, -228.67465209960938, 89.33296966552734, 313.169189453125, 429.5978698730469, 10.274604797363281, -69.21014404296875, 86.0194091796875, 53.97393798828125, 89.6108169555664, 256.60003662109375, 150.9191131591797, -131.62173461914062, 22.23232650756836, 272.5209045410156, 318.5157470703125, 44.009925842285156, -507.76763916015625, 267.1863098144531, 48.652740478515625, 167.98800659179688, 16.286865234375, 589.52392578125, 271.8099670410156, -24.97631072998047, 321.95086669921875, 299.87701416015625, 136.8016815185547, 287.24945068359375, 353.13006591796875, 175.48858642578125, -199.5603790283203, 296.5445861816406, 225.30465698242188, -191.17103576660156, -8.284061431884766, 435.0147705078125, 95.44216918945312, 98.5956039428711, 163.3798065185547, -152.26544189453125, 353.845458984375, 218.55734252929688, 143.7869873046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000285.npy"} +{"epoch": 0.4185022026431718, "step": 286, "batch_size": 64, "mean": 180.1011199951172, "std": 218.9644012451172, "min": -343.2037658691406, "p10": -76.42151412963867, "median": 172.2771759033203, "p90": 488.15527343750006, "max": 647.7030029296875, "pos_frac": 0.78125, "sample": [61.30107879638672, -62.124107360839844, 499.9543762207031, 126.77288818359375, 171.53237915039062, 170.95724487304688, 55.53026580810547, 260.5977783203125, 347.49493408203125, -343.2037658691406, 286.9820251464844, 242.16983032226562, 44.75285339355469, -72.70439910888672, 466.5471496582031, 177.52517700195312, 543.50146484375, -30.54327392578125, 173.02197265625, 175.18019104003906, 491.61468505859375, 66.82095336914062, -304.05462646484375, 480.41015625, 309.38909912109375, -55.95794677734375, -66.21580505371094, 317.50201416015625, 331.2931213378906, 550.7698364257812, 431.19232177734375, -173.33578491210938, 175.77464294433594, 92.40890502929688, 23.234359741210938, 87.02296447753906, 377.88116455078125, 146.9044952392578, 5.909088134765625, -76.77836608886719, 197.27561950683594, -75.58885955810547, 244.49227905273438, -89.85787200927734, 99.15042114257812, 439.23529052734375, 476.61578369140625, 156.53025817871094, -114.7200927734375, 574.318603515625, -42.207374572753906, 233.7425537109375, 67.18051147460938, 491.474609375, 148.06707763671875, 308.1260681152344, 115.72213745117188, 242.597412109375, 288.1621398925781, -90.13241577148438, 647.7030029296875, 414.5998840332031, 179.35162353515625, 107.6015396118164], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000286.npy"} +{"epoch": 0.4199706314243759, "step": 287, "batch_size": 64, "mean": 145.67831420898438, "std": 192.9538116455078, "min": -273.5018005371094, "p10": -71.45898361206055, "median": 118.87469482421875, "p90": 420.3535522460938, "max": 656.54345703125, "pos_frac": 0.8125, "sample": [124.89500427246094, 53.889251708984375, 3.120443344116211, 75.20500946044922, 144.7632293701172, -62.059417724609375, 299.962158203125, 561.2273559570312, 179.9551239013672, 138.21109008789062, 551.124755859375, 99.92039489746094, 211.0780487060547, 516.6195068359375, -84.5853271484375, -72.30055236816406, 24.82067108154297, 431.79150390625, 118.91778564453125, 334.61126708984375, 212.7745819091797, 6.650808334350586, 147.4498291015625, -273.5018005371094, 656.54345703125, 81.25940704345703, 53.91693878173828, -16.40118408203125, -61.07408142089844, 51.320640563964844, 163.83367919921875, -48.409584045410156, -158.73443603515625, 150.71315002441406, 76.26853942871094, 234.45594787597656, 66.64602661132812, 255.77479553222656, 113.9090576171875, 286.52197265625, 10.307065963745117, 260.12249755859375, 211.9720458984375, 84.50462341308594, 26.923416137695312, 118.83160400390625, 112.1649169921875, 44.21421813964844, 353.32757568359375, 370.626953125, -158.53219604492188, 67.41545867919922, 216.3985137939453, 53.044639587402344, 414.5347595214844, -200.72915649414062, -146.37464904785156, -69.49532318115234, 203.2760009765625, 343.1878356933594, 459.72320556640625, 422.8473205566406, 203.2984161376953, 270.737548828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000287.npy"} +{"epoch": 0.42143906020558003, "step": 288, "batch_size": 64, "mean": 188.2078399658203, "std": 203.32125854492188, "min": -329.3408508300781, "p10": 4.218954086303712, "median": 173.44625091552734, "p90": 413.9374969482422, "max": 720.7265625, "pos_frac": 0.90625, "sample": [171.5308837890625, 173.85427856445312, 3.7950477600097656, 196.10305786132812, 329.119873046875, 406.60101318359375, 24.248624801635742, 720.7265625, 41.27545166015625, 372.631103515625, 267.5583801269531, 170.31173706054688, -105.94165802001953, 16.721389770507812, 101.37667846679688, 173.03822326660156, 130.37416076660156, 412.1664123535156, 252.135009765625, 339.1766052246094, 193.9298095703125, 269.1303405761719, 298.215087890625, 5.20806884765625, 83.61341857910156, 177.85916137695312, 171.177734375, 7.523279190063477, 182.57989501953125, 63.43382263183594, 199.80841064453125, 221.88226318359375, 617.8651733398438, 166.45053100585938, 414.696533203125, 346.3114013671875, 193.18418884277344, 10.87286376953125, 200.17681884765625, 100.61780548095703, -329.3408508300781, 607.47412109375, 109.4442138671875, 284.71923828125, 72.71824645996094, 167.02493286132812, 54.98419952392578, 33.74981689453125, 520.8206176757812, 120.07915496826172, 256.48272705078125, 596.4164428710938, 223.46337890625, -312.52215576171875, 74.43698120117188, 540.1510009765625, 315.0740051269531, 69.20518493652344, 53.359100341796875, 396.33563232421875, -9.723670959472656, 346.863525390625, -136.63218688964844, -130.62094116210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000288.npy"} +{"epoch": 0.42290748898678415, "step": 289, "batch_size": 64, "mean": 207.6798095703125, "std": 227.2295379638672, "min": -207.57064819335938, "p10": -68.68418655395507, "median": 160.5833282470703, "p90": 545.8766662597657, "max": 698.9129028320312, "pos_frac": 0.828125, "sample": [159.03317260742188, -48.003082275390625, 561.4141845703125, 643.187255859375, -165.13235473632812, 568.830810546875, 395.54388427734375, 291.24664306640625, 54.803955078125, 549.379638671875, 527.953125, 423.9447021484375, 280.2190856933594, 537.7030639648438, 360.02099609375, 334.61962890625, 26.494308471679688, 698.9129028320312, -70.45854949951172, 249.61917114257812, 104.42886352539062, 25.494260787963867, 119.00201416015625, -64.54400634765625, -111.9794921875, 54.075679779052734, 315.4248046875, 425.7894592285156, 65.91627502441406, 131.35813903808594, 417.2029724121094, 639.2325439453125, 355.4263610839844, -49.35988998413086, 67.6381607055664, 65.83616638183594, 598.935546875, 93.09999084472656, -109.839599609375, 16.117055892944336, 102.22465515136719, 476.00555419921875, -73.71487426757812, 263.9169921875, 204.3079071044922, 409.9808349609375, 403.83502197265625, -19.216917037963867, 137.15980529785156, -99.72305297851562, 161.08502197265625, 216.47979736328125, 73.67779541015625, 37.36973571777344, 529.6629638671875, 190.2410888671875, 23.227712631225586, 31.99431610107422, 160.08163452148438, 180.75439453125, 233.85214233398438, -207.57064819335938, 261.2351379394531, 56.053733825683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000289.npy"} +{"epoch": 0.4243759177679883, "step": 290, "batch_size": 64, "mean": 212.3250274658203, "std": 258.9597473144531, "min": -316.6517028808594, "p10": -65.00979995727539, "median": 206.65811157226562, "p90": 601.6361145019532, "max": 906.8594970703125, "pos_frac": 0.8125, "sample": [538.8394775390625, 198.92764282226562, 120.63465118408203, 162.58645629882812, -316.6517028808594, 399.7316589355469, 448.2889404296875, -66.86343383789062, -11.385086059570312, 137.45237731933594, -237.067626953125, 393.7013854980469, 227.76123046875, 27.062530517578125, 255.10089111328125, 214.38858032226562, 512.2964477539062, -22.8223876953125, 248.00991821289062, 11.89310073852539, 59.57954406738281, 50.53898620605469, -173.953857421875, 119.0599365234375, 143.83731079101562, -279.78741455078125, 127.20806884765625, 288.7187194824219, 906.8594970703125, 728.7366943359375, 85.15177917480469, 228.7117156982422, 302.4664306640625, 671.9439697265625, 234.22625732421875, 44.85289001464844, 261.2795715332031, 118.80122375488281, 278.4225769042969, 176.5274658203125, 14.74456787109375, 219.0373077392578, 214.66575622558594, 354.1407470703125, 52.2628173828125, 286.74774169921875, -60.684654235839844, -56.555633544921875, 341.50506591796875, -107.61264038085938, 680.6522216796875, 448.7901611328125, 277.1404113769531, 627.3924560546875, 16.767597198486328, 612.7105712890625, 575.7957153320312, 339.0545654296875, -21.443042755126953, 79.90365600585938, 75.70889282226562, 830.559814453125, 251.88250732421875, -79.43107604980469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000290.npy"} +{"epoch": 0.42584434654919234, "step": 291, "batch_size": 64, "mean": 187.67138671875, "std": 224.53297424316406, "min": -446.4739074707031, "p10": -59.19000701904295, "median": 202.54363250732422, "p90": 450.9234619140625, "max": 641.05810546875, "pos_frac": 0.765625, "sample": [198.53221130371094, 206.5550537109375, -15.139724731445312, 37.93921661376953, -8.696395874023438, 294.9317626953125, 641.05810546875, 293.8476867675781, 451.33892822265625, 271.73785400390625, 51.308929443359375, 449.95404052734375, 312.0831604003906, -79.76158905029297, 609.0325317382812, -25.627777099609375, 420.3848876953125, 248.47203063964844, -44.75598907470703, 241.1135711669922, 449.2139892578125, -9.882095336914062, 273.4335632324219, 88.48477172851562, 265.2672119140625, 413.28033447265625, 126.84300994873047, 184.2611541748047, 396.48699951171875, 38.27895736694336, 305.6745300292969, 70.70706939697266, 342.05755615234375, -194.52857971191406, 25.03607177734375, -202.8648681640625, -28.798362731933594, 63.42876434326172, -446.4739074707031, 567.3443603515625, 330.59820556640625, 472.7492980957031, 414.8936462402344, 241.64877319335938, -160.0119171142578, -43.12239074707031, -65.37601470947266, 359.14044189453125, 69.56451416015625, 169.02694702148438, 105.76246643066406, 169.7259521484375, 337.34423828125, 307.6719665527344, 0.929595947265625, 242.75619506835938, 415.2197570800781, 79.96952819824219, 541.798828125, -40.801368713378906, -150.73291015625, 401.49517822265625, 526.99755859375, 2.160938262939453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000291.npy"} +{"epoch": 0.42731277533039647, "step": 292, "batch_size": 64, "mean": 200.83868408203125, "std": 250.30386352539062, "min": -627.4581909179688, "p10": -49.13671150207519, "median": 194.74374389648438, "p90": 530.599642944336, "max": 776.3243408203125, "pos_frac": 0.796875, "sample": [226.65380859375, 61.22708511352539, 6.911102294921875, 202.59262084960938, 280.1916809082031, 156.67141723632812, 129.58148193359375, 121.87471771240234, 191.99349975585938, -627.4581909179688, 731.957763671875, 68.97737884521484, 590.400146484375, 121.64183044433594, 349.7788391113281, -52.054443359375, -237.2799072265625, 429.3392639160156, 69.00772857666016, 318.2546081542969, -9.221122741699219, 734.6650390625, 415.2121887207031, 216.39797973632812, 451.1550598144531, 551.1634521484375, 254.003173828125, 436.531982421875, 149.19174194335938, 133.69540405273438, -123.978515625, 297.824951171875, 9.250873565673828, 483.609130859375, -12.189735412597656, 120.81475067138672, -72.2608642578125, 179.51844787597656, 118.01179504394531, 125.39317321777344, 39.09257507324219, 541.10498046875, 232.828125, 200.06712341308594, 212.53067016601562, 197.49398803710938, 449.9974060058594, 344.3947448730469, 776.3243408203125, -11.671785354614258, 158.39437866210938, 285.9985656738281, 237.16725158691406, -219.3016815185547, 246.7356414794922, -141.97015380859375, 710.1710815429688, -33.02132797241211, -42.328670501708984, 506.0871887207031, 240.12635803222656, 223.55157470703125, 112.48915100097656, -11.636537551879883], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000292.npy"} +{"epoch": 0.4287812041116006, "step": 293, "batch_size": 64, "mean": 154.96490478515625, "std": 243.51544189453125, "min": -295.077880859375, "p10": -135.36778564453124, "median": 115.64213180541992, "p90": 451.2542266845703, "max": 909.344970703125, "pos_frac": 0.71875, "sample": [39.31490707397461, 60.67474365234375, 29.145605087280273, 55.09419250488281, 239.87307739257812, 209.7352752685547, 481.0083923339844, 145.9055633544922, 449.2423095703125, 47.022605895996094, 451.3182678222656, -68.20939636230469, 236.45925903320312, 13.57095718383789, 909.344970703125, -267.60015869140625, 311.6972961425781, 197.9483642578125, 42.045501708984375, 282.4296875, 362.2346496582031, -160.28936767578125, 237.42385864257812, -75.98391723632812, 43.768409729003906, 38.96038055419922, -72.06915283203125, 134.16380310058594, -14.691360473632812, 511.21649169921875, 451.10479736328125, 331.6040954589844, 328.3575439453125, 417.8521423339844, 296.40277099609375, -144.1178741455078, 45.86949157714844, 163.58074951171875, 180.60107421875, -7.221151351928711, -114.82439422607422, 88.2726821899414, -8.886962890625, 297.5903625488281, 351.9783935546875, 366.881103515625, 220.2375030517578, 336.65704345703125, 91.10050964355469, -243.01644897460938, -22.371856689453125, -295.077880859375, -157.7848358154297, 468.61285400390625, 836.0484619140625, 97.1204605102539, -138.50967407226562, 149.2635955810547, 62.19968032836914, -101.69328308105469, -128.03671264648438, 360.2841491699219, 518.0690307617188, -51.149681091308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000293.npy"} +{"epoch": 0.4302496328928047, "step": 294, "batch_size": 64, "mean": 172.348388671875, "std": 235.93817138671875, "min": -309.9860534667969, "p10": -110.36974792480461, "median": 105.10205078125, "p90": 497.7941772460938, "max": 862.931396484375, "pos_frac": 0.875, "sample": [46.23468780517578, 45.199737548828125, 312.35693359375, 417.0404052734375, 90.1424560546875, 26.323253631591797, -164.45046997070312, 140.40380859375, 4.941802978515625, 69.91675567626953, 19.685317993164062, 40.799285888671875, -45.74050521850586, 536.8935546875, 64.29549407958984, 272.2579650878906, 211.51878356933594, 422.0863952636719, 49.76716995239258, 171.52955627441406, 678.0009155273438, -175.01234436035156, 291.1832275390625, 102.70457458496094, 643.7473754882812, 485.2727966308594, 294.1457824707031, 161.42002868652344, 187.18093872070312, -138.0679931640625, 246.98092651367188, 291.1806335449219, 307.19049072265625, 862.931396484375, -217.348876953125, -165.48123168945312, 643.3287353515625, 172.69561767578125, 426.1440124511719, 19.880592346191406, -309.9860534667969, -178.42669677734375, 224.54132080078125, 303.0498962402344, 587.7620239257812, 107.49952697753906, 15.32602310180664, 165.83843994140625, 492.78839111328125, 7.024126052856445, 320.3539123535156, 342.0048522949219, 41.494651794433594, 147.9695281982422, 46.9458122253418, 43.391510009765625, 58.66714096069336, 39.68433380126953, 17.298587799072266, 69.08088684082031, 499.93951416015625, 77.82913208007812, 1.309722900390625, 59.63050079345703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000294.npy"} +{"epoch": 0.43171806167400884, "step": 295, "batch_size": 64, "mean": 191.00411987304688, "std": 254.74459838867188, "min": -323.26336669921875, "p10": -137.48369522094725, "median": 191.28479766845703, "p90": 527.3473999023438, "max": 927.6494140625, "pos_frac": 0.78125, "sample": [-252.50808715820312, -2.436918258666992, 619.3445434570312, 12.830646514892578, -323.26336669921875, 205.30718994140625, -186.55323791503906, 329.8701477050781, 306.4640808105469, -244.08529663085938, 333.08477783203125, -19.975296020507812, 159.73983764648438, 397.84136962890625, 398.7914733886719, 264.5549011230469, 257.15374755859375, 239.00009155273438, -100.91382598876953, 77.79509735107422, 318.51239013671875, -28.89790916442871, 207.92901611328125, 390.7030944824219, 28.757667541503906, 162.22744750976562, 533.4234619140625, -146.6759490966797, 733.88037109375, 190.14627075195312, 412.6797180175781, 49.65220642089844, 3.1736717224121094, -173.77536010742188, 441.533203125, 571.427734375, 71.56145477294922, -116.03510284423828, -254.3517608642578, 79.43370056152344, 243.04934692382812, 375.49285888671875, 374.64947509765625, 107.66154479980469, -51.31456756591797, 62.9964599609375, 238.67884826660156, 90.5022964477539, 210.13299560546875, 566.211669921875, 325.28521728515625, 314.30657958984375, 5.116668701171875, 927.6494140625, 513.169921875, 469.6180725097656, 155.34844970703125, 192.42332458496094, 205.66542053222656, 178.7318878173828, 31.41974449157715, 115.62374877929688, 651.906982421875, -27.410911560058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000295.npy"} +{"epoch": 0.4331864904552129, "step": 296, "batch_size": 64, "mean": 131.2469024658203, "std": 275.9085388183594, "min": -720.2102661132812, "p10": -221.3554901123047, "median": 138.17303466796875, "p90": 509.07607421875, "max": 788.1875610351562, "pos_frac": 0.734375, "sample": [130.79034423828125, 69.75684356689453, 185.21868896484375, 782.8721923828125, -154.6099853515625, 571.685302734375, -257.51495361328125, -73.68751525878906, -54.65033721923828, -720.2102661132812, 90.02542114257812, 204.326416015625, -372.153076171875, 214.23211669921875, 156.66819763183594, -248.81210327148438, 154.58538818359375, 337.94598388671875, 624.3265380859375, 393.26300048828125, 203.885498046875, 256.7511901855469, 44.80290985107422, -79.42132568359375, 97.87103271484375, 360.3409423828125, 60.13629150390625, -221.0090789794922, 18.8438720703125, -221.5039520263672, 613.662109375, 18.948165893554688, 278.0021057128906, 41.231040954589844, 227.20846557617188, 531.9388427734375, 172.9557647705078, 84.85531616210938, -398.2484436035156, 247.30609130859375, -333.5221862792969, 294.56927490234375, -100.91138458251953, -64.260986328125, 508.35223388671875, 108.9986343383789, 117.43941497802734, 218.36572265625, 145.55572509765625, 174.11550903320312, 90.70496368408203, -6.694786071777344, -7.591209411621094, 788.1875610351562, 166.29930114746094, 226.85916137695312, 93.7767105102539, 60.687095642089844, 509.38629150390625, -84.32266235351562, 167.1721649169922, 162.91305541992188, 431.947509765625, 359.159423828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000296.npy"} +{"epoch": 0.434654919236417, "step": 297, "batch_size": 64, "mean": 159.77444458007812, "std": 213.42166137695312, "min": -282.5634765625, "p10": -48.653538131713866, "median": 115.60364532470703, "p90": 448.447900390625, "max": 674.572509765625, "pos_frac": 0.734375, "sample": [159.6690216064453, 254.17027282714844, 351.0067443847656, 565.1622924804688, 40.48392868041992, -0.16379356384277344, 308.210693359375, 204.04071044921875, -147.21090698242188, -49.92155838012695, 18.656816482543945, 136.48568725585938, 240.57247924804688, 457.69866943359375, 387.03057861328125, -15.889062881469727, -45.69482421875, 31.010467529296875, -112.6318359375, -11.708076477050781, 319.69793701171875, -175.05787658691406, 72.79237365722656, 1.0817089080810547, 561.9644775390625, 92.94027709960938, 293.5522766113281, 88.03228759765625, 216.7940673828125, 339.1690368652344, 181.1514892578125, 109.82028198242188, 451.5799560546875, 202.71043395996094, 441.1397705078125, -10.763736724853516, 63.12516784667969, -20.02739715576172, 549.0946044921875, 276.7366027832031, 3.4754180908203125, 180.72450256347656, -282.5634765625, 374.08270263671875, 86.56331634521484, -40.203224182128906, 121.38700866699219, 61.84161376953125, 674.572509765625, -35.07649230957031, 131.73043823242188, 441.106689453125, 248.09912109375, -14.2525634765625, -257.3648376464844, 358.77978515625, -16.32469940185547, 63.16505432128906, 239.54574584960938, 614.6197509765625, 97.8358154296875, 396.64093017578125, -65.40547943115234, 16.07328987121582], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000297.npy"} +{"epoch": 0.43612334801762115, "step": 298, "batch_size": 64, "mean": 211.74114990234375, "std": 320.229248046875, "min": -681.2748413085938, "p10": -108.95877304077149, "median": 165.17342376708984, "p90": 627.5915405273438, "max": 1450.043212890625, "pos_frac": 0.734375, "sample": [116.06485748291016, -152.11834716796875, 30.49911117553711, -681.2748413085938, 73.83859252929688, -112.13397216796875, 5.34996223449707, 604.5726928710938, 297.7282409667969, 19.84783935546875, 189.87457275390625, 490.58026123046875, -30.743820190429688, -160.12635803222656, -196.13760375976562, 88.80181121826172, 235.4100341796875, 90.70419311523438, 102.81888580322266, 290.6580505371094, 845.2205810546875, -8.920700073242188, 375.9326171875, -203.95285034179688, 668.9693603515625, 272.118896484375, -108.75492858886719, 586.4313354492188, 681.1785888671875, 691.0714721679688, -109.04613494873047, -83.31112670898438, -4.561912536621094, 7.7701568603515625, -39.214454650878906, 635.0018920898438, 672.8978271484375, 130.5837860107422, 407.33685302734375, 184.10443115234375, -6.910127639770508, 3.34710693359375, 1450.043212890625, 262.7469482421875, 610.3007202148438, 185.46914672851562, 491.08184814453125, 422.662109375, -20.27883529663086, 109.33816528320312, 210.70108032226562, 436.89080810546875, 329.63104248046875, 124.21109771728516, 559.5897827148438, 146.24241638183594, -45.660614013671875, 376.0252990722656, -95.18287658691406, 249.37429809570312, 186.56484985351562, 263.2008056640625, 60.847198486328125, 336.12725830078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000298.npy"} +{"epoch": 0.43759177679882527, "step": 299, "batch_size": 64, "mean": 177.01646423339844, "std": 266.0360412597656, "min": -422.37158203125, "p10": -113.37960815429686, "median": 142.68121337890625, "p90": 566.3854736328127, "max": 877.2796020507812, "pos_frac": 0.75, "sample": [-98.61756134033203, -286.45648193359375, -422.37158203125, 379.3682556152344, 57.36822509765625, -45.02530288696289, 369.73468017578125, 130.994140625, 60.546104431152344, 103.89713287353516, 597.2152099609375, 583.8833618164062, 785.6749267578125, -98.47689056396484, 175.1819610595703, 731.6331176757812, -239.40484619140625, 351.4867248535156, 144.32284545898438, 254.68275451660156, -119.7061996459961, 332.9346008300781, 141.03958129882812, 156.99981689453125, 448.6408386230469, 117.48956298828125, 877.2796020507812, -12.957836151123047, 237.00155639648438, -11.84115982055664, -40.37818145751953, 129.52247619628906, -161.13534545898438, 36.85002136230469, 192.41439819335938, 525.5570678710938, 137.49522399902344, 160.0773468017578, -120.77154541015625, 170.80160522460938, 70.76766204833984, 323.63922119140625, 305.42120361328125, -59.371158599853516, 202.95025634765625, -95.73987579345703, -234.27467346191406, 455.2471618652344, 7.4130859375, 793.388427734375, -95.29084014892578, 65.8722152709961, 264.4104309082031, 327.5933532714844, 248.7022705078125, 306.096923828125, 70.81625366210938, 618.1846923828125, 223.37548828125, 135.8903045654297, 72.545166015625, 372.5207214355469, 23.088706970214844, 192.85647583007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000299.npy"} +{"epoch": 0.4390602055800294, "step": 300, "batch_size": 64, "mean": 220.83203125, "std": 299.98291015625, "min": -418.0345764160156, "p10": -202.52694396972655, "median": 226.97035217285156, "p90": 643.4610290527344, "max": 788.778564453125, "pos_frac": 0.765625, "sample": [331.3648681640625, 302.77911376953125, 134.53550720214844, 342.53167724609375, 648.443603515625, -407.6434326171875, -208.94677734375, 631.8350219726562, 421.79638671875, 23.165157318115234, -96.6637954711914, 350.33984375, 95.39775085449219, 651.4542236328125, 357.2851257324219, 409.2925720214844, 104.19561767578125, 524.6304321289062, 57.23176574707031, 80.68113708496094, -88.3907470703125, 190.57455444335938, 246.92391967773438, -13.781044006347656, 471.9151916503906, 373.95233154296875, -121.8720703125, 103.51612854003906, 196.9787139892578, 298.28863525390625, 72.59278869628906, 671.940185546875, 280.748046875, -418.0345764160156, 525.2275390625, -269.2140197753906, 583.3447875976562, 313.2753601074219, 524.3448486328125, 196.22955322265625, 89.31058502197266, 452.10540771484375, 667.804443359375, 375.33441162109375, -7.611537933349609, 405.1294250488281, -58.7325325012207, -187.54733276367188, 754.5455322265625, 439.5588073730469, 463.4033203125, -332.720947265625, 259.2463073730469, -265.82916259765625, 8.41867446899414, 205.52474975585938, 516.7739868164062, 79.10467529296875, 672.8201904296875, 788.778564453125, -290.21990966796875, 28.561542510986328, 207.01678466796875, -29.792617797851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000300.npy"} +{"epoch": 0.44052863436123346, "step": 301, "batch_size": 64, "mean": 210.38327026367188, "std": 264.5196838378906, "min": -209.25933837890625, "p10": -67.87928161621093, "median": 159.7206573486328, "p90": 575.5082977294925, "max": 926.3017578125, "pos_frac": 0.796875, "sample": [168.0690155029297, 320.91156005859375, -61.471412658691406, 343.10260009765625, 431.7069396972656, 301.47406005859375, 820.745361328125, 64.34126281738281, 488.53900146484375, -34.76924133300781, 926.3017578125, 364.96661376953125, 52.14966583251953, 252.99143981933594, -139.84046936035156, 153.41677856445312, 98.70653533935547, 602.82275390625, 60.72230529785156, 344.01580810546875, 768.5917358398438, 32.55327224731445, -90.78009796142578, 66.62974548339844, 434.4326477050781, 56.934051513671875, 37.953269958496094, 43.90779113769531, 259.5697326660156, 370.6318054199219, 306.7742919921875, 321.3946533203125, -4.739871978759766, 168.14877319335938, 59.659767150878906, 77.49276733398438, 213.49534606933594, -161.11537170410156, 10.65815544128418, -13.757080078125, 93.65960693359375, 32.062461853027344, 230.936767578125, -95.17717742919922, 49.66157531738281, 916.3878784179688, 305.63751220703125, 248.13796997070312, 664.6815185546875, 26.422149658203125, 511.7745666503906, 166.0245361328125, 262.2304382324219, 489.92718505859375, 486.26800537109375, 73.53268432617188, -69.1859130859375, -207.4108123779297, 621.469482421875, -209.25933837890625, 101.91349792480469, -64.83047485351562, -20.027584075927734, 332.3538513183594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000301.npy"} +{"epoch": 0.4419970631424376, "step": 302, "batch_size": 64, "mean": 175.31198120117188, "std": 236.5364227294922, "min": -231.7837677001953, "p10": -107.33299179077147, "median": 125.2342529296875, "p90": 462.0748809814454, "max": 848.609375, "pos_frac": 0.75, "sample": [-12.943771362304688, 419.90496826171875, 738.871826171875, -7.736843109130859, 213.37942504882812, -8.13654899597168, 235.36978149414062, 12.662689208984375, 187.71624755859375, 7.612152099609375, 238.30776977539062, 19.34942626953125, 74.9332046508789, 91.51996612548828, 156.79248046875, 66.16511535644531, 436.51708984375, -6.273464202880859, 355.01849365234375, 356.0323791503906, 198.3616485595703, 191.17840576171875, 179.44552612304688, 260.15435791015625, -109.48360443115234, -101.60285186767578, 431.1251220703125, 569.942626953125, 96.6626205444336, -231.7837677001953, 244.72854614257812, -75.2021255493164, 119.5474853515625, -28.899215698242188, -102.31489562988281, 516.0572509765625, 848.609375, 471.6588134765625, 130.9210205078125, -111.24459838867188, -57.71551513671875, 424.95684814453125, -231.50860595703125, 225.45944213867188, 398.7926025390625, 41.498252868652344, 117.32414245605469, 659.6351928710938, 242.98765563964844, 349.6693115234375, 23.888254165649414, 279.2855224609375, -160.71292114257812, 320.7174072265625, 90.56388854980469, 258.6011962890625, -119.24964904785156, 439.7123718261719, 86.01182556152344, 648.6448364257812, -149.53878784179688, 50.63200759887695, 111.63297271728516, 95.75457000732422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000302.npy"} +{"epoch": 0.4434654919236417, "step": 303, "batch_size": 64, "mean": 167.9858856201172, "std": 255.2613983154297, "min": -458.0278015136719, "p10": -132.74627227783202, "median": 156.90269470214844, "p90": 560.5878906250001, "max": 683.1484985351562, "pos_frac": 0.703125, "sample": [303.10565185546875, -0.3652076721191406, 78.9000244140625, 650.2935791015625, -131.37213134765625, 373.43023681640625, 534.8480224609375, 653.5825805664062, -20.49339485168457, -87.90638732910156, 400.5132141113281, 191.88677978515625, 313.614501953125, -133.33518981933594, 64.48722076416016, 171.9658203125, 85.98179626464844, 29.10106086730957, -59.69237518310547, 381.52215576171875, -458.0278015136719, -54.86466598510742, -92.95775604248047, 373.3025817871094, 462.5373229980469, 616.9047241210938, -139.28807067871094, 293.52593994140625, -252.67068481445312, 99.21903991699219, -87.39141845703125, 344.64080810546875, 683.1484985351562, 229.582763671875, 506.51263427734375, 270.833251953125, -163.84677124023438, 184.40167236328125, 117.98108673095703, 639.23974609375, 571.6192626953125, -49.79351806640625, 180.04336547851562, 472.39910888671875, 163.0426025390625, 324.9820556640625, 240.3843994140625, 307.4632568359375, 150.76278686523438, -67.6830825805664, 41.017669677734375, -58.63782501220703, 216.93348693847656, 14.946277618408203, 100.69873046875, 615.4763793945312, -160.77813720703125, -35.98039245605469, 18.853656768798828, 42.075809478759766, 252.63243103027344, 9.485687255859375, -139.80435180664062, 168.10653686523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000303.npy"} +{"epoch": 0.44493392070484583, "step": 304, "batch_size": 64, "mean": 184.502197265625, "std": 170.46478271484375, "min": -284.1424560546875, "p10": -10.14099884033203, "median": 178.8809356689453, "p90": 415.28298950195324, "max": 580.5339965820312, "pos_frac": 0.875, "sample": [39.81819152832031, 277.25164794921875, 296.55426025390625, 93.73961639404297, -7.7712249755859375, -28.515090942382812, 7.910064697265625, 65.63423156738281, 178.45071411132812, 108.79257202148438, 293.7076110839844, 390.948974609375, 179.3111572265625, 337.493408203125, 53.62834167480469, 289.9340515136719, 298.7943420410156, 326.0852355957031, -91.03736114501953, 16.19351577758789, 157.69427490234375, 388.0185546875, 42.26564025878906, 70.59307861328125, 304.68798828125, 102.14077758789062, 300.52984619140625, 42.891815185546875, 327.816650390625, 108.41197204589844, 130.79653930664062, 95.72750854492188, 580.5339965820312, 115.5517349243164, -42.58673858642578, -284.1424560546875, 240.86953735351562, -11.1566162109375, 41.14790344238281, 275.211669921875, -60.095001220703125, 477.53350830078125, 59.24443054199219, 66.22850036621094, 468.84954833984375, 168.90431213378906, 11.85113525390625, 77.29469299316406, 225.91732788085938, 360.7188720703125, 288.137939453125, -94.16368103027344, 184.1127166748047, 433.6474304199219, 425.71185302734375, 70.90888977050781, 261.3125305175781, 223.67630004882812, 427.02130126953125, 502.45361328125, 344.123046875, 237.75399780273438, 288.5048828125, 244.56521606445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000304.npy"} +{"epoch": 0.44640234948604995, "step": 305, "batch_size": 64, "mean": 162.76901245117188, "std": 236.83709716796875, "min": -296.9483337402344, "p10": -85.91730041503905, "median": 114.39013290405273, "p90": 458.6659332275391, "max": 1068.2845458984375, "pos_frac": 0.796875, "sample": [430.0046081542969, 304.91961669921875, 83.03257751464844, 214.5762481689453, 170.81419372558594, 665.6951904296875, 271.3879699707031, 27.53577995300293, 266.13751220703125, 476.38800048828125, 136.5858612060547, 81.99614715576172, 98.7258529663086, -55.5592041015625, -67.91009521484375, 19.538169860839844, 321.4137268066406, 459.7471618652344, 69.0149917602539, 71.67253875732422, 1068.2845458984375, 322.947265625, 24.318889617919922, 280.8582763671875, 314.341796875, 12.538911819458008, 2.9226741790771484, 232.4722442626953, 247.3848419189453, -93.63467407226562, -42.836647033691406, 257.9334716796875, -119.38423156738281, 329.75518798828125, 83.630859375, 137.52310180664062, 539.9095458984375, 308.3555908203125, 15.705720901489258, 156.49114990234375, 130.05441284179688, -27.50292205810547, 89.23887634277344, 80.46160125732422, 576.3175048828125, 543.3403930664062, 254.3340301513672, -45.482269287109375, -270.27398681640625, 135.57293701171875, -281.6495056152344, 456.14306640625, 444.723388671875, 51.32848358154297, -95.64495849609375, 32.15838623046875, -116.76798248291016, 42.9842529296875, 265.3509826660156, -296.9483337402344, 63.909584045410156, 266.76690673828125, -23.399612426757812, 16.96613311767578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000305.npy"} +{"epoch": 0.447870778267254, "step": 306, "batch_size": 64, "mean": 157.5745849609375, "std": 196.25537109375, "min": -367.13897705078125, "p10": -47.228285980224605, "median": 109.64873123168945, "p90": 419.04808044433594, "max": 860.457763671875, "pos_frac": 0.796875, "sample": [13.362384796142578, -19.703990936279297, 90.56661987304688, 24.11309814453125, 34.66455078125, -58.90943908691406, 306.8067321777344, 22.77227020263672, 88.438232421875, -5.609992980957031, 202.24391174316406, 372.2161865234375, 131.07577514648438, 415.328369140625, 63.16325378417969, 74.67513275146484, 35.362918853759766, 456.4422912597656, 70.1847152709961, 45.07429504394531, 79.41063690185547, -55.88677215576172, -40.91937255859375, 429.1218566894531, 246.03271484375, 308.36285400390625, -18.236270904541016, -6.90532112121582, 420.6422424316406, 123.99308776855469, 105.10107421875, 155.65634155273438, 251.02154541015625, 414.04010009765625, 57.99920654296875, 151.2555389404297, 114.1963882446289, -82.69799041748047, 37.3828239440918, 269.819580078125, -367.13897705078125, 323.086181640625, 103.56507110595703, -69.67658996582031, 11.67119026184082, 266.6155700683594, -49.932106018066406, 128.51193237304688, 547.7578125, 497.8104248046875, 187.22412109375, 336.6318359375, 246.6328887939453, -85.85968780517578, 423.84912109375, -7.874042510986328, 94.47259521484375, 29.449913024902344, 264.90606689453125, 122.11117553710938, 860.457763671875, 205.16607666015625, 317.7061767578125, 375.9708557128906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000306.npy"} +{"epoch": 0.44933920704845814, "step": 307, "batch_size": 64, "mean": 160.48297119140625, "std": 205.5437774658203, "min": -429.8045349121094, "p10": -74.17445144653318, "median": 112.9671630859375, "p90": 440.72926940917984, "max": 763.2802124023438, "pos_frac": 0.8125, "sample": [387.2052307128906, 124.94925689697266, 157.82992553710938, 177.2010498046875, 176.474365234375, 151.14134216308594, -119.90142822265625, 55.81951141357422, -81.05409240722656, 567.437744140625, 74.5582275390625, 42.620582580566406, 84.22322082519531, -139.22415161132812, 371.5474853515625, -429.8045349121094, 516.22412109375, -160.3684844970703, 240.33889770507812, -58.12195587158203, 233.34329223632812, 281.89178466796875, 88.24730682373047, 154.88250732421875, 480.4180908203125, 282.5980529785156, 83.53639221191406, 110.36083984375, 75.08416748046875, 453.4730529785156, 108.46837615966797, 458.1215515136719, 73.4466781616211, 71.75212097167969, -54.53849792480469, 111.13076782226562, -45.976959228515625, 71.28962707519531, 393.1002197265625, 410.9937744140625, -21.899986267089844, 344.8119812011719, 39.153221130371094, 268.492919921875, 246.8872833251953, 26.738571166992188, 313.0819091796875, 248.83013916015625, 23.9985294342041, 89.49943542480469, 222.07041931152344, 14.469833374023438, 354.63653564453125, 535.0789794921875, 260.43994140625, 763.2802124023438, 102.33556365966797, -95.41168212890625, 236.70420837402344, 114.80355834960938, -34.00248718261719, 301.22552490234375, 60.73347473144531, -125.76729583740234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000307.npy"} +{"epoch": 0.45080763582966227, "step": 308, "batch_size": 64, "mean": 181.93313598632812, "std": 187.9801483154297, "min": -204.05499267578125, "p10": -37.27806453704833, "median": 166.92604064941406, "p90": 471.67670593261727, "max": 511.4571533203125, "pos_frac": 0.828125, "sample": [167.16880798339844, 423.07733154296875, -164.54852294921875, 224.45407104492188, 31.584861755371094, 508.90972900390625, -204.05499267578125, -40.63534164428711, 44.6905517578125, 107.60394287109375, 497.97991943359375, 0.5864601135253906, 214.61572265625, 114.38381958007812, 336.5497741699219, 478.1545104980469, -115.40568542480469, 186.52671813964844, 110.47075653076172, 6.547370910644531, 166.6832733154297, 505.9543151855469, 317.90643310546875, 448.28900146484375, -83.33064270019531, 269.4281005859375, 132.59835815429688, 111.17774200439453, 129.30686950683594, -19.203413009643555, 243.7918701171875, 125.50423431396484, 196.59347534179688, 248.84226989746094, 217.7777862548828, 456.56182861328125, 511.4571533203125, 146.86529541015625, -120.50318908691406, 72.784912109375, 73.67977905273438, 344.23626708984375, 314.04119873046875, 166.00778198242188, 154.6599578857422, 497.8224182128906, 226.17489624023438, 45.70790100097656, -8.952095031738281, 361.81072998046875, 503.2022705078125, 275.8133850097656, 173.65115356445312, -14.990461349487305, -29.44441795349121, 67.680419921875, 364.8878173828125, 361.1903076171875, 280.2271728515625, 30.27014923095703, 396.5123596191406, 181.1597900390625, -134.1455078125, 5.372001647949219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000308.npy"} +{"epoch": 0.4522760646108664, "step": 309, "batch_size": 64, "mean": 187.72882080078125, "std": 224.6634979248047, "min": -584.6229248046875, "p10": -69.63152465820312, "median": 174.41378784179688, "p90": 458.5450683593751, "max": 722.2818603515625, "pos_frac": 0.8125, "sample": [171.04702758789062, 77.13032531738281, 107.02344512939453, -94.90322875976562, 282.5778503417969, 177.78054809570312, 124.69417572021484, -17.154260635375977, 542.7493896484375, 31.35284423828125, 256.8533935546875, 432.23077392578125, 94.77066040039062, 518.024169921875, 30.03321075439453, 58.751686096191406, -73.36759948730469, 306.5274963378906, 467.64434814453125, 319.88885498046875, 259.2192077636719, -584.6229248046875, 235.42324829101562, -98.59931945800781, 67.41366577148438, 164.74099731445312, 247.60482788085938, 323.32635498046875, 49.25379943847656, 37.376007080078125, -137.88796997070312, 657.9246215820312, 66.53997802734375, 21.108139038085938, 166.70236206054688, 437.1705322265625, 722.2818603515625, 429.3567810058594, 135.62704467773438, -23.864791870117188, -5.184444427490234, 81.77182006835938, 64.35832214355469, 437.31341552734375, 305.94110107421875, 302.37457275390625, 479.2789306640625, 398.49200439453125, 306.37469482421875, -2.3034095764160156, 424.1830139160156, -172.69923400878906, 229.76181030273438, 313.33465576171875, -120.96121215820312, 80.35844421386719, 509.9768981933594, 423.01123046875, -60.91401672363281, 184.77426147460938, 188.2503662109375, 32.56235122680664, 402.9874572753906, 221.8521270751953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000309.npy"} +{"epoch": 0.45374449339207046, "step": 310, "batch_size": 64, "mean": 173.33135986328125, "std": 186.2578887939453, "min": -265.1800537109375, "p10": -30.06421203613281, "median": 162.25074768066406, "p90": 432.7359130859375, "max": 575.5327758789062, "pos_frac": 0.796875, "sample": [-26.15615463256836, 171.03497314453125, 361.080810546875, -45.57451629638672, 86.62660217285156, 322.24468994140625, 34.10518264770508, -0.47893333435058594, 19.61998176574707, 214.54104614257812, -210.49859619140625, 425.865966796875, 447.8362121582031, -36.361576080322266, 46.68883514404297, 307.78692626953125, 202.7443084716797, 245.74778747558594, 92.08401489257812, 153.46652221679688, -31.739093780517578, 82.08547973632812, 288.372314453125, 84.29743957519531, 152.163818359375, 522.351806640625, 354.56475830078125, -14.82607650756836, 23.07029914855957, 230.57473754882812, -265.1800537109375, 575.5327758789062, -25.273822784423828, 152.86331176757812, 561.4688720703125, 200.79412841796875, 265.3238525390625, -21.480680465698242, -21.321876525878906, 38.58617401123047, 85.11062622070312, 178.81689453125, 332.0533447265625, 235.0816192626953, 43.22235870361328, 311.9674072265625, 435.68017578125, -92.16852569580078, 342.62518310546875, 227.63241577148438, 309.4784240722656, -69.63532257080078, 268.3162536621094, 141.15513610839844, 62.33848571777344, 59.01178741455078, 75.88650512695312, 512.6906127929688, 551.24462890625, 247.7004852294922, 87.43954467773438, 366.92974853515625, 241.0688018798828, 174.9286346435547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000310.npy"} +{"epoch": 0.4552129221732746, "step": 311, "batch_size": 64, "mean": 201.25680541992188, "std": 257.14312744140625, "min": -261.5953369140625, "p10": -110.47822875976561, "median": 170.77682495117188, "p90": 563.5325927734375, "max": 880.1209716796875, "pos_frac": 0.765625, "sample": [474.0645751953125, 393.4759826660156, 423.3760986328125, 5.405769348144531, -144.3044891357422, 558.177490234375, -132.95928955078125, 304.97650146484375, 198.47381591796875, 565.82763671875, 298.559326171875, 88.3843994140625, 351.44024658203125, 761.0247802734375, 303.63995361328125, 142.9100341796875, 515.7208862304688, 129.45928955078125, -0.8264541625976562, 627.014404296875, 188.5150909423828, 210.86074829101562, 527.8035888671875, 227.3570098876953, 43.297203063964844, 152.76988220214844, -54.99549865722656, -179.74923706054688, 198.131103515625, 120.77831268310547, -115.13870239257812, 323.8172302246094, -160.29257202148438, 153.03855895996094, 276.57952880859375, -99.60379028320312, -6.0626678466796875, 880.1209716796875, -28.40362548828125, 273.99334716796875, 20.91345977783203, 87.43970489501953, 237.47134399414062, 43.351806640625, 621.6148681640625, 396.3698425292969, 23.85995864868164, -54.143585205078125, 6.742160797119141, 94.4168701171875, -22.456722259521484, 523.6220703125, 118.76785278320312, 15.419242858886719, -261.5953369140625, 457.1051025390625, 190.14645385742188, 239.72396850585938, 14.388999938964844, 226.50575256347656, 642.4058227539062, -176.5442657470703, -44.39922332763672, 682.6516723632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000311.npy"} +{"epoch": 0.4566813509544787, "step": 312, "batch_size": 64, "mean": 180.94058227539062, "std": 254.26980590820312, "min": -352.5306701660156, "p10": -74.02250289916991, "median": 124.98529052734375, "p90": 495.41860961914074, "max": 1050.4365234375, "pos_frac": 0.828125, "sample": [129.42105102539062, 1050.4365234375, 505.15350341796875, 196.9716796875, 127.71064758300781, 79.84699249267578, -104.77667999267578, 83.52975463867188, 97.01114654541016, 351.11334228515625, -258.14349365234375, 416.6282043457031, 888.0867919921875, 342.50341796875, 238.8301544189453, 253.39730834960938, -203.75701904296875, 242.906982421875, 408.46923828125, 147.64874267578125, 540.5845336914062, 84.84504699707031, 122.25993347167969, 295.1146545410156, 73.51351928710938, 13.28550910949707, 41.214561462402344, 749.5452880859375, 342.7039794921875, 472.703857421875, 104.56863403320312, 411.1335754394531, 182.11679077148438, 106.12391662597656, -144.03152465820312, 92.64533996582031, 165.8885955810547, 520.302734375, -2.9032745361328125, 31.102123260498047, 269.865966796875, 122.24024200439453, -30.140443801879883, 235.51576232910156, 177.75067138671875, 0.6852874755859375, -33.58160400390625, 648.7410888671875, -58.83088684082031, 38.13159942626953, 213.2996826171875, 105.88078308105469, -230.49325561523438, 8.188705444335938, -80.53319549560547, 130.6779022216797, 120.76158142089844, 26.802993774414062, -352.5306701660156, 379.2291564941406, 255.98117065429688, 18.2982177734375, 48.57045364379883, 369.9797668457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000312.npy"} +{"epoch": 0.4581497797356828, "step": 313, "batch_size": 64, "mean": 182.31651306152344, "std": 248.3165283203125, "min": -420.2560729980469, "p10": -116.57959289550777, "median": 157.30615234375, "p90": 470.58243713378914, "max": 827.203369140625, "pos_frac": 0.8125, "sample": [59.5074462890625, 320.31683349609375, 143.28591918945312, 325.57684326171875, -145.23196411132812, 9.213241577148438, 436.3840026855469, 157.5430908203125, 380.7296447753906, -77.56074523925781, 16.153060913085938, -186.8415069580078, 447.15826416015625, 151.9889678955078, 21.42620277404785, 192.44789123535156, 480.6213684082031, 199.6255645751953, 82.19417572021484, 200.96514892578125, 43.241676330566406, 337.3827209472656, -420.2560729980469, -133.3019561767578, 193.53460693359375, 561.854248046875, 279.78125, 90.26133728027344, 194.65219116210938, 827.203369140625, 343.44647216796875, 142.5164794921875, -12.340360641479492, 164.0637664794922, -250.65066528320312, 122.98579406738281, -37.39263916015625, -415.90240478515625, 414.9588317871094, 251.0156707763672, -12.866539001464844, 222.45059204101562, 57.812496185302734, 295.47235107421875, 306.8353576660156, 145.64207458496094, -10.940757751464844, 308.35137939453125, 295.20098876953125, -135.57968139648438, 445.685302734375, 94.76683044433594, 157.0692138671875, 16.284996032714844, 154.85226440429688, 584.864013671875, 650.7071533203125, 12.569015502929688, 12.850959777832031, 77.60861206054688, 296.1963195800781, 373.0699768066406, 619.7521362304688, 787.0438842773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000313.npy"} +{"epoch": 0.45961820851688695, "step": 314, "batch_size": 64, "mean": 173.25335693359375, "std": 271.2396240234375, "min": -365.3646240234375, "p10": -141.51600952148436, "median": 130.89640045166016, "p90": 485.02927246093753, "max": 887.8380126953125, "pos_frac": 0.6875, "sample": [764.2523193359375, -300.527587890625, 119.33153533935547, 17.655784606933594, 32.471343994140625, 241.91986083984375, 54.27610778808594, 345.6970520019531, 61.58290481567383, -132.7556915283203, 322.05438232421875, -3.1286468505859375, 242.91314697265625, 98.12107849121094, 183.53692626953125, -365.3646240234375, -163.08323669433594, 29.919754028320312, 694.5687866210938, 765.0856323242188, 32.586639404296875, -33.54682159423828, -147.52902221679688, 88.56013488769531, 181.27459716796875, -175.7891387939453, -32.57218933105469, -141.9693603515625, -47.427940368652344, 422.67169189453125, 452.40191650390625, -1.4244384765625, 273.1623840332031, 489.678466796875, 297.6040954589844, 161.6211700439453, -90.07689666748047, 141.08383178710938, 47.52960205078125, 746.8968505859375, 439.5848693847656, 338.7437744140625, 887.8380126953125, 451.8348388671875, -50.16067886352539, 361.69384765625, -140.45819091796875, 474.18115234375, -23.806732177734375, 236.51290893554688, 312.521728515625, 30.46600341796875, 297.9964599609375, 328.7671203613281, 409.3424987792969, 533.0502319335938, 120.70896911621094, 209.75701904296875, 249.7381134033203, -27.333961486816406, -133.6951141357422, -52.12295150756836, 349.8794860839844, -190.08714294433594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000314.npy"} +{"epoch": 0.461086637298091, "step": 315, "batch_size": 64, "mean": 158.88729858398438, "std": 208.8935089111328, "min": -199.1038818359375, "p10": -63.13768272399902, "median": 126.416748046875, "p90": 459.37054748535166, "max": 700.4134521484375, "pos_frac": 0.75, "sample": [551.4957885742188, -34.74018096923828, 264.945068359375, 6.194427490234375, -43.314205169677734, 52.91007995605469, 57.123138427734375, 139.6239776611328, 278.9600830078125, -37.429931640625, 330.9937744140625, 20.869050979614258, -60.40092849731445, 257.3433837890625, 77.4679183959961, 533.7546997070312, 306.35333251953125, 186.59255981445312, -139.29119873046875, 354.4273376464844, 181.2244873046875, 495.622314453125, 467.6072998046875, 15.658416748046875, -116.15438842773438, 191.10556030273438, 223.6011962890625, 161.61380004882812, 391.29034423828125, 156.64517211914062, 440.1514587402344, 100.92250061035156, 100.45489501953125, 115.75634765625, 195.6613006591797, 323.50830078125, -100.10467529296875, -21.43255615234375, 246.19309997558594, 19.359405517578125, 75.67950439453125, -64.31057739257812, 700.4134521484375, 47.761390686035156, 222.40524291992188, -20.37720489501953, -168.50906372070312, 0.4236927032470703, -60.27442932128906, 610.8419189453125, 101.59097290039062, 209.84812927246094, 626.4784545898438, 137.0771484375, 175.3196258544922, -73.29293823242188, 288.5735168457031, -16.07162094116211, 397.7153625488281, 436.0617370605469, -199.1038818359375, -50.74684143066406, 61.90890884399414, 36.812095642089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000315.npy"} +{"epoch": 0.46255506607929514, "step": 316, "batch_size": 64, "mean": 224.96531677246094, "std": 250.58079528808594, "min": -245.8822479248047, "p10": -9.84074745178222, "median": 162.79576873779297, "p90": 601.9384765625001, "max": 918.0781860351562, "pos_frac": 0.875, "sample": [328.64111328125, 124.97325134277344, 477.449462890625, 63.272552490234375, 67.2729721069336, 903.3135986328125, 145.69329833984375, 664.416259765625, 443.7734375, 643.1572265625, 374.717041015625, 334.1180419921875, 36.1317138671875, 37.67343521118164, 43.790245056152344, 219.61293029785156, 78.54954528808594, 125.65766906738281, 25.688594818115234, 416.92169189453125, 175.42208862304688, 44.58487319946289, 609.0640258789062, 420.91436767578125, 276.9035339355469, 94.6482162475586, 37.96847915649414, 651.9628295898438, 173.9756622314453, 521.5457763671875, 12.493513107299805, 94.15471649169922, 46.560760498046875, 142.9098663330078, 213.3238525390625, 319.61468505859375, 392.9947509765625, 70.34297943115234, 335.1807861328125, 21.850608825683594, 585.3121948242188, 319.6151428222656, 37.5509033203125, 362.4851379394531, 203.85089111328125, 151.61587524414062, -245.8822479248047, -4.826435089111328, 918.0781860351562, 128.55870056152344, 810.1741943359375, -239.07254028320312, 185.25289916992188, 183.9728240966797, 336.2525634765625, -11.989738464355469, -72.28023529052734, -84.16493225097656, -34.014503479003906, 111.12996673583984, 230.22479248046875, -102.83531951904297, 93.2882080078125, 324.24334716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000316.npy"} +{"epoch": 0.46402349486049926, "step": 317, "batch_size": 64, "mean": 245.63772583007812, "std": 233.3943328857422, "min": -157.65711975097656, "p10": -42.48965911865234, "median": 231.04463958740234, "p90": 563.2071166992188, "max": 720.0057983398438, "pos_frac": 0.796875, "sample": [354.3599853515625, 54.53949737548828, 101.02799224853516, 458.7669982910156, 452.1285705566406, 77.23849487304688, 429.13677978515625, 388.81451416015625, -157.65711975097656, 569.3294677734375, 702.959228515625, 140.14393615722656, 248.65521240234375, -2.991455078125, 138.22909545898438, 383.279541015625, 206.7770233154297, -11.970390319824219, 109.01367950439453, 400.7760925292969, -41.9866943359375, 346.961181640625, 342.4244689941406, 539.1707763671875, 35.87055587768555, 509.4734191894531, -8.904476165771484, 126.6679916381836, 720.0057983398438, 412.68902587890625, 651.5906372070312, 483.71478271484375, 190.42835998535156, 548.921630859375, 410.24139404296875, -57.08628845214844, 240.54273986816406, 103.47273254394531, 254.85882568359375, 107.34039306640625, 195.435546875, 531.9823608398438, -27.917768478393555, -42.70521545410156, 622.4234008789062, 221.54653930664062, 587.150634765625, 315.0496826171875, 267.8262939453125, -76.94133758544922, 97.06974792480469, 96.56244659423828, 256.65118408203125, -6.042655944824219, -100.88875579833984, 115.32706451416016, 76.65524291992188, -153.22671508789062, 647.4622802734375, 401.52862548828125, 94.85974884033203, 395.7562561035156, -143.8397979736328, 390.1358947753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000317.npy"} +{"epoch": 0.4654919236417034, "step": 318, "batch_size": 64, "mean": 186.2266387939453, "std": 234.27757263183594, "min": -319.85076904296875, "p10": -62.631797790527344, "median": 159.20938873291016, "p90": 526.458905029297, "max": 852.8662109375, "pos_frac": 0.796875, "sample": [64.5230484008789, -63.07200622558594, 852.8662109375, 100.85198974609375, 246.72763061523438, 121.87702941894531, -57.749122619628906, 180.40982055664062, 120.35381317138672, 667.6541748046875, 25.190149307250977, 108.30499267578125, 315.9975280761719, 538.0452880859375, 575.864501953125, 256.92572021484375, -14.137802124023438, 118.86344909667969, -124.6205062866211, -42.477943420410156, 573.5653686523438, 66.96065521240234, -123.87382507324219, 499.42401123046875, 772.1852416992188, 308.9371337890625, -11.677242279052734, -98.18461608886719, 226.4342498779297, 334.3711853027344, 61.886138916015625, 330.0455627441406, 72.72469329833984, 404.7071838378906, 231.03977966308594, 136.48468017578125, 564.1841430664062, 15.946823120117188, -61.604644775390625, 445.2053527832031, 429.7861328125, 185.97409057617188, 5.266986846923828, -74.0499038696289, 329.442138671875, 207.71444702148438, 119.67879486083984, 174.09194946289062, 166.9113006591797, -45.92962646484375, -312.56451416015625, 33.621498107910156, 226.29852294921875, 201.2122802734375, 356.2892761230469, 370.44500732421875, 236.29214477539062, 138.408447265625, 56.32865905761719, 282.1841125488281, 151.50747680664062, 239.3583984375, -319.85076904296875, 18.928062438964844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000318.npy"} +{"epoch": 0.4669603524229075, "step": 319, "batch_size": 64, "mean": 219.60240173339844, "std": 253.29730224609375, "min": -367.7530822753906, "p10": -107.34338073730468, "median": 227.53179931640625, "p90": 534.9291259765625, "max": 970.5200805664062, "pos_frac": 0.8125, "sample": [301.8963623046875, 158.89198303222656, 297.8945617675781, 265.0700378417969, 67.55793762207031, 271.9200439453125, 356.1446228027344, 468.27099609375, 245.46023559570312, 85.8905258178711, -96.06246185302734, -259.2591857910156, 323.8655090332031, 133.71067810058594, 272.4207458496094, 146.70921325683594, 62.166847229003906, 245.05165100097656, 266.4062194824219, 176.02236938476562, 182.78346252441406, 437.51800537109375, 212.08685302734375, 242.97674560546875, 306.108154296875, 22.714218139648438, 364.1629638671875, 816.449951171875, 425.1189270019531, 373.40185546875, -127.162841796875, 178.68850708007812, 176.5687713623047, 537.6466674804688, -306.40216064453125, 429.3546142578125, -367.7530822753906, -110.77044677734375, -99.34689331054688, 387.01068115234375, 562.82421875, 440.34320068359375, 596.2792358398438, -10.070371627807617, 159.4928436279297, 20.386978149414062, 559.1244506835938, 621.150390625, 269.4964294433594, 528.5881958007812, 970.5200805664062, -137.4399871826172, 151.93899536132812, 435.9188232421875, -34.47417068481445, 38.27838134765625, -211.67254638671875, -47.27056884765625, 358.22210693359375, 160.32005310058594, 145.0286865234375, 205.55996704101562, 245.69818115234375, 155.12538146972656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000319.npy"} +{"epoch": 0.4684287812041116, "step": 320, "batch_size": 64, "mean": 242.403564453125, "std": 263.6028137207031, "min": -271.4315490722656, "p10": -60.97780227661132, "median": 221.56637573242188, "p90": 567.2474914550781, "max": 1061.0784912109375, "pos_frac": 0.8125, "sample": [171.90206909179688, 189.29904174804688, 906.0963134765625, 120.59162902832031, 290.9598693847656, -119.3473129272461, 184.18325805664062, -271.4315490722656, 809.1620483398438, 164.5697479248047, 440.385986328125, 373.2234191894531, -42.43327331542969, 385.8063049316406, 231.49288940429688, 561.7720336914062, 613.0866088867188, 412.4479675292969, 370.63671875, -164.4927978515625, 325.668212890625, 364.01312255859375, -45.99998474121094, 200.17596435546875, 59.710182189941406, 14.858390808105469, 314.97637939453125, 295.1100769042969, -42.06361389160156, 569.5941162109375, -83.81095886230469, 251.62738037109375, -23.920143127441406, 137.28070068359375, 463.9329833984375, 130.40870666503906, 160.27764892578125, 216.9696044921875, 1061.0784912109375, 334.2908020019531, 394.1602783203125, 86.11438751220703, 113.031005859375, -55.207763671875, 656.4837036132812, 59.815162658691406, 376.0074157714844, 219.0248260498047, 3.216432571411133, 25.4625244140625, 354.9186706542969, 389.24725341796875, 306.10968017578125, 432.00543212890625, 497.215087890625, 611.921875, 526.0003662109375, 12.246931076049805, -118.6368408203125, -63.45067596435547, 48.879730224609375, 224.10792541503906, 255.90924072265625, -172.843994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000320.npy"} +{"epoch": 0.4698972099853157, "step": 321, "batch_size": 64, "mean": 203.71778869628906, "std": 270.4161071777344, "min": -381.04852294921875, "p10": -96.24809265136719, "median": 180.65164184570312, "p90": 555.2553466796876, "max": 981.005615234375, "pos_frac": 0.78125, "sample": [681.4703369140625, 116.54573822021484, 276.34173583984375, 259.6280212402344, -63.068397521972656, 295.74884033203125, 347.0451354980469, 273.3326110839844, 131.84173583984375, 506.84234619140625, 430.14007568359375, 376.47052001953125, 82.02920532226562, 59.20307540893555, -236.9705352783203, 249.6709747314453, 436.85919189453125, 249.5599365234375, 215.17050170898438, 563.7841796875, 744.059814453125, 157.65802001953125, -381.04852294921875, 305.3675231933594, 219.4534912109375, 319.7798156738281, 753.695556640625, -317.3807067871094, -124.51322937011719, 150.59449768066406, 86.67762756347656, 535.354736328125, -93.74581909179688, 379.020263671875, 177.90615844726562, 426.7970886230469, 224.7941131591797, -262.0306396484375, 981.005615234375, 96.99646759033203, -55.6973876953125, 93.29500579833984, 14.422830581665039, 110.275146484375, -30.093727111816406, 142.15480041503906, -97.32049560546875, -15.859058380126953, 148.69992065429688, 253.8889923095703, 693.989501953125, 655.0325927734375, -2.5167198181152344, -311.2030334472656, 183.39712524414062, 191.08001708984375, 153.54571533203125, 167.02342224121094, 2.442201614379883, 469.65924072265625, 169.22930908203125, 284.603759765625, 228.527099609375, -42.725006103515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000321.npy"} +{"epoch": 0.4713656387665198, "step": 322, "batch_size": 64, "mean": 226.1587371826172, "std": 238.70956420898438, "min": -280.9570617675781, "p10": -60.75288429260253, "median": 261.537109375, "p90": 525.302899169922, "max": 791.6328125, "pos_frac": 0.78125, "sample": [-198.65777587890625, 791.6328125, 296.1373596191406, 760.8656005859375, 104.83414459228516, -2.4238319396972656, 511.56988525390625, 377.9096374511719, 306.675048828125, -63.383724212646484, -29.659652709960938, -18.572044372558594, 317.4583740234375, 292.3836669921875, 329.8859558105469, 382.5167541503906, -65.62328338623047, 238.05813598632812, -20.672393798828125, -108.67721557617188, 586.2215576171875, 588.379638671875, 279.03533935546875, 607.1903686523438, 16.22869873046875, 205.91513061523438, 341.85540771484375, 531.1884765625, 16.996620178222656, 469.7724609375, -76.65208435058594, 200.24777221679688, -37.58683395385742, 387.0096435546875, -54.6142578125, 7.300746917724609, 285.39739990234375, 175.20901489257812, 1.3445472717285156, 292.80694580078125, 480.23126220703125, 417.3516845703125, 407.0721435546875, -108.88899993896484, 434.5218811035156, 93.54798889160156, 289.24053955078125, -11.509471893310547, 250.40301513671875, 118.91603088378906, 102.09455108642578, 648.1663208007812, 209.09359741210938, 359.88702392578125, 418.0948791503906, 366.18414306640625, 75.53369140625, 37.51624298095703, 16.762697219848633, -280.9570617675781, 20.638275146484375, 431.16094970703125, 400.9215393066406, 272.67120361328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000322.npy"} +{"epoch": 0.47283406754772395, "step": 323, "batch_size": 64, "mean": 191.94479370117188, "std": 249.8715057373047, "min": -394.6297912597656, "p10": -115.26885681152343, "median": 174.6991195678711, "p90": 538.3981262207031, "max": 835.5398559570312, "pos_frac": 0.8125, "sample": [394.0244445800781, 245.75332641601562, 556.8271484375, -394.6297912597656, 619.4996337890625, -316.7319030761719, 604.141357421875, 835.5398559570312, 73.64131164550781, 106.84691619873047, 272.9020690917969, 35.278594970703125, 165.34329223632812, 135.72132873535156, 15.403205871582031, 287.568359375, 96.84830474853516, 307.8632507324219, -113.86280059814453, 624.9342651367188, -5.578529357910156, 80.10917663574219, 527.8211059570312, -147.5908203125, -147.13148498535156, 373.60015869140625, 128.41514587402344, 156.1738739013672, 436.744140625, 31.38037109375, 502.8029479980469, 245.7191162109375, 456.63079833984375, -41.516754150390625, 317.61138916015625, 22.5880126953125, -30.675552368164062, 176.76004028320312, 307.207275390625, 220.9041748046875, 287.69451904296875, 55.385986328125, 250.8321533203125, 530.5488891601562, -293.7981872558594, 172.63819885253906, 301.5118103027344, 607.3116455078125, 388.0820007324219, -66.85332489013672, 203.34024047851562, 541.7620849609375, -115.87145233154297, 290.6608581542969, -241.36256408691406, 210.80316162109375, 136.962158203125, 237.32025146484375, 68.8225326538086, 309.19976806640625, 55.16582489013672, 18.144054412841797, 137.29681396484375, 33.9832763671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000323.npy"} +{"epoch": 0.47430249632892807, "step": 324, "batch_size": 64, "mean": 180.31451416015625, "std": 291.45660400390625, "min": -397.0083312988281, "p10": -180.06527404785155, "median": 138.67731475830078, "p90": 506.7490875244141, "max": 1006.229736328125, "pos_frac": 0.734375, "sample": [-234.25491333007812, 663.7012939453125, 1.5701904296875, 234.478759765625, -13.415475845336914, 103.4578857421875, 242.56207275390625, 290.0104064941406, 115.13340759277344, 204.80694580078125, 810.027099609375, -116.72506713867188, 133.20849609375, 107.46775817871094, -22.289142608642578, 710.6470947265625, -223.0881805419922, -361.854736328125, 326.73651123046875, 233.97238159179688, 25.78485107421875, 122.30733489990234, -188.74591064453125, 202.1531982421875, 325.5386962890625, 896.2196655273438, 462.67645263671875, -148.5441131591797, 104.78118896484375, 492.2455139160156, 454.1695251464844, 144.14613342285156, 112.58238983154297, 77.22486114501953, 389.1972351074219, 329.3262023925781, 151.47840881347656, 320.3235168457031, 215.78948974609375, -16.70389747619629, 407.608642578125, -397.0083312988281, -205.5003662109375, 411.75360107421875, 512.9649047851562, 401.1115417480469, 128.59071350097656, -371.7575378417969, -159.81045532226562, 93.0481948852539, 274.0424499511719, 73.20714569091797, 268.49755859375, -50.29092025756836, 63.26478576660156, 83.138671875, 325.501953125, -91.98895263671875, -41.685489654541016, 1006.229736328125, 687.6212768554688, 155.9226531982422, -30.770843505859375, 318.3352355957031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000324.npy"} +{"epoch": 0.47577092511013214, "step": 325, "batch_size": 64, "mean": 156.92562866210938, "std": 218.8779296875, "min": -369.1091613769531, "p10": -76.42038269042969, "median": 134.3604278564453, "p90": 430.8413696289063, "max": 794.6691284179688, "pos_frac": 0.828125, "sample": [-353.60577392578125, -369.1091613769531, 214.39065551757812, 31.36402130126953, 348.97515869140625, 279.78265380859375, 151.79452514648438, -73.18061828613281, 193.4151611328125, 290.06103515625, 330.95745849609375, 523.0047607421875, 297.3358459472656, 132.9925079345703, 129.68592834472656, 103.19232940673828, 31.586647033691406, 83.71083068847656, 48.53022766113281, 254.55055236816406, -154.24465942382812, 261.6723327636719, 281.8750305175781, 553.7383422851562, 23.999774932861328, 12.235069274902344, 135.7283477783203, 53.98638153076172, 120.42579650878906, 13.290260314941406, 437.3938903808594, -37.73298645019531, 794.6691284179688, 26.75027847290039, 540.3200073242188, 51.37615203857422, 492.4871826171875, -77.80885314941406, 250.70635986328125, 198.46800231933594, -92.0591812133789, 122.12676239013672, 367.9510803222656, -30.778261184692383, 271.71588134765625, 312.2002258300781, 184.21414184570312, 272.1374816894531, 142.96139526367188, 103.4940185546875, 289.92022705078125, -307.1129150390625, 23.114715576171875, 663.6897583007812, 27.1160888671875, -8.076194763183594, 415.5521545410156, 23.188119888305664, -84.39433288574219, 249.000244140625, 153.0713348388672, 43.32033157348633, 59.85797882080078, 212.25894165039062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000325.npy"} +{"epoch": 0.47723935389133626, "step": 326, "batch_size": 64, "mean": 149.17068481445312, "std": 197.0985107421875, "min": -458.8954162597656, "p10": -25.407769393920894, "median": 137.20726013183594, "p90": 405.162924194336, "max": 633.7904052734375, "pos_frac": 0.828125, "sample": [17.079376220703125, -5.773921966552734, 97.62669372558594, 408.7213134765625, 192.6403045654297, -8.46282958984375, 184.43899536132812, 138.52395629882812, 320.7328796386719, -275.7059326171875, -157.12091064453125, -458.8954162597656, 539.1386108398438, 67.6706771850586, -113.8330307006836, 165.8928985595703, 199.04989624023438, 94.17515563964844, 246.756591796875, 228.09201049804688, 14.043731689453125, 62.682525634765625, 204.36366271972656, 207.65725708007812, -27.311264038085938, 114.86836242675781, 633.7904052734375, 189.08705139160156, 61.82867431640625, 45.512325286865234, 396.8600158691406, 202.58834838867188, 246.6842498779297, 135.89056396484375, 225.01834106445312, 13.40472412109375, 47.295074462890625, 393.00732421875, -20.96628189086914, 102.17312622070312, 121.74916076660156, -50.96661376953125, 69.97645568847656, 168.26693725585938, 19.997802734375, 92.78361511230469, 190.77166748046875, 373.7445983886719, 221.74034118652344, 230.3643341064453, -7.2545623779296875, 423.6743469238281, 593.2276611328125, 83.93172454833984, 302.50811767578125, 186.76669311523438, 472.81085205078125, 274.5261535644531, 11.305486679077148, -279.7701416015625, 431.94232177734375, 329.47137451171875, 48.314151763916016, 107.81584930419922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000326.npy"} +{"epoch": 0.4787077826725404, "step": 327, "batch_size": 64, "mean": 229.86044311523438, "std": 232.9683074951172, "min": -306.2003173828125, "p10": -83.32915802001949, "median": 247.24087524414062, "p90": 522.016146850586, "max": 726.2116088867188, "pos_frac": 0.8125, "sample": [206.07510375976562, 193.1074676513672, 470.66162109375, 196.77259826660156, 195.9616241455078, 345.6296081542969, 57.08557891845703, 293.0146484375, 326.27630615234375, 173.40762329101562, 341.54229736328125, 315.51190185546875, 120.22715759277344, 414.474365234375, -112.39038848876953, 458.2393493652344, -9.296199798583984, 485.9425964355469, 354.8942565917969, 326.05853271484375, -202.21514892578125, 620.583251953125, 549.4075927734375, 336.4615478515625, 144.99440002441406, 531.6759033203125, 290.89453125, -158.21397399902344, 247.2503662109375, 332.0082702636719, 385.577880859375, -158.4544677734375, -28.604110717773438, 158.7992401123047, 345.0595703125, 121.41361999511719, 253.64781188964844, 283.699462890625, 247.23138427734375, 151.16757202148438, 19.487747192382812, 68.43966674804688, -46.606048583984375, 459.5046081542969, 207.63333129882812, -97.69108581542969, -7.393047332763672, 357.8509521484375, 383.4527587890625, 119.78146362304688, 661.5736083984375, 95.90261840820312, 726.2116088867188, -306.2003173828125, 499.4767150878906, -49.8179931640625, 99.52726745605469, 593.1016845703125, 54.33872985839844, 414.3206787109375, 366.3774108886719, 645.366455078125, 106.96512603759766, -266.11669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000327.npy"} +{"epoch": 0.4801762114537445, "step": 328, "batch_size": 64, "mean": 211.17984008789062, "std": 263.0715637207031, "min": -394.4688720703125, "p10": -81.3355026245117, "median": 187.1659698486328, "p90": 531.0218200683595, "max": 927.60205078125, "pos_frac": 0.71875, "sample": [190.1484375, -129.87118530273438, 202.15170288085938, 179.74276733398438, -32.70654296875, 380.54864501953125, 328.7908630371094, 145.6961212158203, 26.4219970703125, -394.4688720703125, 107.8040542602539, 468.3753967285156, 504.00213623046875, 375.1091003417969, 300.39080810546875, 472.53021240234375, -9.239921569824219, -123.90483093261719, -49.105560302734375, 225.1019287109375, -70.464599609375, -30.925750732421875, 354.6014404296875, 927.60205078125, 220.21115112304688, 356.46270751953125, 357.8139953613281, 212.74356079101562, 683.8719482421875, 364.5820617675781, -97.67825317382812, 184.18350219726562, 294.7550354003906, -85.99446105957031, 62.056182861328125, 113.54147338867188, 315.3499755859375, 415.8055725097656, 153.31277465820312, 141.61270141601562, -8.41246223449707, 191.45321655273438, -110.60630798339844, 238.05789184570312, -22.12889862060547, 496.9981689453125, 114.1595458984375, 260.72760009765625, 291.15582275390625, 182.26058959960938, 733.3486328125, 542.6016845703125, 19.853012084960938, -22.342193603515625, -15.193248748779297, 795.77783203125, 874.7900390625, 68.41793823242188, 634.088134765625, -31.09450912475586, -4.821586608886719, 302.7570495605469, 73.76150512695312, -131.06005859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000328.npy"} +{"epoch": 0.48164464023494863, "step": 329, "batch_size": 64, "mean": 219.3310546875, "std": 287.4814453125, "min": -695.8074951171875, "p10": -91.39138183593748, "median": 206.87662506103516, "p90": 591.0613769531251, "max": 916.3632202148438, "pos_frac": 0.796875, "sample": [131.31161499023438, 414.84466552734375, 565.8111572265625, 339.9556884765625, 330.5800476074219, -116.3260726928711, 514.926025390625, 31.616004943847656, 520.6757202148438, 214.77801513671875, 14.41278076171875, 415.3094787597656, 81.1987075805664, -70.96134948730469, 84.38255310058594, 131.01547241210938, 244.79383850097656, 166.13491821289062, 55.078155517578125, 817.0081787109375, 107.7937240600586, 101.9073486328125, 181.73301696777344, 155.53692626953125, -695.8074951171875, 304.3763732910156, 63.637474060058594, 916.3632202148438, 640.1202392578125, -99.84727478027344, 241.49124145507812, -49.08907699584961, 392.1007385253906, 90.86542510986328, 236.88555908203125, 403.1288757324219, 582.5524291992188, 395.77886962890625, 764.1805419921875, 241.32884216308594, 3.4068946838378906, -71.66096496582031, -179.84068298339844, 622.3096313476562, 504.2807922363281, -156.65966796875, 291.342529296875, -134.03392028808594, 344.61346435546875, 198.97523498535156, 222.9208984375, -51.47279357910156, 237.52207946777344, 255.6669464111328, 73.70978546142578, 48.355201721191406, 270.84417724609375, 467.74761962890625, 797.0767211914062, -5.84185791015625, -334.6088562011719, 176.82650756835938, 594.7080688476562, -0.5819625854492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000329.npy"} +{"epoch": 0.4831130690161527, "step": 330, "batch_size": 64, "mean": 231.99075317382812, "std": 272.832275390625, "min": -301.3773193359375, "p10": -95.2630905151367, "median": 221.10419464111328, "p90": 507.24488525390626, "max": 925.998779296875, "pos_frac": 0.796875, "sample": [507.5482177734375, -40.46746826171875, -150.52178955078125, -106.11582946777344, 352.7876281738281, 504.49578857421875, 175.67356872558594, 199.08245849609375, -64.0770492553711, 83.35022735595703, -155.12754821777344, 122.12287902832031, 171.34652709960938, 458.50396728515625, 548.2911376953125, 31.83734893798828, 287.8984069824219, 281.6029968261719, 252.84487915039062, 443.8815612792969, -51.95533752441406, 102.30059814453125, 276.9589538574219, 58.34428024291992, 144.56011962890625, 327.9150695800781, -301.3773193359375, 325.9052734375, 232.4055633544922, 191.29031372070312, 421.9158020019531, 1.3176841735839844, -270.4754333496094, 273.4162292480469, 466.2709655761719, 208.5211181640625, 48.42936706542969, 925.998779296875, 36.693763732910156, 130.3629913330078, -18.25214385986328, 474.23541259765625, 137.28887939453125, 919.2200927734375, -231.0321502685547, 863.44970703125, 747.1354370117188, 341.83587646484375, 374.8955078125, 78.11360931396484, 318.91094970703125, 461.64044189453125, 209.80282592773438, 755.2180786132812, -7.651771545410156, 318.74835205078125, 250.895263671875, 506.537109375, -163.37814331054688, 454.9283447265625, 86.39678955078125, 284.278564453125, 300.3741149902344, -69.94003295898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000330.npy"} +{"epoch": 0.4845814977973568, "step": 331, "batch_size": 64, "mean": 227.93499755859375, "std": 194.73362731933594, "min": -175.59027099609375, "p10": 2.8280784606933906, "median": 213.33824157714844, "p90": 427.2332061767579, "max": 850.4680786132812, "pos_frac": 0.890625, "sample": [166.1044158935547, 756.7725219726562, 283.8758544921875, -12.49945068359375, 271.8310241699219, 397.66156005859375, 114.05242919921875, 42.08775329589844, 239.44703674316406, 188.80926513671875, 730.6722412109375, 264.18231201171875, 850.4680786132812, -10.374015808105469, 136.11451721191406, 344.5065002441406, 592.77392578125, 115.01347351074219, 123.87603759765625, 230.63137817382812, 439.9067687988281, 395.2611083984375, 495.41082763671875, -58.4723014831543, 105.64405822753906, 186.96351623535156, 208.6100616455078, 240.78512573242188, 252.32122802734375, -35.79449462890625, 263.9349060058594, -64.05891418457031, 261.47625732421875, 176.18209838867188, 36.328880310058594, 183.25119018554688, -146.97891235351562, 199.86590576171875, 306.6778259277344, 554.1128540039062, 277.8379211425781, 218.06642150878906, 136.08473205566406, 360.68896484375, 87.05962371826172, 74.63311767578125, 292.0384216308594, 152.26626586914062, 295.4801940917969, 119.81547546386719, -175.59027099609375, 279.0188903808594, 129.57118225097656, 98.28178405761719, 369.7711181640625, 319.94305419921875, 304.4711608886719, 61.86731719970703, 197.2277069091797, 342.3577575683594, 148.8490447998047, 285.40301513671875, 351.6298828125, 33.632965087890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000331.npy"} +{"epoch": 0.48604992657856094, "step": 332, "batch_size": 64, "mean": 165.63372802734375, "std": 239.31832885742188, "min": -197.90390014648438, "p10": -103.47993469238281, "median": 128.06750106811523, "p90": 449.2050994873047, "max": 1059.9190673828125, "pos_frac": 0.75, "sample": [698.9796752929688, 157.3255615234375, 152.86387634277344, 289.8193664550781, 23.57830810546875, -12.027847290039062, 77.29618835449219, 439.30694580078125, 386.6502685546875, 192.79551696777344, -80.40071105957031, 53.26869201660156, -104.611572265625, 1059.9190673828125, 192.26158142089844, 73.34468841552734, 151.452392578125, 146.2694549560547, 142.8328857421875, 2.9025402069091797, 102.68621063232422, 96.16014099121094, 443.2127380371094, -117.37452697753906, 360.84197998046875, 37.07246780395508, -197.90390014648438, 120.37483978271484, 623.9464721679688, 214.07981872558594, 170.6220703125, 180.93838500976562, 270.89068603515625, -59.420082092285156, -66.06629180908203, 78.06128692626953, 508.36712646484375, 565.1856689453125, -100.83944702148438, 237.93190002441406, 135.76016235351562, -159.81387329101562, 384.88519287109375, 71.20881652832031, 306.76336669921875, 678.9718627929688, 105.37730407714844, 451.77325439453125, -78.32608032226562, 76.06315612792969, -173.8966064453125, 275.111328125, 369.5798645019531, -143.23876953125, 337.5663146972656, 68.45997619628906, -94.02558898925781, -9.526531219482422, -113.76713562011719, 161.11959838867188, 282.92645263671875, 114.71541595458984, 110.49925231933594, -70.2227783203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000332.npy"} +{"epoch": 0.48751835535976507, "step": 333, "batch_size": 64, "mean": 214.22947692871094, "std": 300.14874267578125, "min": -339.0847473144531, "p10": -177.1277099609375, "median": 197.18736267089844, "p90": 609.0979919433595, "max": 914.2716064453125, "pos_frac": 0.703125, "sample": [-44.93244934082031, 305.4845275878906, 914.2716064453125, -19.870681762695312, 78.4979019165039, 49.44798278808594, 331.384521484375, 207.67945861816406, 62.2470703125, 51.63523483276367, 817.2557983398438, -169.92425537109375, 184.57785034179688, -274.7633972167969, 528.9158325195312, -224.02835083007812, 639.7682495117188, -93.25762939453125, 639.21240234375, 385.3088684082031, -3.1029701232910156, 310.3483581542969, -25.342178344726562, 348.0110778808594, 441.92974853515625, 554.8391723632812, -180.21490478515625, 108.3663330078125, 7.25579833984375, 439.2688293457031, 146.5115203857422, -2.6907520294189453, -79.1759033203125, 477.4477233886719, 619.2545776367188, 410.5421447753906, -241.62478637695312, 9.754852294921875, 548.0527954101562, 438.1987609863281, 446.80419921875, -293.73822021484375, -15.785125732421875, 507.0599060058594, 476.95611572265625, -13.697242736816406, 234.6239013671875, 155.9098358154297, 776.2474975585938, 275.3443603515625, 485.41375732421875, 269.0826416015625, 585.3992919921875, 188.51708984375, 205.85763549804688, -149.2342987060547, 409.54736328125, -0.4069786071777344, 315.8621826171875, -302.8375244140625, 633.0739135742188, -339.0847473144531, 22.604385375976562, 140.6254425048828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000333.npy"} +{"epoch": 0.4889867841409692, "step": 334, "batch_size": 64, "mean": 190.96356201171875, "std": 305.06640625, "min": -543.4904174804688, "p10": -143.72344665527342, "median": 144.93572235107422, "p90": 655.6058715820313, "max": 891.072998046875, "pos_frac": 0.796875, "sample": [626.0465087890625, 48.75689697265625, -147.03085327148438, 490.10980224609375, 246.43377685546875, 41.209312438964844, -122.18657684326172, 45.155303955078125, 234.87161254882812, 30.09844970703125, 230.06753540039062, 43.26549530029297, 360.979736328125, 248.02362060546875, -159.6116485595703, 200.64988708496094, 66.05459594726562, 435.24969482421875, -543.4904174804688, 675.38134765625, 51.9267463684082, 222.08425903320312, -52.80732727050781, -288.9051208496094, 370.84075927734375, 262.648681640625, 495.26202392578125, -403.70635986328125, -227.92311096191406, -17.500350952148438, 35.223785400390625, -119.808349609375, 126.65925598144531, 57.501220703125, 791.8237915039062, 891.072998046875, 291.0646057128906, 15.683496475219727, 50.76321029663086, 518.6268310546875, 89.76195526123047, 242.19158935546875, 546.5099487304688, 410.97686767578125, -92.85420227050781, 81.9720458984375, 809.4134521484375, 24.187530517578125, 767.9150390625, 466.49072265625, -136.00616455078125, -355.01739501953125, 26.90643310546875, 248.67654418945312, 199.3628692626953, 668.274169921875, 487.2865295410156, 101.11601257324219, 51.49449157714844, 346.1928405761719, 68.48582458496094, 691.4879150390625, 193.0653533935547, 163.21218872070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000334.npy"} +{"epoch": 0.49045521292217326, "step": 335, "batch_size": 64, "mean": 150.09857177734375, "std": 276.1180725097656, "min": -312.0198669433594, "p10": -207.45296630859372, "median": 129.47682189941406, "p90": 447.41028442382816, "max": 1013.5608520507812, "pos_frac": 0.703125, "sample": [166.80340576171875, 387.6154479980469, 432.8692626953125, 6.4346923828125, 182.56993103027344, -74.92044830322266, 31.638254165649414, 426.1767272949219, 72.78575134277344, 35.709434509277344, -240.88099670410156, 19.827072143554688, 278.9903564453125, 248.03004455566406, -54.96013641357422, 717.3401489257812, -22.91236114501953, 51.342201232910156, -247.09234619140625, 230.14227294921875, -132.79949951171875, 32.90351867675781, 223.89923095703125, 152.84951782226562, 692.9884033203125, 581.1749877929688, 388.9554138183594, 293.1260986328125, 11.697572708129883, 657.1699829101562, 1013.5608520507812, -284.0589904785156, 136.74368286132812, -312.0198669433594, 392.7287902832031, -29.619516372680664, 395.7568359375, 52.41932678222656, 68.91195678710938, 568.47119140625, -113.2655029296875, 350.6194152832031, -188.08346557617188, 122.2099609375, 282.4351501464844, -104.0693359375, 240.92291259765625, -23.803314208984375, -266.99835205078125, -130.87078857421875, 137.37295532226562, -140.9062042236328, 390.65203857421875, -256.10400390625, -123.14398956298828, 378.2601013183594, 273.741943359375, 453.64215087890625, 5.9597320556640625, -215.75418090820312, 308.07928466796875, 365.3263854980469, 214.58685302734375, 93.13143920898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000335.npy"} +{"epoch": 0.4919236417033774, "step": 336, "batch_size": 64, "mean": 241.56228637695312, "std": 346.3768005371094, "min": -635.6212768554688, "p10": -99.02403411865234, "median": 219.5210952758789, "p90": 754.526531982422, "max": 1147.7218017578125, "pos_frac": 0.828125, "sample": [289.3422546386719, 64.83197021484375, 953.742919921875, -45.45978546142578, 404.1767883300781, 232.45074462890625, 277.3003845214844, -127.17391204833984, -79.70770263671875, 873.2025146484375, 302.24700927734375, 332.183837890625, 144.77496337890625, 453.3960266113281, 1147.7218017578125, -52.34521484375, 750.3787841796875, 374.4394836425781, 213.29029846191406, 397.65533447265625, -100.84288024902344, 75.22166442871094, 121.08210754394531, 284.1637878417969, 160.98492431640625, 394.8376159667969, 129.12405395507812, 29.970279693603516, 769.0692138671875, 225.75189208984375, 200.28248596191406, 62.01611328125, 39.09761047363281, 235.66622924804688, 109.14671325683594, 8.6673583984375, -461.7095947265625, 742.5093383789062, -94.78005981445312, -221.09356689453125, 756.3041381835938, 1.9606399536132812, 818.1976318359375, 65.70915222167969, 456.869873046875, 74.86431884765625, -635.6212768554688, 94.0455551147461, 343.52996826171875, 329.4278564453125, 119.37858581542969, 126.26187896728516, 182.15042114257812, 56.668495178222656, 256.6804504394531, 509.148193359375, 506.5269775390625, 1037.4013671875, -346.94610595703125, 338.24658203125, -459.16876220703125, 378.8330078125, 294.87274169921875, 539.0301513671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000336.npy"} +{"epoch": 0.4933920704845815, "step": 337, "batch_size": 64, "mean": 239.69003295898438, "std": 273.3504638671875, "min": -267.20697021484375, "p10": -126.67315139770507, "median": 199.48836517333984, "p90": 606.6661804199221, "max": 1031.8460693359375, "pos_frac": 0.8125, "sample": [928.962646484375, 285.0145568847656, -164.7924346923828, 103.5544662475586, 563.6907348632812, 520.18310546875, -58.524940490722656, 279.30126953125, 23.827281951904297, 429.9686279296875, 12.037605285644531, 248.30062866210938, 306.80548095703125, 415.07965087890625, 392.2155456542969, -122.6221694946289, 230.80442810058594, 177.94003295898438, -128.40928649902344, 519.8432006835938, 177.8056182861328, 195.48626708984375, 11.029012680053711, 632.8392333984375, 173.11439514160156, -139.594970703125, 1031.8460693359375, 75.59765625, -167.76760864257812, -163.99652099609375, 217.016357421875, 196.66639709472656, 279.86822509765625, -185.8261260986328, 186.9737091064453, 140.31040954589844, 503.7095642089844, -32.44804382324219, 293.20281982421875, 367.53387451171875, -22.470020294189453, 707.2698364257812, 85.7393798828125, 182.05908203125, -267.20697021484375, 142.72555541992188, 837.6836547851562, 523.51513671875, 354.10528564453125, 60.506690979003906, 202.31033325195312, 120.64295196533203, 625.084228515625, 360.521728515625, 145.9740753173828, 670.430419921875, 369.88055419921875, 193.47247314453125, 110.3349838256836, 370.27178955078125, 287.7416687011719, 268.3658142089844, -76.29008483886719, 330.9473571777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000337.npy"} +{"epoch": 0.4948604992657856, "step": 338, "batch_size": 64, "mean": 232.6099853515625, "std": 230.09146118164062, "min": -226.08120727539062, "p10": -15.663192749023429, "median": 202.8077392578125, "p90": 531.3968566894531, "max": 804.096435546875, "pos_frac": 0.875, "sample": [84.23390197753906, 533.0377197265625, 102.65052795410156, -213.77838134765625, 210.17929077148438, 128.38308715820312, -109.48028564453125, 244.7578887939453, 667.40625, 244.33432006835938, 108.45771789550781, 76.64982604980469, -108.46295166015625, 515.5517578125, 114.43194580078125, 320.179931640625, 51.457489013671875, 506.5989685058594, 435.4817199707031, 549.32958984375, 678.1541748046875, 13.411460876464844, 248.5391845703125, 213.0561065673828, 283.45343017578125, 14.500473022460938, 439.1395263671875, -19.46672821044922, -31.20825958251953, -6.788276672363281, 233.01467895507812, 512.5732421875, 468.7191467285156, 103.40057373046875, 233.2337646484375, 399.6445617675781, 215.03427124023438, 56.62137222290039, 375.0640563964844, 135.2493133544922, 397.7383117675781, 45.89704895019531, 70.62063598632812, 102.65303039550781, 506.60009765625, 34.175933837890625, 195.43618774414062, 136.05987548828125, 353.60711669921875, 158.08995056152344, 17.98394775390625, 36.82919692993164, 566.284423828125, 804.096435546875, -55.67394256591797, 245.767578125, 508.6734619140625, 182.56622314453125, 360.31341552734375, 157.6590576171875, 662.1586303710938, -226.08120727539062, 71.26921081542969, 527.5681762695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000338.npy"} +{"epoch": 0.49632892804698975, "step": 339, "batch_size": 64, "mean": 229.14202880859375, "std": 247.76565551757812, "min": -250.45767211914062, "p10": -70.49854431152343, "median": 211.84536743164062, "p90": 607.7530883789062, "max": 829.6860961914062, "pos_frac": 0.828125, "sample": [42.671600341796875, -120.46021270751953, -73.9652099609375, -42.55543518066406, 636.1575927734375, 35.84742736816406, 55.12837219238281, 181.1865997314453, 351.845947265625, 28.53443145751953, 829.6860961914062, 59.51002502441406, 89.80020141601562, 159.61087036132812, 358.99951171875, 353.7117919921875, 15.131139755249023, 17.80931854248047, 78.08118438720703, 609.4442138671875, 249.95115661621094, -12.571525573730469, 266.5998229980469, -67.6163101196289, 237.72561645507812, 121.75869750976562, 685.2430419921875, 27.855209350585938, 361.39306640625, 655.20263671875, -151.75592041015625, 261.28643798828125, -250.45767211914062, 159.57464599609375, 476.2535705566406, 333.1395263671875, 177.27932739257812, -25.775049209594727, 271.6036376953125, 486.5636901855469, 152.19337463378906, 257.77825927734375, 454.7303466796875, 529.2811279296875, 127.5705337524414, 637.6309204101562, -164.41729736328125, 693.5616455078125, 365.2469787597656, -198.62672424316406, 223.5390625, 200.15167236328125, 54.23838806152344, 243.78213500976562, 60.880733489990234, 197.52525329589844, 282.50579833984375, -71.7337875366211, 514.8443603515625, 343.79840087890625, 531.1136474609375, 603.80712890625, 415.87744140625, 280.3814392089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000339.npy"} +{"epoch": 0.4977973568281938, "step": 340, "batch_size": 64, "mean": 195.61285400390625, "std": 276.29559326171875, "min": -455.92327880859375, "p10": -132.54623336791988, "median": 164.7219467163086, "p90": 565.9695007324219, "max": 975.591552734375, "pos_frac": 0.78125, "sample": [168.87042236328125, 252.19985961914062, 94.85028839111328, 204.8125, 210.58140563964844, 492.2174377441406, 652.5108642578125, -455.92327880859375, 275.80035400390625, -62.788963317871094, 354.9290771484375, 975.591552734375, 135.20913696289062, -257.2806701660156, 571.599609375, -77.02721405029297, 304.78143310546875, 126.88903045654297, -44.26618194580078, 62.031185150146484, 182.71824645996094, -195.23873901367188, -355.8260803222656, 552.8325805664062, -152.1031036376953, 137.42477416992188, 38.705955505371094, 419.6033935546875, -34.268463134765625, 314.2471618652344, 521.2552490234375, 83.91285705566406, 859.2498168945312, 662.6228637695312, 5.284421920776367, 160.57347106933594, 118.82989501953125, 250.62774658203125, 133.3632354736328, 198.86605834960938, 224.53079223632812, 126.35409545898438, 211.21261596679688, -14.458560943603516, -158.27186584472656, 308.7403259277344, 253.8404541015625, -86.91353607177734, 524.1083374023438, 7.0615081787109375, 139.06924438476562, 436.50750732421875, 607.20458984375, 602.2591552734375, 368.56207275390625, 207.73226928710938, 130.64002990722656, 45.481449127197266, 387.4795227050781, 154.83920288085938, -275.072265625, 406.6091613769531, -55.544830322265625, 78.98235321044922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000340.npy"} +{"epoch": 0.49926578560939794, "step": 341, "batch_size": 64, "mean": 225.42550659179688, "std": 265.3753967285156, "min": -302.9471435546875, "p10": -72.36225662231445, "median": 204.67798614501953, "p90": 541.249090576172, "max": 1200.242919921875, "pos_frac": 0.796875, "sample": [1200.242919921875, -94.2799072265625, -77.35621643066406, -211.06199645996094, 406.3830871582031, 553.3726806640625, 697.4271240234375, 488.5329895019531, 206.3834686279297, 202.97250366210938, -5.543245315551758, 173.47918701171875, 132.75894165039062, 266.7592468261719, 28.693464279174805, -56.96946716308594, -55.82838439941406, 339.12860107421875, 69.7442855834961, 294.32257080078125, -145.28515625, 386.8619384765625, 426.71826171875, 257.49774169921875, 71.04598236083984, 584.6060791015625, 159.43093872070312, 197.68991088867188, -21.336166381835938, -77.18901062011719, 155.00607299804688, 365.75933837890625, 616.345458984375, 297.9347229003906, 279.1236572265625, 439.0452575683594, 92.71473693847656, 24.471277236938477, -61.099830627441406, 446.06634521484375, 489.99102783203125, 274.85052490234375, 64.64720153808594, 109.09821319580078, -302.9471435546875, 551.3076782226562, 68.97065734863281, 234.83470153808594, -265.3128662109375, 361.88507080078125, 216.21600341796875, 267.34521484375, 415.0831298828125, -59.239173889160156, 769.5956420898438, 17.820236206054688, 345.15911865234375, 450.3446044921875, 433.9737548828125, 46.2474479675293, 138.18356323242188, 110.0634536743164, 517.779052734375, 116.76537322998047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000341.npy"} +{"epoch": 0.5007342143906021, "step": 342, "batch_size": 64, "mean": 199.92034912109375, "std": 274.50823974609375, "min": -589.0936889648438, "p10": -89.56423645019531, "median": 162.89828491210938, "p90": 507.3732727050783, "max": 998.4908447265625, "pos_frac": 0.75, "sample": [715.7659301757812, -589.0936889648438, -200.48300170898438, 544.6407470703125, 62.662479400634766, 36.086639404296875, 156.4041748046875, 378.08258056640625, 346.6519775390625, 324.3025817871094, 147.3892059326172, 103.94651794433594, 201.83953857421875, 193.1957244873047, 527.299560546875, 304.2987060546875, 88.81779479980469, 449.6796569824219, -185.49240112304688, -35.82151794433594, 409.78094482421875, -78.5289535522461, 79.31924438476562, -86.1429672241211, -91.0304946899414, 317.9162292480469, 990.0318603515625, 364.65777587890625, 115.51573181152344, -243.5421600341797, -63.93940353393555, 998.4908447265625, 212.06915283203125, 198.52420043945312, -2.6533660888671875, -65.8255844116211, 560.0706176757812, 440.9184875488281, 577.7898559570312, 136.24012756347656, -19.764097213745117, 166.70269775390625, 351.5362854003906, -39.887298583984375, 442.1194152832031, 381.326904296875, 159.0938720703125, 386.4883728027344, 107.15997314453125, 416.42938232421875, 457.1092224121094, 101.8538589477539, -143.74761962890625, 109.87956237792969, 367.1031494140625, -97.95024871826172, 168.5762481689453, -81.13703155517578, 189.94427490234375, 123.21839904785156, 226.62551879882812, 460.87860107421875, 137.69491577148438, 83.8119125366211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000342.npy"} +{"epoch": 0.5022026431718062, "step": 343, "batch_size": 64, "mean": 242.99090576171875, "std": 215.8848114013672, "min": -346.01031494140625, "p10": 15.957244110107437, "median": 225.85455322265625, "p90": 509.2142028808595, "max": 833.1605834960938, "pos_frac": 0.921875, "sample": [380.3824157714844, 197.71722412109375, 189.8005828857422, 139.31512451171875, 238.91470336914062, 9.619110107421875, 313.8232421875, 345.66839599609375, 197.92572021484375, 703.9063110351562, 137.08753967285156, -148.32000732421875, 267.24786376953125, 82.41574096679688, 195.02590942382812, 90.19927978515625, 70.9078369140625, 243.4319305419922, 240.81613159179688, 588.8370361328125, 183.35365295410156, 284.5170593261719, 65.18232727050781, 301.2117614746094, 399.60528564453125, 335.2684631347656, 136.95314025878906, 110.82150268554688, 833.1605834960938, 203.4805145263672, 30.74622344970703, 92.52915954589844, 342.9205017089844, 59.398193359375, 268.83062744140625, 334.9310302734375, 244.40298461914062, 92.32200622558594, 6.311031341552734, 443.8487243652344, -346.01031494140625, 321.8801574707031, 163.87979125976562, 425.27716064453125, 317.7696228027344, 768.3525390625, 422.180908203125, 313.61846923828125, 266.6272277832031, 325.8477783203125, 187.17730712890625, 383.5408630371094, 50.75115966796875, -99.94927978515625, 487.57867431640625, 518.486572265625, 179.28965759277344, 164.0310821533203, 185.72299194335938, 212.79440307617188, -38.43262481689453, 643.7760009765625, 621.0908203125, -178.37994384765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000343.npy"} +{"epoch": 0.5036710719530103, "step": 344, "batch_size": 64, "mean": 206.59829711914062, "std": 286.664794921875, "min": -272.1086120605469, "p10": -46.819253540039064, "median": 167.5109100341797, "p90": 556.1293334960938, "max": 1542.0396728515625, "pos_frac": 0.796875, "sample": [37.47114562988281, -74.33667755126953, -44.37937927246094, -12.806312561035156, 52.9300422668457, 74.44680786132812, 160.1852264404297, 110.4963150024414, 78.11216735839844, 256.71661376953125, 4.944368362426758, 858.64794921875, 102.0177230834961, 229.69749450683594, 85.97660827636719, 163.31414794921875, 215.3296661376953, -253.72935485839844, -272.1086120605469, 275.374755859375, 265.13177490234375, 53.91810607910156, 187.1180877685547, 4.909778594970703, 214.39190673828125, 552.8023681640625, 622.9296264648438, 411.4470520019531, 95.12002563476562, -20.899169921875, 392.8197326660156, 235.98779296875, 303.6985778808594, 72.33761596679688, 189.73541259765625, -46.969757080078125, 344.24847412109375, -46.46807861328125, 260.01556396484375, -4.01317024230957, 561.0806884765625, 278.3226013183594, 175.8265838623047, 92.79912567138672, 77.13333892822266, 1542.0396728515625, 412.1560363769531, 297.3023376464844, 6.540666580200195, 638.4414672851562, -23.853172302246094, 498.6276550292969, 171.70767211914062, 563.0457763671875, 93.07844543457031, -195.10060119628906, -156.23394775390625, 474.63482666015625, 557.55517578125, -216.16305541992188, 261.971923828125, 381.1175231933594, 88.72541809082031, 504.9723205566406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000344.npy"} +{"epoch": 0.5051395007342144, "step": 345, "batch_size": 64, "mean": 234.78372192382812, "std": 278.74237060546875, "min": -406.66937255859375, "p10": -57.9231948852539, "median": 188.32535552978516, "p90": 611.0756835937501, "max": 950.9000244140625, "pos_frac": 0.796875, "sample": [-29.96521759033203, 628.5743408203125, 143.65631103515625, 421.6838684082031, 141.88832092285156, -208.68695068359375, 735.4859619140625, -217.9757843017578, 178.32281494140625, 472.23834228515625, -26.628957748413086, 308.40606689453125, 138.8992919921875, 423.0138244628906, 570.2454833984375, 80.81796264648438, -355.8051452636719, 346.4570617675781, 60.64081954956055, 88.87391662597656, 415.1438903808594, 417.7447509765625, 82.71650695800781, 367.64019775390625, 234.0072021484375, 552.0321044921875, 105.00313568115234, 321.40667724609375, 117.76712036132812, 195.0669708251953, 12.45489501953125, 866.3322143554688, 420.4962158203125, 23.483688354492188, 99.802734375, 387.0102233886719, 444.43109130859375, 476.8306884765625, 382.14349365234375, 251.5159454345703, 407.48065185546875, 950.9000244140625, 53.36947250366211, 124.56256103515625, 384.32244873046875, 255.90982055664062, -23.922252655029297, 752.5687255859375, 154.50656127929688, 450.1622314453125, -61.213645935058594, -96.91666412353516, 235.78799438476562, 660.0609741210938, -12.508939743041992, -153.75082397460938, -50.24547576904297, 129.42550659179688, 686.3099365234375, 181.583740234375, 218.04351806640625, 135.66473388671875, -406.66937255859375, -22.446487426757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000345.npy"} +{"epoch": 0.5066079295154186, "step": 346, "batch_size": 64, "mean": 223.27017211914062, "std": 262.3050537109375, "min": -581.8436279296875, "p10": -101.6506507873535, "median": 237.97847747802734, "p90": 582.5382690429689, "max": 774.1378173828125, "pos_frac": 0.796875, "sample": [312.67779541015625, 168.93336486816406, 61.95824432373047, 291.80853271484375, 670.8939208984375, 528.1800537109375, 668.3870849609375, 228.55621337890625, 168.7998504638672, 760.7730102539062, -581.8436279296875, 93.21733093261719, 405.96044921875, 9.67642593383789, 360.2664489746094, 664.0234375, -20.824462890625, 326.0106201171875, 774.1378173828125, 13.694215774536133, 382.29168701171875, 180.74850463867188, -139.99578857421875, 392.0516357421875, 333.7622375488281, 294.78076171875, 325.10003662109375, 243.69866943359375, 404.8995361328125, 144.69003295898438, 8.423643112182617, -106.32662200927734, 212.95201110839844, 84.02534484863281, 613.33251953125, 232.25828552246094, 263.586669921875, 130.7667236328125, 291.73468017578125, -160.44996643066406, -31.171669006347656, 595.9351806640625, 308.5205993652344, 538.5280151367188, 160.7188262939453, 377.86224365234375, 418.238037109375, 359.506103515625, -18.638935089111328, -67.04766845703125, 257.5100402832031, -65.33622741699219, 66.10690307617188, -199.37196350097656, 149.4930877685547, 551.27880859375, 116.09115600585938, 138.91583251953125, 472.0673522949219, -200.45138549804688, -90.74005126953125, -169.24041748046875, 289.54254150390625, 293.3576965332031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000346.npy"} +{"epoch": 0.5080763582966226, "step": 347, "batch_size": 64, "mean": 222.094482421875, "std": 288.4271240234375, "min": -362.2097473144531, "p10": -104.44055175781249, "median": 180.3140640258789, "p90": 607.3411437988283, "max": 906.79248046875, "pos_frac": 0.765625, "sample": [-362.2097473144531, 273.6938171386719, 452.1251220703125, 370.8460998535156, 75.13335418701172, -34.343971252441406, 191.42355346679688, 304.13018798828125, 708.7015380859375, 9.980400085449219, 468.96942138671875, 32.14906311035156, 198.8563995361328, -109.92556762695312, 418.9510498046875, -261.75799560546875, -277.954833984375, -91.64218139648438, 798.8404541015625, 105.8744125366211, 496.8995056152344, 56.106204986572266, 172.68853759765625, 318.56646728515625, -199.11273193359375, 535.0902709960938, 569.693115234375, 343.8525390625, -7.850700378417969, 709.7235107421875, 338.37225341796875, 692.3095092773438, 328.2703857421875, -57.497474670410156, 223.283447265625, -70.54680633544922, 501.550048828125, 153.14013671875, 118.81256103515625, -82.04911804199219, 145.60110473632812, 290.9647521972656, 86.68651580810547, 334.556396484375, 159.02865600585938, 66.63943481445312, 120.51959228515625, -28.32508087158203, 906.79248046875, 89.46414947509766, 365.87353515625, 623.4760131835938, 187.93959045410156, 844.5555419921875, -86.6343765258789, 36.855201721191406, 370.4533386230469, -209.0651397705078, 462.77093505859375, 2.96466064453125, 466.3639831542969, -134.80821228027344, 151.1586151123047, 547.0722045898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000347.npy"} +{"epoch": 0.5095447870778267, "step": 348, "batch_size": 64, "mean": 187.81101989746094, "std": 270.87396240234375, "min": -363.18511962890625, "p10": -127.00883560180662, "median": 188.74897003173828, "p90": 458.00883483886724, "max": 1167.1175537109375, "pos_frac": 0.765625, "sample": [300.0367126464844, 79.13262176513672, 157.44174194335938, 428.04278564453125, 326.1592102050781, -115.8089370727539, 60.11890411376953, 364.3653869628906, 373.88958740234375, -363.18511962890625, 85.53770446777344, 68.18896484375, 633.8875732421875, 251.04122924804688, 126.69551086425781, 322.2403564453125, 9.74940299987793, 353.5805358886719, 123.62872314453125, 142.59201049804688, 334.1405944824219, 329.27947998046875, -100.47856903076172, 1167.1175537109375, 463.8838195800781, 257.09906005859375, -41.241600036621094, 255.54653930664062, -45.42974853515625, 61.78374481201172, -65.44172668457031, -185.90757751464844, -165.60916137695312, 849.7902221679688, 626.219482421875, 334.08087158203125, 340.1655578613281, 280.357666015625, 248.71119689941406, 1.5337047576904297, -273.1419677734375, 438.28594970703125, -39.21833038330078, 327.6294860839844, 267.3740234375, 248.01449584960938, -189.6295166015625, 28.311128616333008, 574.129150390625, 244.6401824951172, 141.15585327148438, -81.71806335449219, 444.300537109375, -272.66741943359375, 361.6112060546875, 220.0561981201172, 73.97748565673828, 15.19351577758789, 29.60760498046875, 559.4339599609375, 87.71505737304688, 329.8070983886719, -131.8087921142578, -56.08981704711914], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000348.npy"} +{"epoch": 0.5110132158590308, "step": 349, "batch_size": 64, "mean": 217.50335693359375, "std": 306.0675048828125, "min": -475.1156311035156, "p10": -101.57076568603513, "median": 139.35480117797852, "p90": 572.3873413085938, "max": 1244.89794921875, "pos_frac": 0.8125, "sample": [115.1472396850586, -347.546630859375, 481.9090270996094, 117.79792022705078, 105.83285522460938, 118.08486938476562, 464.1806640625, -113.33138275146484, 301.5773010253906, 451.5367736816406, 157.56085205078125, 510.467529296875, 124.91922760009766, -39.132102966308594, 28.788557052612305, 88.30290222167969, -33.52393341064453, 10.448997497558594, 267.22113037109375, -170.16900634765625, 533.7699584960938, 367.2481384277344, 353.5882263183594, 17.546850204467773, 347.1381530761719, 2.361858367919922, 432.79345703125, -74.12932586669922, 120.6245346069336, 195.31881713867188, 798.7991333007812, 11.050407409667969, -51.56449508666992, 521.2607421875, 196.1505126953125, -475.1156311035156, -169.02255249023438, 105.30204010009766, 604.271728515625, 59.830265045166016, 743.2655029296875, 10.107879638671875, -247.45477294921875, -44.55085754394531, 555.8693237304688, 610.2361450195312, 949.9808349609375, 191.28456115722656, 579.4664916992188, 153.79037475585938, 410.99627685546875, 428.15936279296875, 8.837760925292969, 336.0804443359375, 283.98284912109375, 260.5033264160156, -163.83132934570312, 76.61701965332031, 8.304454803466797, 482.6034240722656, 392.1660461425781, 37.7109375, 73.89422607421875, 1244.89794921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000349.npy"} +{"epoch": 0.5124816446402349, "step": 350, "batch_size": 64, "mean": 210.67300415039062, "std": 290.224609375, "min": -559.404052734375, "p10": -168.65575714111327, "median": 202.6007537841797, "p90": 552.2191833496096, "max": 919.5656127929688, "pos_frac": 0.8125, "sample": [781.247802734375, -393.1530456542969, 127.42456817626953, 406.3968200683594, 318.8140563964844, 196.0748291015625, 469.2659912109375, 90.33535766601562, -129.0419464111328, 122.57844543457031, 100.57023620605469, 410.20953369140625, -466.1943359375, 133.98313903808594, 200.71319580078125, -1.860015869140625, 658.7420043945312, 502.51837158203125, 919.5656127929688, 269.7825927734375, 189.19668579101562, 216.2998809814453, -31.001150131225586, 473.0497131347656, 252.51397705078125, 728.1618041992188, 163.83480834960938, -189.01556396484375, 153.504638671875, 45.95392608642578, 573.51953125, 87.78376770019531, 178.46487426757812, 170.22305297851562, 365.5515441894531, 306.52642822265625, 179.88482666015625, 302.66912841796875, -269.94793701171875, 699.080078125, 184.51353454589844, 501.85003662109375, 307.5508728027344, -168.52682495117188, 164.96868896484375, 215.6588134765625, 651.4196166992188, 425.8121032714844, 213.76577758789062, 300.64398193359375, -559.404052734375, 460.55084228515625, 206.2333526611328, 260.2356872558594, 97.00862884521484, 204.48831176757812, -345.2802734375, -94.80532836914062, 48.695457458496094, 186.12918090820312, 402.96148681640625, 209.27572631835938, -168.7110137939453, 463.8109130859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000350.npy"} +{"epoch": 0.5139500734214391, "step": 351, "batch_size": 64, "mean": 249.109375, "std": 315.7483825683594, "min": -362.5836181640625, "p10": -121.31057586669915, "median": 225.5657958984375, "p90": 686.1263916015625, "max": 1165.0716552734375, "pos_frac": 0.765625, "sample": [108.01669311523438, 289.76031494140625, 493.60028076171875, 991.5946044921875, 1165.0716552734375, 111.67298889160156, 419.71063232421875, 110.33424377441406, 267.525146484375, 312.4178466796875, 34.46826171875, 224.31637573242188, 715.7949829101562, 801.679443359375, 366.822998046875, 383.0326232910156, 508.03863525390625, 578.3657836914062, 689.7197265625, -30.2067928314209, -254.69711303710938, 407.2801208496094, 132.02468872070312, 242.154296875, 177.92330932617188, 374.4780578613281, 540.7890625, 224.2983856201172, -362.5836181640625, -51.97123718261719, -278.0638732910156, 192.3145751953125, -174.0430908203125, 106.6115951538086, 14.45025634765625, 863.6439208984375, 27.48735809326172, 155.52020263671875, 226.81521606445312, 9.159385681152344, -151.02743530273438, 406.4799499511719, -22.332988739013672, 677.741943359375, 39.197906494140625, -42.96129608154297, 187.9703826904297, -20.56268310546875, -14.43341064453125, -227.7377166748047, -39.306129455566406, 658.2700805664062, 240.03147888183594, 328.8087158203125, 727.476318359375, -181.70091247558594, 298.33038330078125, 660.6409912109375, 330.5956726074219, 281.9280090332031, 167.6889190673828, 320.8551025390625, 248.31837463378906, -46.59984588623047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000351.npy"} +{"epoch": 0.5154185022026432, "step": 352, "batch_size": 64, "mean": 163.22300720214844, "std": 270.824951171875, "min": -548.5924682617188, "p10": -124.13136901855466, "median": 135.60675048828125, "p90": 505.34832153320315, "max": 988.5267944335938, "pos_frac": 0.6875, "sample": [387.8777160644531, 370.7383117675781, 162.42044067382812, 468.0722961425781, -328.4892883300781, 124.62850189208984, 114.91226959228516, 426.7314453125, 97.49577331542969, 219.7432403564453, 625.7510986328125, 495.72430419921875, -38.3562126159668, 22.89806365966797, 215.28355407714844, -32.11872863769531, -86.2177734375, 988.5267944335938, 288.7672424316406, -135.63987731933594, 204.45223999023438, 736.7357177734375, 135.73899841308594, 431.5301513671875, 71.87773132324219, -38.711708068847656, -93.47634887695312, -220.89886474609375, 45.086463928222656, 113.2637939453125, -89.86337280273438, 509.472900390625, 17.240291595458984, 35.287620544433594, 559.0362548828125, 138.08282470703125, -548.5924682617188, 89.48006439208984, 238.93267822265625, 372.64453125, 239.7925567626953, 196.69125366210938, 187.64393615722656, -202.01034545898438, -62.47053909301758, -57.430938720703125, -185.72189331054688, 544.9572143554688, 463.031494140625, -185.4149627685547, 270.81829833984375, -59.962852478027344, -77.29170989990234, 468.72906494140625, 258.5804138183594, 201.53091430664062, 408.75152587890625, -4.577568054199219, 557.3574829101562, 135.47450256347656, -2.393890380859375, -97.27818298339844, 226.74270629882812, 124.65505981445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000352.npy"} +{"epoch": 0.5168869309838473, "step": 353, "batch_size": 64, "mean": 205.90402221679688, "std": 277.3384704589844, "min": -422.21875, "p10": -82.2024528503418, "median": 165.5747299194336, "p90": 631.4282897949219, "max": 838.9122314453125, "pos_frac": 0.71875, "sample": [384.8399658203125, 360.5799865722656, 27.24181365966797, 454.84344482421875, 158.81219482421875, 402.8148498535156, 57.30754089355469, -101.98181915283203, 569.3756713867188, 203.1766357421875, 221.84579467773438, -80.83997344970703, 267.84991455078125, 630.7586669921875, 69.61056518554688, 423.23370361328125, -136.3164520263672, 52.823455810546875, 579.0679931640625, 641.4387817382812, 159.4966278076172, 294.9997863769531, -33.06424331665039, 90.53616333007812, -422.21875, 363.29150390625, -237.62640380859375, -79.36359405517578, -82.2647476196289, 270.6630554199219, -28.69227409362793, 171.65283203125, 327.900146484375, 331.2127685546875, 35.483551025390625, -30.100154876708984, 46.984405517578125, -156.73519897460938, 810.0831298828125, 1.605499267578125, -53.43011474609375, 641.6067504882812, -24.250526428222656, 60.67230987548828, 118.44405364990234, 340.28216552734375, 337.18280029296875, 202.38670349121094, -22.548309326171875, 645.677001953125, 266.5663146972656, -49.47261047363281, 593.7626342773438, 372.9651794433594, 838.9122314453125, 770.0934448242188, -161.40098571777344, 357.7911376953125, 631.7152709960938, -82.05709838867188, 91.60472869873047, 83.05088806152344, -4.27911376953125, 202.25631713867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000353.npy"} +{"epoch": 0.5183553597650514, "step": 354, "batch_size": 64, "mean": 225.83035278320312, "std": 217.39413452148438, "min": -169.05807495117188, "p10": -45.262419128417946, "median": 189.5939483642578, "p90": 465.3043975830078, "max": 857.239990234375, "pos_frac": 0.84375, "sample": [303.31182861328125, 438.90057373046875, 236.46250915527344, 58.29266357421875, 463.8636169433594, 186.97348022460938, 623.3359375, -84.85835266113281, 225.27944946289062, -99.35398864746094, -19.801307678222656, -169.05807495117188, 249.4545135498047, 857.239990234375, 432.9577331542969, 180.69216918945312, -15.814933776855469, 379.925537109375, 474.9245910644531, 284.3936767578125, 30.30628204345703, 375.01678466796875, 175.38699340820312, 87.44744873046875, -73.37310791015625, 128.60459899902344, 378.5347595214844, 214.92433166503906, 112.73128509521484, 362.8458251953125, 492.3572998046875, -109.75169372558594, 289.4952697753906, 267.2205505371094, 164.2452392578125, 145.46339416503906, -56.17432403564453, 457.1093444824219, 192.21441650390625, 100.05641174316406, 437.8799743652344, 7.404121398925781, 367.18463134765625, 181.46591186523438, 42.08618927001953, 316.1876220703125, 96.7138900756836, 664.2189331054688, 393.4412841796875, -16.519485473632812, 14.693960189819336, 388.2969970703125, 366.73638916015625, 111.76849365234375, -80.87805938720703, 741.507080078125, 71.80931854248047, 370.8953857421875, 398.09100341796875, 72.09089660644531, 465.921875, 142.42816162109375, 152.73135375976562, 5.203243255615234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000354.npy"} +{"epoch": 0.5198237885462555, "step": 355, "batch_size": 64, "mean": 217.17283630371094, "std": 231.02503967285156, "min": -327.46044921875, "p10": -49.661719131469724, "median": 205.7917251586914, "p90": 510.7481903076172, "max": 887.240234375, "pos_frac": 0.796875, "sample": [116.16668701171875, 533.6483154296875, 278.3053894042969, 505.6816711425781, 40.656883239746094, 252.47349548339844, 245.6171875, 887.240234375, -293.6103515625, 277.38037109375, 209.0682373046875, 603.9563598632812, -13.537178039550781, -17.543163299560547, 447.95989990234375, 503.3978271484375, -99.651123046875, 558.8836059570312, 160.5736541748047, 140.1217041015625, 425.26165771484375, 260.8700866699219, -64.52674865722656, -8.357181549072266, 373.9976806640625, 512.9195556640625, 465.44366455078125, 257.1672058105469, 151.218994140625, -2.527587890625, -52.315181732177734, 194.07745361328125, 29.709671020507812, -327.46044921875, 211.65443420410156, -142.09410095214844, 334.6954345703125, 613.1546630859375, 312.1220703125, 699.6394653320312, 99.28009796142578, 202.5152130126953, 117.64109802246094, 169.88711547851562, 139.87185668945312, 269.8138732910156, 106.79373931884766, 409.9886779785156, 88.4473876953125, 194.48794555664062, 476.268798828125, 314.7740173339844, 311.76983642578125, 223.64111328125, 111.10945129394531, -43.470306396484375, 120.04212951660156, -112.44973754882812, 221.80325317382812, -22.79788589477539, 71.4034423828125, 324.9000244140625, 370.2427978515625, 151.65655517578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000355.npy"} +{"epoch": 0.5212922173274597, "step": 356, "batch_size": 64, "mean": 123.26990509033203, "std": 301.80596923828125, "min": -621.2181396484375, "p10": -238.04916839599608, "median": 97.69329833984375, "p90": 515.8371734619142, "max": 956.5797119140625, "pos_frac": 0.65625, "sample": [219.978515625, 8.569938659667969, 349.00836181640625, 76.84358978271484, -42.15108108520508, -90.00392150878906, -98.74748229980469, 220.48291015625, -103.7808837890625, 217.7760009765625, 319.6617431640625, 0.7723579406738281, 723.2360229492188, 478.6294860839844, -101.9127197265625, 559.1945190429688, -151.67897033691406, -212.13209533691406, -621.2181396484375, -243.90524291992188, 39.43730926513672, 766.0503540039062, -103.25559997558594, 121.19192504882812, 346.813720703125, 531.7833251953125, -59.290855407714844, 372.8605041503906, -297.1271667480469, 54.36608123779297, -270.709228515625, 673.0693969726562, 98.75187683105469, -282.7626953125, 411.4334716796875, 96.63471984863281, 268.80462646484375, 352.129638671875, -220.1803436279297, 77.13975524902344, -78.54130554199219, -106.9095230102539, 165.183837890625, 261.60943603515625, 8.365638732910156, 239.79788208007812, 285.79559326171875, -224.38499450683594, 696.7535400390625, 251.27810668945312, 181.09735107421875, 27.696990966796875, -326.26910400390625, -4.186836242675781, 230.06100463867188, 107.56620025634766, 154.21067810058594, 281.9537048339844, -448.1015930175781, 956.5797119140625, 371.2412109375, 308.2940673828125, -3.8376846313476562, 68.25614166259766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000356.npy"} +{"epoch": 0.5227606461086637, "step": 357, "batch_size": 64, "mean": 204.9359588623047, "std": 254.5968017578125, "min": -643.0918579101562, "p10": -50.497539520263665, "median": 208.58535766601562, "p90": 569.1196350097656, "max": 813.190673828125, "pos_frac": 0.796875, "sample": [375.7826232910156, 568.7337036132812, -22.680679321289062, 627.0646362304688, 511.2090148925781, 12.186019897460938, 72.77601623535156, 427.3900146484375, 148.82284545898438, 569.2850341796875, 278.397705078125, 72.59542083740234, -111.18607330322266, 552.9832153320312, -54.62481689453125, 126.67960357666016, 221.98178100585938, 223.89639282226562, 296.52337646484375, -40.27961730957031, 813.190673828125, -40.867225646972656, 328.30377197265625, -2.9262847900390625, -643.0918579101562, 52.93991470336914, 266.23272705078125, 161.96212768554688, -59.361289978027344, 584.8221435546875, 257.7695617675781, -157.07855224609375, 479.7547302246094, -149.72006225585938, 339.48651123046875, 12.974834442138672, 415.766357421875, 794.63427734375, 301.7293701171875, 287.8081970214844, 2.760223388671875, -5.278436660766602, 292.34490966796875, 587.9090576171875, 87.05841827392578, 102.72848510742188, 68.36790466308594, 235.87826538085938, 9.660152435302734, 202.7564697265625, -21.130613327026367, 304.31890869140625, 214.41424560546875, 10.398513793945312, 289.08062744140625, 301.94940185546875, 115.3683853149414, 304.242919921875, -67.96731567382812, 238.79725646972656, 125.15278625488281, 662.8575439453125, 14.98876953125, 137.37802124023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000357.npy"} +{"epoch": 0.5242290748898678, "step": 358, "batch_size": 64, "mean": 193.22451782226562, "std": 253.00112915039062, "min": -405.0308837890625, "p10": -54.989661026000974, "median": 164.70167541503906, "p90": 514.1653564453127, "max": 873.9393310546875, "pos_frac": 0.765625, "sample": [71.14309692382812, 615.77294921875, 74.85267639160156, 295.2507019042969, 234.7978973388672, 364.5135192871094, 44.11436462402344, 290.56640625, 120.34248352050781, 84.10945129394531, -14.104438781738281, -60.88946533203125, -129.16256713867188, -27.704421997070312, 11.228322982788086, 705.430908203125, -35.446189880371094, 529.69921875, 51.865814208984375, 95.33175659179688, 36.12548065185547, 6.3000335693359375, 17.977737426757812, 57.321266174316406, 165.96713256835938, -405.0308837890625, 339.14691162109375, -157.0172119140625, 873.9393310546875, 410.62322998046875, 450.95831298828125, 174.9915771484375, 302.56512451171875, -19.362037658691406, 570.6104125976562, 204.03424072265625, -17.21746063232422, 82.72723388671875, -48.72325897216797, 355.1905822753906, 330.75018310546875, 432.1198425292969, -147.69256591796875, 316.203369140625, 410.5723571777344, -56.256866455078125, 351.2312927246094, 821.76513671875, -52.0328483581543, 100.95378112792969, 388.90216064453125, 165.18508911132812, 99.35104370117188, -18.56678009033203, 477.919677734375, 866.8441162109375, 88.11620330810547, 171.79202270507812, 228.55259704589844, -71.69400024414062, 186.8194122314453, 164.21826171875, 219.1600341796875, 169.315673828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000358.npy"} +{"epoch": 0.5256975036710719, "step": 359, "batch_size": 64, "mean": 226.85186767578125, "std": 291.6161804199219, "min": -424.29290771484375, "p10": -117.23266601562499, "median": 179.84461975097656, "p90": 629.8946411132814, "max": 1042.3411865234375, "pos_frac": 0.78125, "sample": [-424.29290771484375, 239.85284423828125, 251.2032470703125, 493.4065856933594, -175.52719116210938, 709.6019897460938, -171.73947143554688, 929.85791015625, 139.5081787109375, -78.84074401855469, -120.56642150878906, -53.795509338378906, 368.181396484375, 316.9270935058594, -205.48883056640625, 41.20790100097656, 387.42059326171875, -37.710601806640625, 437.789794921875, -145.48025512695312, 297.2698059082031, 216.912841796875, 489.3533935546875, 90.60033416748047, 24.499835968017578, 153.6354522705078, 31.32541847229004, 514.13720703125, 103.1246337890625, 25.145469665527344, 465.154541015625, 329.64666748046875, 1042.3411865234375, 450.53936767578125, -109.45390319824219, 45.71497344970703, 187.1065673828125, 101.26506805419922, -190.062744140625, 700.5738525390625, 165.01089477539062, 473.9675598144531, 253.05210876464844, 354.07037353515625, -44.950714111328125, 164.79542541503906, 481.84197998046875, 236.57257080078125, 126.19458770751953, 692.501220703125, 53.036170959472656, -37.25907897949219, -52.90000915527344, 351.9571838378906, 191.2736358642578, 641.3302001953125, 3.171661376953125, 235.92039489746094, 773.8922119140625, 517.6588134765625, 139.77407836914062, 603.211669921875, 151.4687042236328, 172.58267211914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000359.npy"} +{"epoch": 0.527165932452276, "step": 360, "batch_size": 64, "mean": 171.06539916992188, "std": 208.84901428222656, "min": -154.85736083984375, "p10": -77.15728683471679, "median": 123.34841918945312, "p90": 467.5110412597657, "max": 663.939697265625, "pos_frac": 0.75, "sample": [54.676605224609375, 475.98602294921875, -0.34433746337890625, 123.3043212890625, -67.32466888427734, 365.46478271484375, -21.562583923339844, 183.53062438964844, 340.7674865722656, 184.21730041503906, 113.92777252197266, 495.3102722167969, -117.86405944824219, 307.23486328125, 663.939697265625, 121.28193664550781, 36.54471969604492, 21.953262329101562, 315.05462646484375, 64.14236450195312, 408.5226745605469, 213.43136596679688, -55.29540252685547, 249.2801971435547, 48.85042953491211, 329.68475341796875, 447.736083984375, 21.312301635742188, 7.163043975830078, 496.9223937988281, 541.263671875, 360.1871032714844, 532.26953125, -60.55027770996094, 52.101173400878906, 142.99183654785156, 185.3875732421875, 111.12945556640625, 398.91839599609375, -80.55560302734375, 93.25907897949219, 628.3193359375, -145.58526611328125, -108.10052490234375, -19.439422607421875, 160.73541259765625, -154.85736083984375, 163.42884826660156, -26.2668514251709, -128.95216369628906, -4.0553131103515625, 354.4296875, 23.860998153686523, 340.9753723144531, 352.7489318847656, 258.8656005859375, 283.77569580078125, -94.2626724243164, 123.39251708984375, 408.0360412597656, 101.03120422363281, -69.2278823852539, 24.997215270996094, 370.0855712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000360.npy"} +{"epoch": 0.5286343612334802, "step": 361, "batch_size": 64, "mean": 172.15640258789062, "std": 256.9750061035156, "min": -437.1697082519531, "p10": -114.23766632080077, "median": 177.1668243408203, "p90": 538.2269409179688, "max": 762.0772705078125, "pos_frac": 0.71875, "sample": [21.334890365600586, 194.56243896484375, 2.519390106201172, 390.6424560546875, -66.4419937133789, -264.37554931640625, -35.97937774658203, 202.8860626220703, 548.3711547851562, 139.8985137939453, 178.58126831054688, 243.32406616210938, -437.1697082519531, 410.0882568359375, -64.66041564941406, -78.55204010009766, 212.23895263671875, 157.58876037597656, -278.33953857421875, 7.546201705932617, 228.18649291992188, -142.30096435546875, 39.36444091796875, -8.49197006225586, 211.92872619628906, 762.0772705078125, 307.3194274902344, 476.481201171875, 143.94677734375, 424.42816162109375, -54.39741516113281, -57.63291549682617, 186.7921905517578, 159.98867797851562, 443.76153564453125, 542.934326171875, -99.63929748535156, 746.73046875, 677.2399291992188, 486.3592529296875, 195.1734161376953, 159.97398376464844, 543.3037109375, 400.6988220214844, 175.75238037109375, 188.73419189453125, 76.02825164794922, 217.02523803710938, -56.0482177734375, -89.93538665771484, 44.138824462890625, -120.49411010742188, 527.2430419921875, -21.511520385742188, 333.75970458984375, 282.1224365234375, 143.75146484375, -143.8756866455078, 276.8079528808594, 5.6028289794921875, 202.35552978515625, -257.2837219238281, 546.5445556640625, 429.0020751953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000361.npy"} +{"epoch": 0.5301027900146843, "step": 362, "batch_size": 64, "mean": 270.275146484375, "std": 266.3769836425781, "min": -165.82254028320312, "p10": -38.70624675750731, "median": 226.66297149658203, "p90": 607.7518737792968, "max": 1299.4837646484375, "pos_frac": 0.828125, "sample": [100.33413696289062, 124.97161865234375, 497.37646484375, 598.0282592773438, 307.1829528808594, -16.28545379638672, 1299.4837646484375, 250.25473022460938, 145.36947631835938, 660.837646484375, 511.7833557128906, 110.68950653076172, 33.252235412597656, 141.21060180664062, 244.20729064941406, 512.4608154296875, 225.56761169433594, -45.86341094970703, 99.66448974609375, 367.09344482421875, 459.39306640625, 117.28994750976562, 32.813819885253906, -70.230224609375, -11.457992553710938, -22.006196975708008, 153.96905517578125, 189.0505828857422, -88.97039031982422, 620.1868286132812, 608.8117065429688, 128.4949188232422, -88.913818359375, 280.6134948730469, 278.7261962890625, 179.4718017578125, 544.5028076171875, -47.453773498535156, 204.98216247558594, 605.2789306640625, 237.5467071533203, 74.95970153808594, 724.771484375, 463.4432067871094, 135.72543334960938, 135.10699462890625, 393.81036376953125, 183.15325927734375, 40.12037658691406, 771.099365234375, 241.55502319335938, 689.792236328125, 528.4293212890625, 379.07977294921875, 227.75833129882812, 466.9929504394531, 527.7120361328125, -93.18991088867188, 137.68983459472656, 346.9886779785156, 287.7827453613281, 323.9691467285156, -3.036773681640625, -165.82254028320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000362.npy"} +{"epoch": 0.5315712187958884, "step": 363, "batch_size": 64, "mean": 278.87579345703125, "std": 276.615966796875, "min": -274.6435546875, "p10": -25.984047698974603, "median": 258.3417205810547, "p90": 635.4634216308594, "max": 1295.371337890625, "pos_frac": 0.84375, "sample": [-274.6435546875, 357.59722900390625, 615.2916259765625, 251.7391815185547, 108.55593872070312, 258.0469970703125, 289.0909423828125, 256.4764709472656, 236.69898986816406, 130.6510772705078, 358.65234375, 250.5505828857422, 338.423583984375, 18.59156036376953, 366.93280029296875, 875.128173828125, -48.20855712890625, 641.292724609375, 61.31919860839844, 724.2388305664062, 467.2854919433594, 69.9936752319336, 326.021240234375, 387.03326416015625, 295.1756896972656, 412.8154296875, 1295.371337890625, 579.2277221679688, -28.796142578125, 606.6651611328125, 183.89630126953125, 205.31375122070312, 29.494049072265625, 292.9000244140625, 457.0859680175781, 75.06824493408203, 245.1021728515625, -60.441986083984375, 640.2781982421875, 458.0518798828125, 409.1338806152344, -162.23812866210938, 40.918540954589844, 403.9211120605469, -35.99065399169922, 372.5360412597656, 624.2289428710938, 3.0277099609375, 261.79669189453125, 451.8146667480469, -19.42249298095703, 138.2668914794922, -120.39131927490234, 420.17156982421875, 258.6364440917969, 25.81037139892578, -18.167213439941406, 229.40545654296875, 49.09579086303711, 38.69483184814453, 699.6109008789062, 720.3073120117188, -6.158454895019531, 309.0723876953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000363.npy"} +{"epoch": 0.5330396475770925, "step": 364, "batch_size": 64, "mean": 206.98001098632812, "std": 284.074462890625, "min": -503.4626770019531, "p10": -75.51435012817382, "median": 197.69229125976562, "p90": 539.1287536621095, "max": 929.097412109375, "pos_frac": 0.765625, "sample": [455.67767333984375, 264.6579284667969, -201.2825164794922, 48.01299285888672, -503.4626770019531, 579.0126342773438, -278.07452392578125, 321.4465637207031, 237.22000122070312, 895.8645629882812, 183.46885681152344, 659.273681640625, 429.9105224609375, 347.839599609375, -67.28389739990234, 71.25350952148438, 54.5244140625, -262.9456481933594, 314.5033874511719, -27.165130615234375, -79.04168701171875, 441.8751525878906, 220.88922119140625, 428.6942138671875, 351.4113464355469, 18.056001663208008, -10.70938491821289, 508.51519775390625, 211.9157257080078, 306.22564697265625, -18.245407104492188, 685.1993408203125, 134.782958984375, -27.715362548828125, 89.14212799072266, 100.77423095703125, 431.357421875, 327.9013977050781, 284.1512756347656, -153.63455200195312, 102.0810775756836, 551.8283081054688, 693.09228515625, 96.96604919433594, 331.53594970703125, -20.01886749267578, -9.337259292602539, 386.50189208984375, 509.4964599609375, 371.9898986816406, 43.77534103393555, 172.11636352539062, 1.9919414520263672, 156.8759765625, 929.097412109375, 73.16552734375, 379.943603515625, 316.966796875, -60.17420196533203, 240.87164306640625, 59.2572021484375, 180.313720703125, 436.0001220703125, -471.6141662597656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000364.npy"} +{"epoch": 0.5345080763582967, "step": 365, "batch_size": 64, "mean": 220.86419677734375, "std": 265.739501953125, "min": -223.26499938964844, "p10": -73.94388465881346, "median": 171.18619537353516, "p90": 531.5027709960939, "max": 1258.5836181640625, "pos_frac": 0.84375, "sample": [187.06076049804688, 169.68460083007812, 86.23272705078125, 139.1869354248047, 138.885498046875, 140.01654052734375, 357.2535705566406, 101.04480743408203, 215.63714599609375, 104.47134399414062, 268.20379638671875, 492.94305419921875, -91.28053283691406, 221.4537353515625, 85.05195617675781, -40.789093017578125, 105.49674987792969, 739.561767578125, 577.9818725585938, 291.4723815917969, 146.86874389648438, 58.08362579345703, 65.5234603881836, 17.781005859375, 258.14410400390625, -123.00201416015625, 93.56991577148438, 35.50967788696289, 172.6877899169922, 190.96490478515625, -81.89768981933594, -55.385005950927734, 111.54312896728516, 37.35123062133789, 415.28662109375, 646.5156860351562, 435.568359375, 78.47817993164062, -152.69744873046875, 511.2640380859375, 270.8817138671875, -172.76690673828125, 226.06277465820312, 41.480491638183594, -11.607904434204102, 362.9383544921875, 258.74896240234375, 333.1632080078125, 337.0144348144531, -174.20184326171875, 52.086524963378906, 402.28076171875, 7.857919692993164, 278.1658630371094, 258.638916015625, 318.7627258300781, 1258.5836181640625, 540.176513671875, 454.7574462890625, -223.26499938964844, 836.149658203125, 809.84423828125, 127.795166015625, 390.03265380859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000365.npy"} +{"epoch": 0.5359765051395007, "step": 366, "batch_size": 64, "mean": 224.57408142089844, "std": 290.8706359863281, "min": -331.88525390625, "p10": -118.52190933227537, "median": 195.93498992919922, "p90": 603.5544738769531, "max": 1046.95556640625, "pos_frac": 0.78125, "sample": [-87.52205657958984, 203.11351013183594, 322.6123046875, 114.14015197753906, -273.9696044921875, 34.331138610839844, -93.45601654052734, 68.57464599609375, -84.23583984375, 465.42767333984375, 77.91476440429688, -226.40512084960938, 524.1589965820312, 712.299072265625, 188.7564697265625, -77.11372375488281, 331.1839294433594, 32.409847259521484, 141.08518981933594, -88.94038391113281, 327.1669006347656, 603.7803344726562, 360.21636962890625, 19.695053100585938, 575.6102294921875, 1046.95556640625, 352.88665771484375, 107.00676727294922, 619.2191162109375, 213.8502197265625, 180.15081787109375, -54.9912109375, -218.08255004882812, 593.9624633789062, 374.7530517578125, 360.50933837890625, 842.1207275390625, 352.7027893066406, 229.80657958984375, 124.15855407714844, 227.8479461669922, 144.67929077148438, 58.873199462890625, -175.45086669921875, 548.2000122070312, -158.68234252929688, 308.34442138671875, 41.4218635559082, 519.8411254882812, 603.0274658203125, 401.329345703125, 306.087158203125, 720.1510009765625, 377.0036926269531, 730.1119384765625, 252.14126586914062, -66.15644836425781, 40.94044494628906, -129.26443481445312, 125.99099731445312, 131.8443603515625, 102.2654800415039, 298.236572265625, -331.88525390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000366.npy"} +{"epoch": 0.5374449339207048, "step": 367, "batch_size": 64, "mean": 185.85562133789062, "std": 235.98663330078125, "min": -499.4107360839844, "p10": -67.85486907958985, "median": 130.7096405029297, "p90": 495.7415222167969, "max": 776.8563232421875, "pos_frac": 0.78125, "sample": [99.56817626953125, 146.11888122558594, 500.16741943359375, 82.30305480957031, 639.087646484375, -0.7113056182861328, 377.310791015625, 112.107666015625, 510.67596435546875, 80.51422882080078, 27.862564086914062, 1.2100849151611328, -65.00469970703125, 511.2389831542969, 467.7801513671875, 285.27606201171875, 450.292236328125, 128.13706970214844, 219.00379943847656, -69.07637023925781, 272.2195129394531, 402.89599609375, -75.44501495361328, -19.127479553222656, 377.2574768066406, 282.1978454589844, 776.8563232421875, -18.00483512878418, 532.77734375, 116.69415283203125, 466.83184814453125, 187.5366973876953, -25.639156341552734, 324.75396728515625, 74.63276672363281, 226.62844848632812, 330.40264892578125, 276.59307861328125, 400.6221008300781, 74.11085510253906, 198.27113342285156, -243.291015625, -193.0425262451172, -38.78594970703125, -82.4718017578125, 46.469688415527344, 99.4920654296875, 423.5298767089844, 96.74757385253906, 67.50054931640625, -214.32553100585938, -499.4107360839844, 115.22721099853516, 121.95927429199219, 248.08413696289062, 364.2083740234375, 433.5205383300781, 307.05780029296875, 530.1464233398438, 2.2109222412109375, 133.28221130371094, -6.711326599121094, 485.4144287109375, 11.019134521484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000367.npy"} +{"epoch": 0.5389133627019089, "step": 368, "batch_size": 64, "mean": 291.7165222167969, "std": 224.1376953125, "min": -209.4979248046875, "p10": 5.745568275451675, "median": 309.0730438232422, "p90": 556.2475524902344, "max": 1037.515625, "pos_frac": 0.890625, "sample": [298.2129821777344, 318.2940673828125, 201.20054626464844, 536.4482421875, 388.046630859375, 597.556396484375, 148.89752197265625, 63.19331359863281, 571.6888427734375, 206.74081420898438, -18.93008041381836, 381.1854248046875, 472.70166015625, 57.384395599365234, 306.87738037109375, 375.28759765625, 311.2687072753906, 209.8287811279297, -31.84082794189453, 414.7477111816406, 219.26882934570312, 277.1521911621094, 562.42822265625, 347.048583984375, 376.6642761230469, 131.02642822265625, 403.22308349609375, -0.7414493560791016, 46.13924789428711, 35.11100769042969, 192.7979278564453, 417.41607666015625, 435.5819396972656, 397.58514404296875, 427.4036865234375, 541.8259887695312, 392.51422119140625, 46.4296875, 183.2986297607422, -23.837669372558594, 326.78765869140625, 509.4749755859375, 412.7480773925781, 643.420166015625, 162.05784606933594, 335.3507385253906, 677.7621459960938, 20.881942749023438, -209.4979248046875, 116.45211791992188, 106.41707611083984, 514.0016479492188, 462.79345703125, 500.7160339355469, 360.89019775390625, 32.489044189453125, 248.48480224609375, 1037.515625, 148.3541259765625, 245.2274169921875, 296.2356262207031, -41.27093505859375, -93.90888977050781, 639.347900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000368.npy"} +{"epoch": 0.540381791483113, "step": 369, "batch_size": 64, "mean": 150.88702392578125, "std": 281.6910705566406, "min": -675.739501953125, "p10": -158.70631256103516, "median": 127.66310501098633, "p90": 486.90813598632826, "max": 837.679931640625, "pos_frac": 0.765625, "sample": [-229.9666748046875, -123.71820831298828, 837.679931640625, -34.066932678222656, -251.5995635986328, 19.22450065612793, -54.88530731201172, 280.1546936035156, 29.267024993896484, 692.3514404296875, 213.1963653564453, -138.067138671875, 127.60208892822266, 337.1683654785156, 318.84600830078125, 812.7274780273438, 519.3330078125, 158.93118286132812, 42.562442779541016, 405.5589904785156, 295.1152648925781, 112.99641418457031, 110.55197143554688, 275.1623229980469, 218.93478393554688, 216.249755859375, -470.86279296875, 271.17047119140625, 126.47036743164062, 182.50836181640625, 262.32061767578125, 343.662109375, 76.37419891357422, 126.33662414550781, 174.94851684570312, 1.0056381225585938, 341.0634765625, 91.68511199951172, -150.85719299316406, 292.55902099609375, 499.1853942871094, -205.2745819091797, 722.2637939453125, 31.132308959960938, 458.2611999511719, 170.60906982421875, 382.307861328125, -479.3484802246094, 0.4967994689941406, 600.6104736328125, 157.7859649658203, 117.82749938964844, -675.739501953125, 86.87833404541016, 220.573486328125, -162.07022094726562, -35.21937561035156, 94.86280059814453, 121.41999053955078, 420.93511962890625, 184.88980102539062, -6.150032043457031, 127.72412109375, -36.887718200683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000369.npy"} +{"epoch": 0.5418502202643172, "step": 370, "batch_size": 64, "mean": 148.7425537109375, "std": 294.4658508300781, "min": -408.1298522949219, "p10": -231.6700729370117, "median": 153.73001098632812, "p90": 524.8428405761719, "max": 907.7745361328125, "pos_frac": 0.734375, "sample": [104.18811798095703, 307.9581604003906, 239.52476501464844, -396.65850830078125, 692.9439697265625, 422.63873291015625, 132.2694091796875, 308.8455810546875, 10.618772506713867, -154.65029907226562, 443.3802490234375, 518.4241943359375, 44.2825927734375, 101.222412109375, -103.44175720214844, 527.5936889648438, 224.96023559570312, 199.27587890625, 239.556396484375, 907.7745361328125, 881.2099609375, 227.91928100585938, 320.76910400390625, -212.33956909179688, 296.4705810546875, -124.3050308227539, 2.398681640625, 294.63421630859375, -203.38653564453125, 173.51055908203125, 532.7125854492188, 141.04150390625, 6.4398956298828125, 283.13482666015625, -23.73111343383789, 408.5389404296875, 245.47280883789062, -193.95797729492188, -350.17816162109375, 41.07817077636719, 176.29855346679688, 225.9080810546875, -239.95457458496094, 70.87689208984375, 451.6136474609375, 86.77606201171875, 166.41851806640625, 303.8426513671875, 231.03465270996094, -408.1298522949219, -112.80195617675781, -49.663055419921875, 119.622314453125, 116.61962890625, -300.8988952636719, -337.66656494140625, 9.671279907226562, -303.9982604980469, -155.79119873046875, 508.068115234375, 613.645751953125, 236.57150268554688, 3.3883056640625, 589.9326171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000370.npy"} +{"epoch": 0.5433186490455213, "step": 371, "batch_size": 64, "mean": 213.51039123535156, "std": 302.1003112792969, "min": -525.7710571289062, "p10": -112.03485946655273, "median": 173.6386947631836, "p90": 620.3423461914065, "max": 948.9454956054688, "pos_frac": 0.734375, "sample": [414.0886535644531, 265.64654541015625, 502.0444030761719, 93.98372650146484, -525.7710571289062, 155.08657836914062, 525.01220703125, 227.8270263671875, 825.3206176757812, 291.887939453125, 444.7469482421875, -37.159889221191406, 371.08685302734375, 217.65530395507812, -321.7696838378906, 114.44819641113281, -12.044921875, 181.85076904296875, 523.041748046875, -13.57330322265625, -51.34605407714844, -243.63380432128906, 640.903076171875, -109.34025573730469, -201.35968017578125, 178.13975524902344, 943.1183471679688, 686.9771728515625, -234.5986785888672, 21.632749557495117, 652.395263671875, -21.318649291992188, 380.1371154785156, 254.70362854003906, -113.18968963623047, -49.62841796875, 572.3673095703125, -134.54010009765625, 272.77215576171875, 427.1435546875, -18.272005081176758, 117.46236419677734, 768.3887939453125, 86.8384780883789, 21.171733856201172, 272.3102722167969, 328.83721923828125, 384.75, 509.225830078125, -30.429153442382812, 428.5413818359375, 113.26897430419922, 78.66261291503906, 337.8795166015625, 153.08116149902344, 169.13763427734375, 118.18379211425781, 73.32341003417969, 4.9844818115234375, 489.18597412109375, 288.5776062011719, 948.9454956054688, -107.40946960449219, 13.276016235351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000371.npy"} +{"epoch": 0.5447870778267254, "step": 372, "batch_size": 64, "mean": 255.05174255371094, "std": 344.44561767578125, "min": -474.307861328125, "p10": -139.32240753173826, "median": 187.04007720947266, "p90": 744.1325866699219, "max": 1469.330322265625, "pos_frac": 0.765625, "sample": [311.4921875, 291.44970703125, 745.8792114257812, -278.40203857421875, 34.75242614746094, 782.6671142578125, 508.3762512207031, 238.0758056640625, 333.16448974609375, -20.601058959960938, 1469.330322265625, 278.7440185546875, -49.402587890625, 146.48556518554688, -65.87991333007812, 126.32140350341797, 93.24845886230469, 75.05284881591797, 809.7259521484375, -201.34881591796875, 110.99024963378906, 214.84478759765625, 728.9163818359375, -153.88314819335938, 493.8915710449219, 165.03131103515625, 120.5334701538086, 199.51031494140625, 149.7359619140625, 147.77688598632812, 172.13369750976562, 370.0181579589844, 378.10888671875, -103.9732894897461, 161.76339721679688, -168.84315490722656, 372.0992126464844, 740.05712890625, 110.19171905517578, 1018.5391845703125, 613.5473022460938, 357.79278564453125, 187.93019104003906, -113.29501342773438, 483.85546875, 485.5118103027344, 721.0983276367188, 269.45172119140625, -10.843063354492188, 348.49481201171875, 93.44662475585938, 272.87542724609375, 107.00480651855469, -76.2760238647461, 788.00732421875, 151.3739471435547, -57.196197509765625, 186.14996337890625, -150.4770050048828, 855.977783203125, -236.94876098632812, -474.307861328125, 472.37969970703125, 191.18392944335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000372.npy"} +{"epoch": 0.5462555066079295, "step": 373, "batch_size": 64, "mean": 244.83599853515625, "std": 295.3245849609375, "min": -566.85693359375, "p10": -54.396836853027324, "median": 212.18781280517578, "p90": 631.2705932617189, "max": 926.7122192382812, "pos_frac": 0.796875, "sample": [135.54183959960938, -127.73286437988281, -221.37823486328125, 320.443603515625, -0.6485271453857422, 926.7122192382812, 479.0552978515625, 12.487106323242188, 258.0085144042969, 146.17257690429688, 642.808837890625, 200.31712341308594, -61.271636962890625, 239.81253051757812, 413.82861328125, 18.381662368774414, 26.55819320678711, 769.4205322265625, 352.4217224121094, -12.105537414550781, -391.9483642578125, -18.131683349609375, 227.82217407226562, -566.85693359375, 128.66552734375, 56.665565490722656, 157.01156616210938, 135.87686157226562, 604.3480224609375, 416.3261413574219, 218.76792907714844, 467.48675537109375, 326.5714111328125, 572.4375, 508.8157043457031, -89.72320556640625, -164.65716552734375, 584.9679565429688, 280.44427490234375, -37.54582214355469, 845.10302734375, 107.25569915771484, 460.9310302734375, 205.60769653320312, 364.5878601074219, 539.2095336914062, 337.5547180175781, 203.48448181152344, 527.9654541015625, 804.5194091796875, 158.0399169921875, -11.74560546875, 127.212646484375, 237.02255249023438, 11.485580444335938, 273.5474853515625, 114.95503234863281, 120.34396362304688, 785.50341796875, 405.641845703125, 443.7727355957031, 662.4678955078125, -38.35563659667969, 47.21544647216797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000373.npy"} +{"epoch": 0.5477239353891337, "step": 374, "batch_size": 64, "mean": 220.60926818847656, "std": 359.8564453125, "min": -701.3601684570312, "p10": -99.51512145996092, "median": 142.03038024902344, "p90": 584.6535156250001, "max": 1279.7706298828125, "pos_frac": 0.734375, "sample": [-185.86489868164062, 143.352783203125, -28.645950317382812, 1279.7706298828125, 329.87347412109375, 1129.66015625, 296.7781677246094, 614.7344970703125, 459.7200927734375, -29.40021514892578, 140.70797729492188, 333.6917419433594, 27.9381103515625, 198.88427734375, 63.478912353515625, -48.69518280029297, 538.8333740234375, 69.76943969726562, 561.6452026367188, 327.60321044921875, 16.147918701171875, -19.915409088134766, 364.598876953125, 34.94462585449219, -104.68318176269531, 922.3011474609375, -19.583364486694336, 137.3236846923828, -253.1661834716797, 348.4098205566406, 260.0206604003906, 290.361083984375, -35.608062744140625, 1039.818603515625, -458.56903076171875, 110.71903991699219, -47.46672058105469, 580.0202026367188, 106.8442153930664, -38.0467529296875, 159.54576110839844, 37.775917053222656, 55.605934143066406, 85.06096649169922, 283.9967041015625, 159.46633911132812, -159.01136779785156, 491.5002746582031, 379.50421142578125, 461.16412353515625, 104.02037048339844, 586.6392211914062, -87.45631408691406, 452.963134765625, 291.4819641113281, 5.850196838378906, -159.9609375, 231.81939697265625, -5.0610809326171875, 74.06306457519531, 1188.05419921875, -701.3601684570312, 176.049560546875, 548.9745483398438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000374.npy"} +{"epoch": 0.5491923641703378, "step": 375, "batch_size": 64, "mean": 216.62973022460938, "std": 308.5451354980469, "min": -486.10284423828125, "p10": -154.70993194580072, "median": 175.19449615478516, "p90": 597.7040100097657, "max": 1060.9251708984375, "pos_frac": 0.765625, "sample": [687.035888671875, 353.8768310546875, -7.108642578125, 280.14373779296875, -29.459976196289062, -181.27365112304688, 511.296875, 217.90216064453125, 289.91925048828125, 28.561840057373047, -10.871957778930664, -92.72792053222656, 25.269859313964844, -474.855224609375, 138.54043579101562, 928.787353515625, 347.0297546386719, 48.176177978515625, 350.8160095214844, 115.55187225341797, 25.864837646484375, 255.4886474609375, -486.10284423828125, 56.618690490722656, 438.22698974609375, -189.90650939941406, 616.2451171875, 164.1764373779297, 143.6954345703125, 89.1269302368164, -369.9333190917969, 92.84553527832031, 571.3619384765625, 634.5550537109375, 61.7407112121582, 245.36904907226562, 351.4046325683594, 485.4212646484375, 447.9803466796875, 672.693115234375, 372.7077331542969, 186.21255493164062, 111.1461410522461, -314.67864990234375, 453.47186279296875, 487.8197937011719, 304.3350524902344, -294.6617431640625, 1060.9251708984375, 608.9934692382812, 362.6094055175781, -27.197879791259766, -2.407135009765625, 408.5357360839844, 127.63162231445312, 457.3031921386719, 138.29115295410156, 561.8912353515625, -65.1269760131836, 157.16513061523438, 426.91259765625, 456.6193542480469, 104.89755249023438, -52.57648849487305], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000375.npy"} +{"epoch": 0.5506607929515418, "step": 376, "batch_size": 64, "mean": 262.1507568359375, "std": 314.5519104003906, "min": -536.0286865234375, "p10": -154.91670532226559, "median": 214.7726593017578, "p90": 643.559130859375, "max": 888.317138671875, "pos_frac": 0.8125, "sample": [-22.681190490722656, 737.1805419921875, 215.09320068359375, 47.46085739135742, 874.64306640625, 102.65034484863281, 513.808349609375, -241.2737579345703, 50.30027770996094, -2.7172393798828125, 430.94879150390625, 0.4531822204589844, 572.6465454101562, 103.08163452148438, -201.17202758789062, 448.40972900390625, 571.5623168945312, -172.12930297851562, -105.8251953125, 156.3988494873047, 86.59403991699219, 14.148542404174805, -171.24188232421875, 888.317138671875, 790.4453735351562, 122.23052215576172, 422.5576171875, 305.0206298828125, 522.9710083007812, -122.82618713378906, 633.9749755859375, 541.9898071289062, 124.90272521972656, 632.9192504882812, 139.29977416992188, 530.8106689453125, 587.1143188476562, 355.7453308105469, 419.59393310546875, 238.50875854492188, 272.211181640625, 528.9632568359375, -168.66978454589844, 303.859619140625, 598.205078125, 125.17027282714844, 793.2578125, 20.817970275878906, -186.23350524902344, 58.72199249267578, 18.17291259765625, 151.3908233642578, 647.6666259765625, 673.3058471679688, 322.02984619140625, 617.4163818359375, 155.3678436279297, 256.52825927734375, 214.45211791992188, 170.17098999023438, -89.85567474365234, 77.67922973632812, 611.13232421875, -536.0286865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000376.npy"} +{"epoch": 0.5521292217327459, "step": 377, "batch_size": 64, "mean": 246.15280151367188, "std": 295.74945068359375, "min": -385.4408874511719, "p10": -79.43132934570312, "median": 237.93524932861328, "p90": 649.6059692382813, "max": 881.0942993164062, "pos_frac": 0.734375, "sample": [311.2369689941406, 872.828369140625, 186.4796142578125, -47.424407958984375, 328.9174499511719, -27.638837814331055, 245.76239013671875, -151.2611083984375, 281.37255859375, -8.462028503417969, 810.1443481445312, 797.4212646484375, -32.29283905029297, 853.063720703125, -0.21677589416503906, 259.0255432128906, 232.5105743408203, 465.4148864746094, 198.604736328125, -33.476898193359375, 257.59808349609375, 603.1565551757812, 390.2910461425781, -22.90249252319336, 657.842529296875, 881.0942993164062, 444.3866882324219, -185.9044647216797, 582.3738403320312, 156.85427856445312, 276.6759033203125, 229.75477600097656, 82.6273422241211, 179.46127319335938, 27.734142303466797, 580.3329467773438, 50.43995666503906, 314.987060546875, 378.88134765625, 630.3873291015625, 265.05914306640625, -28.705810546875, -385.4408874511719, 58.90544891357422, -54.492095947265625, 92.66234588623047, 411.10284423828125, 404.6922607421875, -303.2078857421875, 395.2315979003906, 108.28587341308594, 628.1953735351562, -79.24261474609375, -79.51220703125, -96.43463897705078, 476.1083068847656, 130.37002563476562, 734.666259765625, 372.3704833984375, 21.85405731201172, 312.48291015625, 187.34036254882812, 243.35992431640625, -119.95345306396484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000377.npy"} +{"epoch": 0.55359765051395, "step": 378, "batch_size": 64, "mean": 262.8395690917969, "std": 281.70458984375, "min": -508.97723388671875, "p10": -46.29496688842773, "median": 232.6874771118164, "p90": 599.6037841796875, "max": 1064.7374267578125, "pos_frac": 0.84375, "sample": [654.7562255859375, 257.76202392578125, 660.6715087890625, 564.816650390625, 520.8392944335938, 110.10331726074219, 204.09060668945312, 447.45281982421875, 172.97418212890625, 202.86041259765625, 230.43075561523438, 358.74810791015625, 572.4722900390625, 200.16883850097656, 98.01873779296875, -32.93522644042969, 242.35955810546875, 723.9457397460938, 213.30715942382812, 304.91546630859375, 246.08551025390625, -120.97147369384766, 41.83096694946289, 102.78833770751953, 258.2195739746094, 180.88711547851562, -86.94136047363281, 520.013427734375, 49.04778289794922, 478.16058349609375, 581.8699951171875, 530.1529541015625, 200.40447998046875, 148.542236328125, -39.555870056152344, 473.9087219238281, 193.1319580078125, 1064.7374267578125, 49.32081604003906, -342.09515380859375, 244.2654266357422, -7.158866882324219, 234.94419860839844, 607.2039794921875, 174.82278442382812, -49.18315124511719, 458.9003601074219, 302.1605529785156, 493.2434387207031, 341.58880615234375, 99.66102600097656, 343.6400146484375, 431.81732177734375, 241.3399658203125, -508.97723388671875, 177.3655242919922, 227.99420166015625, 548.31298828125, -63.27428436279297, 803.9755859375, 125.11387634277344, -433.30438232421875, 152.9224395751953, 637.0614013671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000378.npy"} +{"epoch": 0.5550660792951542, "step": 379, "batch_size": 64, "mean": 253.78219604492188, "std": 310.1694641113281, "min": -461.40936279296875, "p10": -112.65117111206054, "median": 231.35073852539062, "p90": 663.2630432128909, "max": 1254.758544921875, "pos_frac": 0.828125, "sample": [252.0626220703125, 373.802978515625, 4.394378662109375, 221.6612548828125, -104.31688690185547, 102.21045684814453, 186.32501220703125, 694.2451171875, 211.25106811523438, 144.6619873046875, 416.9549865722656, 799.3198852539062, 58.74504089355469, 481.5195617675781, 364.567626953125, 208.19290161132812, -461.40936279296875, 941.0845336914062, 53.94056701660156, 325.4956359863281, 495.84283447265625, 409.1741638183594, 261.77069091796875, 49.57252883911133, 118.87979125976562, -89.35281372070312, -311.9066162109375, 146.39105224609375, 36.89435577392578, 354.848876953125, 743.7505493164062, 357.3909912109375, -298.6210632324219, 47.930538177490234, 471.6313171386719, 716.783203125, 323.0, 397.4417724609375, 590.117919921875, 114.92964172363281, -74.9445571899414, 598.6441650390625, 241.04022216796875, -46.58506393432617, 383.94598388671875, -149.03897094726562, 176.24685668945312, 471.0113220214844, 298.8565979003906, 286.3369140625, -116.22300720214844, 517.322021484375, 690.9568481445312, 168.77940368652344, 1254.758544921875, 243.3206787109375, 152.1177978515625, 503.2501220703125, 128.0689697265625, -156.04693603515625, 19.223159790039062, 570.944580078125, -154.42422485351562, 23.320755004882812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000379.npy"} +{"epoch": 0.5565345080763583, "step": 380, "batch_size": 64, "mean": 280.27264404296875, "std": 312.65576171875, "min": -686.41748046875, "p10": -96.67137145996092, "median": 297.5885009765625, "p90": 677.7396240234376, "max": 1137.5849609375, "pos_frac": 0.859375, "sample": [139.24658203125, 120.32478332519531, 38.42173767089844, 1137.5849609375, 76.70503997802734, 105.74938201904297, 145.15235900878906, 378.8880615234375, 376.4533996582031, 339.57073974609375, 167.37734985351562, 377.25091552734375, -52.85358428955078, 602.5140991210938, 836.8309326171875, 484.78265380859375, 391.6104736328125, 405.36456298828125, 502.14056396484375, 177.0609130859375, 410.57672119140625, -244.54833984375, 264.2349853515625, 200.98208618164062, 67.18023681640625, -87.49535369873047, 466.431396484375, 335.8568420410156, 148.77064514160156, 644.2542114257812, 373.7677001953125, 692.0905151367188, 611.3147583007812, 537.8848876953125, 621.3834228515625, 412.8905029296875, 150.52162170410156, -105.39227294921875, 260.9700622558594, 57.83533477783203, 811.8511962890625, 126.98957824707031, -255.71878051757812, 210.118408203125, -203.5263671875, 216.09683227539062, 330.9420166015625, 723.6165771484375, 65.19125366210938, -339.66424560546875, 440.13604736328125, 44.888336181640625, 758.3102416992188, 358.43951416015625, 248.3216552734375, -100.60395050048828, 469.81707763671875, 371.1114807128906, 156.49362182617188, 77.06829833984375, -686.41748046875, 365.97552490234375, 808.3385009765625, 369.98748779296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000380.npy"} +{"epoch": 0.5580029368575624, "step": 381, "batch_size": 64, "mean": 200.75160217285156, "std": 332.6195373535156, "min": -744.9247436523438, "p10": -175.33491058349608, "median": 185.88229370117188, "p90": 606.414959716797, "max": 1028.01220703125, "pos_frac": 0.734375, "sample": [220.40945434570312, 30.821319580078125, 97.25350952148438, 623.8214721679688, 608.0420532226562, 651.5474853515625, 492.59991455078125, -26.466415405273438, 141.75686645507812, 538.8775024414062, 469.4956359863281, 97.02586364746094, 421.7507019042969, 261.7611389160156, 1028.01220703125, -173.61801147460938, 443.997802734375, 410.26922607421875, 678.7286987304688, 190.89761352539062, 906.9341430664062, 602.618408203125, -211.04574584960938, -95.61268615722656, -86.15990447998047, 118.46041870117188, 577.5403442382812, 17.561809539794922, -302.3958435058594, -67.76044464111328, 140.92942810058594, 219.23577880859375, -12.079116821289062, -35.32082748413086, 258.59576416015625, -64.60841369628906, 245.9375, 130.9599609375, 240.27127075195312, -78.57730102539062, 70.66326141357422, 116.80453491210938, 990.5873413085938, 208.84884643554688, 399.7637023925781, 401.4113464355469, 46.99981689453125, -220.0167236328125, 510.3079833984375, -176.0707244873047, 26.69038963317871, 397.85833740234375, -75.93563842773438, 242.7812957763672, 134.43875122070312, -236.03256225585938, 83.60671997070312, 316.5794982910156, 431.9345703125, 180.86697387695312, 233.3417205810547, -744.9247436523438, -604.7066650390625, 399.8363342285156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000381.npy"} +{"epoch": 0.5594713656387665, "step": 382, "batch_size": 64, "mean": 245.24298095703125, "std": 263.3995056152344, "min": -287.45330810546875, "p10": -100.90397720336914, "median": 200.48407745361328, "p90": 638.0592712402346, "max": 941.4036865234375, "pos_frac": 0.8125, "sample": [941.4036865234375, 657.7510375976562, 156.53286743164062, 136.6756591796875, 176.94212341308594, 766.913818359375, -115.6504898071289, 108.8477554321289, 307.307861328125, -144.57260131835938, 147.8003692626953, 254.87554931640625, 376.11376953125, 461.40252685546875, 448.3697509765625, 556.602783203125, 211.9407501220703, 105.44032287597656, 398.7601318359375, 109.93177795410156, 420.79998779296875, -158.1702117919922, 9.736099243164062, 301.2727966308594, 728.9320068359375, -98.9006576538086, 331.95855712890625, -101.76254272460938, 520.4026489257812, 287.51898193359375, 747.95751953125, -131.60594177246094, 23.122684478759766, 306.58612060546875, 400.0605773925781, -287.45330810546875, -153.54135131835938, 459.9126281738281, -15.131698608398438, 280.2368469238281, 178.07073974609375, 206.82962036132812, 355.298583984375, 24.0259952545166, 295.29815673828125, 135.86473083496094, 183.5832061767578, 249.06973266601562, 194.13853454589844, 156.69190979003906, 693.0623779296875, 542.54345703125, 285.26763916015625, -33.850624084472656, 154.23626708984375, 698.8020629882812, 72.27894592285156, -43.676029205322266, 465.5357666015625, 592.11181640625, 135.1707000732422, 99.5052719116211, -3.8585357666015625, 124.2288818359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000382.npy"} +{"epoch": 0.5609397944199707, "step": 383, "batch_size": 64, "mean": 244.95123291015625, "std": 339.1126403808594, "min": -880.06689453125, "p10": -104.36193389892577, "median": 242.6487274169922, "p90": 729.7895812988281, "max": 965.9595336914062, "pos_frac": 0.75, "sample": [-7.995933532714844, -70.6734619140625, 108.91067504882812, -281.94134521484375, 557.4998168945312, -234.25181579589844, -145.74822998046875, 550.0882568359375, 336.2239685058594, -23.800548553466797, 390.9324951171875, 265.31488037109375, 74.7860107421875, -23.940505981445312, -285.03955078125, 245.62594604492188, 952.58544921875, 30.187110900878906, 244.81744384765625, 288.38323974609375, 754.68017578125, -165.12069702148438, 3.5233192443847656, 692.3221435546875, 240.48001098632812, -34.196205139160156, 766.7581787109375, 25.495927810668945, -51.99151611328125, 313.48876953125, 965.9595336914062, 219.10008239746094, 699.0050048828125, 212.20578002929688, 386.60791015625, 385.8321533203125, 455.1837158203125, 32.8612060546875, 245.3697052001953, 296.90045166015625, -78.28277587890625, -20.4591064453125, 234.84408569335938, 77.83998107910156, 395.8525085449219, 722.3719482421875, -106.27703857421875, 575.2769165039062, 46.32655334472656, 270.430419921875, 765.4385375976562, 168.38919067382812, -99.89335632324219, 313.4129638671875, 425.86602783203125, 33.059242248535156, 776.9539794921875, 571.113525390625, 214.07574462890625, 732.9685668945312, -880.06689453125, 111.5821533203125, 453.05682373046875, 556.5699462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000383.npy"} +{"epoch": 0.5624082232011748, "step": 384, "batch_size": 64, "mean": 191.53524780273438, "std": 223.45127868652344, "min": -335.9676208496094, "p10": -94.43865661621092, "median": 188.42733001708984, "p90": 432.9469818115235, "max": 817.507568359375, "pos_frac": 0.8125, "sample": [182.73651123046875, 43.596343994140625, 817.507568359375, -127.44453430175781, 4.614410400390625, 81.69328308105469, -209.10569763183594, -1.3067474365234375, 719.6851196289062, 200.2911376953125, 434.17242431640625, 425.75958251953125, 203.1385955810547, -227.76773071289062, 371.0234069824219, 107.65702819824219, -68.89631652832031, 61.57302474975586, 353.74127197265625, 362.54339599609375, 305.136962890625, 233.672119140625, 100.43446350097656, 327.28057861328125, 285.1468200683594, 159.1761932373047, 156.14224243164062, -214.43173217773438, -335.9676208496094, 91.35375213623047, 335.6018981933594, 160.69435119628906, 50.920433044433594, 184.7760009765625, 190.12718200683594, -78.3500747680664, 393.8825378417969, 304.3860168457031, 352.00531005859375, 66.67822265625, 10.524883270263672, 236.94398498535156, 321.2498474121094, 498.82354736328125, 501.8600158691406, 333.43603515625, 315.49407958984375, 230.73336791992188, 259.0028381347656, -107.82825469970703, 186.72747802734375, -38.338645935058594, 88.3181381225586, 498.03448486328125, 76.689697265625, -101.3337631225586, -31.395198822021484, 412.0466003417969, 430.0876159667969, 106.57231140136719, 556.6820678710938, 318.7453308105469, 53.933258056640625, 297.3674011230469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000384.npy"} +{"epoch": 0.5638766519823789, "step": 385, "batch_size": 64, "mean": 233.17413330078125, "std": 233.24720764160156, "min": -330.227294921875, "p10": -37.175697326660135, "median": 209.49551391601562, "p90": 569.2040771484376, "max": 743.5350952148438, "pos_frac": 0.875, "sample": [-44.8641357421875, 21.32489776611328, 43.43572235107422, 233.9720458984375, 426.3164367675781, 290.3789367675781, 488.88983154296875, 165.65188598632812, -131.73431396484375, 491.1612548828125, 165.2749481201172, 50.93736267089844, 146.07369995117188, 190.8653106689453, 479.0191650390625, 144.456787109375, 591.2591552734375, 337.54681396484375, 108.98279571533203, 76.15885162353516, 354.1195068359375, 172.3637237548828, -161.03074645996094, 308.08349609375, -232.80560302734375, 274.4310302734375, 208.71022033691406, 640.1564331054688, 92.94941711425781, 32.140625, 341.5028076171875, 240.93016052246094, 134.99734497070312, 743.5350952148438, 722.421630859375, 174.4851837158203, 192.65997314453125, -19.236007690429688, 551.832763671875, 259.4229736328125, 46.73133850097656, 100.83174133300781, 478.6014709472656, 210.2808074951172, 228.36770629882812, 576.64892578125, 424.078369140625, -134.11727905273438, 616.35546875, 341.87091064453125, -330.227294921875, 71.46040344238281, 199.76675415039062, -247.1044921875, 247.2025604248047, 280.68353271484375, 435.3704528808594, 146.33474731445312, 278.4188232421875, 660.621337890625, 134.6044464111328, 343.44744873046875, 192.74066162109375, 313.3978271484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000385.npy"} +{"epoch": 0.5653450807635829, "step": 386, "batch_size": 64, "mean": 257.5692138671875, "std": 305.5406494140625, "min": -430.52032470703125, "p10": -72.94065856933592, "median": 193.34922790527344, "p90": 720.4236755371096, "max": 976.7825317382812, "pos_frac": 0.828125, "sample": [-59.1878547668457, 394.0437316894531, -64.72667694091797, 376.6476745605469, 751.48193359375, 191.7993927001953, 125.33271026611328, 463.5494689941406, 147.50125122070312, 502.7044677734375, 416.7706604003906, 976.7825317382812, -279.19537353515625, -11.102266311645508, 656.0173950195312, 410.890869140625, 320.713623046875, 414.7560119628906, 62.93965148925781, 96.56623077392578, 141.06866455078125, 286.65716552734375, 182.47186279296875, 211.64422607421875, 289.1172790527344, 276.8058166503906, 128.49099731445312, -76.45806884765625, 919.5923461914062, 620.3550415039062, -64.73336791992188, 130.79522705078125, 105.50979614257812, -188.78369140625, 182.06346130371094, 748.0263671875, 897.9326782226562, 404.184814453125, 763.6741943359375, 145.91407775878906, 238.38046264648438, 543.4921875, -237.4397735595703, 171.0349884033203, 46.392547607421875, 141.76046752929688, 134.44789123535156, 194.89906311035156, -184.4878692626953, 389.05029296875, -430.52032470703125, 853.9800415039062, 106.76432037353516, 19.949546813964844, 93.48992919921875, 147.38783264160156, 499.3123779296875, 357.941162109375, -356.74578857421875, 457.9717102050781, 524.3004760742188, 278.9769592285156, 388.3121032714844, 107.16303253173828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000386.npy"} +{"epoch": 0.566813509544787, "step": 387, "batch_size": 64, "mean": 207.89810180664062, "std": 244.35458374023438, "min": -301.1578063964844, "p10": -58.21962394714355, "median": 192.77425384521484, "p90": 512.9523437500001, "max": 841.9522094726562, "pos_frac": 0.796875, "sample": [297.15093994140625, -22.05376434326172, 100.07420349121094, 113.83384704589844, 207.3494873046875, 841.9522094726562, 144.44931030273438, 102.25294494628906, 326.7216796875, 445.9842224121094, -301.1578063964844, 306.4539489746094, 113.14761352539062, 51.3200798034668, -293.60296630859375, 230.58840942382812, 433.9619140625, 593.1119995117188, -106.6071548461914, 179.15585327148438, 748.2015991210938, 62.28803634643555, 203.42605590820312, 774.7796630859375, 387.2255554199219, 193.34848022460938, 149.32931518554688, 231.95260620117188, 57.13397979736328, 139.84375, 28.67804718017578, 192.2000274658203, 522.3807373046875, 96.11994934082031, 543.593017578125, 174.67349243164062, 383.60321044921875, 115.28825378417969, 288.091796875, 311.7821350097656, 37.583396911621094, -8.407572746276855, 631.3526611328125, 440.8039855957031, 339.0694885253906, 128.17413330078125, 374.9512634277344, -54.7116584777832, 490.9527587890625, -59.72303771972656, 373.1767578125, -119.54464721679688, 56.673866271972656, 335.3220520019531, -206.01242065429688, 233.96580505371094, -8.7042236328125, 399.77532958984375, -36.51177978515625, -29.5194091796875, 267.54541015625, -273.24261474609375, 279.47900390625, 345.00311279296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000387.npy"} +{"epoch": 0.5682819383259912, "step": 388, "batch_size": 64, "mean": 283.08306884765625, "std": 327.8969421386719, "min": -527.787109375, "p10": -53.946214294433574, "median": 234.99268341064453, "p90": 689.0857177734379, "max": 1412.7535400390625, "pos_frac": 0.859375, "sample": [1412.7535400390625, 550.676513671875, -84.68357849121094, 517.9152221679688, 528.744873046875, 823.3345336914062, 169.91517639160156, -277.3818054199219, 136.35238647460938, 313.20355224609375, 192.02786254882812, 899.1614990234375, 558.813232421875, 251.42062377929688, -527.787109375, 53.78278350830078, 356.22894287109375, 227.72604370117188, 432.48382568359375, 4.862281799316406, 168.27725219726562, 731.4254150390625, 165.41647338867188, -82.64679718017578, 256.2864685058594, 242.89089965820312, 67.5688705444336, -14.837936401367188, 156.95645141601562, 307.5880432128906, 101.48046112060547, 148.3186492919922, 291.83514404296875, 80.16366577148438, 452.6744384765625, 171.5743408203125, 123.47459411621094, 51.060890197753906, 553.5985717773438, 128.00546264648438, 443.2400207519531, 87.75108337402344, 412.75360107421875, 165.73480224609375, 195.4813232421875, 33.64875030517578, 429.233154296875, 301.7689208984375, -227.21353149414062, 379.0183410644531, 334.36041259765625, 1237.806884765625, -184.08985900878906, 380.878662109375, 242.2593231201172, 29.204559326171875, -62.379241943359375, 504.48089599609375, 755.6453857421875, 778.0792846679688, 590.2930908203125, 463.8860168457031, 219.08352661132812, -34.26914978027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000388.npy"} +{"epoch": 0.5697503671071953, "step": 389, "batch_size": 64, "mean": 238.16555786132812, "std": 310.4461669921875, "min": -401.635498046875, "p10": -111.81875991821289, "median": 257.6193161010742, "p90": 595.6424255371094, "max": 959.0499267578125, "pos_frac": 0.734375, "sample": [96.79144287109375, 529.0107421875, 358.4903259277344, -376.4403991699219, 70.70294189453125, 345.26922607421875, 177.0730438232422, 585.0016479492188, 441.56829833984375, 278.89422607421875, 234.22055053710938, -112.27408599853516, 346.3475646972656, -206.68460083007812, 174.2251739501953, 492.2109069824219, 158.99273681640625, 472.98126220703125, 419.4048156738281, -87.07821655273438, -283.3432312011719, -20.431255340576172, 600.2027587890625, -18.130619049072266, 288.18719482421875, 647.2435913085938, -401.635498046875, 2.8518829345703125, 124.78728485107422, 142.18896484375, 327.6916198730469, -51.087982177734375, 142.3773956298828, 350.3365783691406, -283.7105407714844, 303.92041015625, 231.3688201904297, 546.2273559570312, 386.70391845703125, 957.2830810546875, 476.93402099609375, -94.68318176269531, 450.9640808105469, 368.5130615234375, 308.5692138671875, 396.14739990234375, -7.065492630004883, 103.18327331542969, 494.0650939941406, 150.6658935546875, 75.0701675415039, 959.0499267578125, -81.09400939941406, -69.91960906982422, 244.5537567138672, 784.7785034179688, -40.938232421875, 900.5466918945312, 322.5286865234375, -110.75633239746094, 341.1141052246094, -209.79312133789062, 270.68487548828125, 817.7382202148438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000389.npy"} +{"epoch": 0.5712187958883994, "step": 390, "batch_size": 64, "mean": 244.15020751953125, "std": 378.6503601074219, "min": -558.407470703125, "p10": -132.84322586059568, "median": 251.31453704833984, "p90": 570.7348266601563, "max": 2224.67236328125, "pos_frac": 0.765625, "sample": [209.01832580566406, 299.86627197265625, 618.7081298828125, 525.67724609375, -558.407470703125, -90.27715301513672, -269.1306457519531, 147.33924865722656, 366.9576416015625, 250.31253051757812, 493.7460021972656, 57.11506652832031, 311.43994140625, 401.6467590332031, 265.04205322265625, 46.208709716796875, -124.41478729248047, 2224.67236328125, -136.45541381835938, 390.11944580078125, 137.14697265625, 557.278076171875, 589.0841064453125, 164.36138916015625, 179.5398406982422, -41.478084564208984, 109.30986785888672, -34.4630126953125, 117.34989929199219, 314.4013671875, 174.61026000976562, -40.729736328125, -204.2188720703125, -102.2171401977539, 168.01129150390625, 542.55322265625, 821.163818359375, 574.2951049804688, -377.55230712890625, 358.6127624511719, 564.28076171875, 650.6452026367188, 413.0697021484375, 135.66015625, 573.5008544921875, 213.9282989501953, 177.4287109375, 71.06044006347656, 514.6548461914062, 416.11273193359375, 487.0969543457031, 282.8057861328125, 332.3093566894531, 356.63531494140625, 410.8187561035156, -30.26784324645996, -18.679840087890625, -264.2267150878906, -523.9175415039062, 252.31654357910156, 449.06646728515625, 87.86083984375, 278.9353942871094, 358.2741394042969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000390.npy"} +{"epoch": 0.5726872246696035, "step": 391, "batch_size": 64, "mean": 143.4707794189453, "std": 291.51275634765625, "min": -695.4203491210938, "p10": -144.86351928710937, "median": 101.611572265625, "p90": 489.07761840820314, "max": 1088.2696533203125, "pos_frac": 0.734375, "sample": [21.704875946044922, 10.465744018554688, 89.6686019897461, -73.65597534179688, 180.61618041992188, 343.0132141113281, 13.927835464477539, 123.19617462158203, 18.813251495361328, 388.5227355957031, -96.38729095458984, 153.8935089111328, -227.11767578125, -70.66167449951172, 58.49772644042969, 35.87355041503906, 380.9698791503906, 128.72784423828125, 22.246973037719727, -116.14398193359375, 237.80503845214844, 486.2498474121094, 558.4786376953125, 490.2895202636719, -107.64881134033203, 69.71342468261719, 1088.2696533203125, -205.45225524902344, -114.14645385742188, 310.14129638671875, 332.369873046875, -141.86929321289062, 83.98114013671875, 41.16495132446289, -80.73754119873047, 234.4356231689453, 108.92855072021484, -44.582481384277344, 517.0911254882812, 149.649169921875, 94.29459381103516, -184.64010620117188, 161.69146728515625, 241.50045776367188, 708.759765625, -252.17221069335938, 623.500244140625, -30.016530990600586, -534.3244018554688, 20.033042907714844, 54.63731384277344, 281.6734313964844, -146.14675903320312, 396.84503173828125, 419.4859619140625, 68.88972473144531, 789.6134033203125, 382.045166015625, 342.6475830078125, -695.4203491210938, 257.021240234375, 239.87286376953125, 297.4112548828125, 244.6248321533203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000391.npy"} +{"epoch": 0.5741556534508077, "step": 392, "batch_size": 64, "mean": 280.9255065917969, "std": 280.8028259277344, "min": -174.76478576660156, "p10": -44.60391311645504, "median": 230.9010467529297, "p90": 652.6270568847656, "max": 1155.8701171875, "pos_frac": 0.859375, "sample": [-3.9802474975585938, 181.3011474609375, -150.41835021972656, 10.66402816772461, 210.90867614746094, -88.79447174072266, 651.0838012695312, 331.358154296875, 369.4180908203125, 381.98565673828125, 653.2884521484375, -174.76478576660156, 273.677734375, 149.26388549804688, 174.69761657714844, 34.3961181640625, 730.3582153320312, 750.6483154296875, 467.70684814453125, 200.8350372314453, -90.80706024169922, 390.34844970703125, 483.2127380371094, 46.76820373535156, 373.46136474609375, 420.04302978515625, 220.44166564941406, 1155.8701171875, -61.200164794921875, 244.2349853515625, 182.42857360839844, 299.92041015625, 438.35040283203125, 179.53189086914062, 558.056884765625, 241.3604278564453, 333.103515625, 172.7046661376953, 211.31434631347656, 153.91314697265625, 353.4683837890625, 437.622314453125, 68.62335205078125, -5.879325866699219, 545.0020141601562, 914.374267578125, 422.298583984375, 483.2298889160156, 394.24090576171875, -167.708740234375, 124.81707000732422, 86.05961608886719, 81.09140014648438, 363.16729736328125, 168.958740234375, 134.96702575683594, 298.18218994140625, 28.945629119873047, 1053.11572265625, 42.82368469238281, -105.23360443115234, 77.8739013671875, 320.0539855957031, 752.44677734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000392.npy"} +{"epoch": 0.5756240822320118, "step": 393, "batch_size": 64, "mean": 219.51449584960938, "std": 346.0307312011719, "min": -339.4408874511719, "p10": -196.30450592041012, "median": 236.0550765991211, "p90": 594.124890136719, "max": 1602.287353515625, "pos_frac": 0.71875, "sample": [279.100830078125, 715.1239013671875, -24.39523696899414, -93.93289947509766, 441.8927001953125, 516.2933959960938, -161.06869506835938, 357.94659423828125, 319.77862548828125, 240.0919189453125, 83.84674072265625, 271.7864990234375, 241.513427734375, 344.072021484375, 333.793212890625, 63.888587951660156, 1104.3759765625, 918.1622314453125, 665.8209228515625, -6.969390869140625, 232.0182342529297, 487.0169677734375, -236.06460571289062, -287.406005859375, -122.65971374511719, 8.505786895751953, -212.58815002441406, 227.79470825195312, 303.79583740234375, 281.64031982421875, 183.36720275878906, 444.39141845703125, 449.45904541015625, 240.62960815429688, 529.1883544921875, 336.2661437988281, 261.6465148925781, 111.80081939697266, 402.7685546875, -111.78639221191406, 17.419143676757812, 165.55813598632812, 656.0034790039062, -204.52096557617188, -233.07662963867188, 525.3353271484375, 293.6343994140625, -207.70114135742188, 10.420677185058594, 165.8975830078125, 116.48542022705078, 225.05006408691406, -177.1327667236328, -134.11688232421875, -153.93838500976562, 385.6432189941406, 421.33038330078125, -339.4408874511719, 309.35809326171875, 51.185646057128906, -112.91295623779297, 1602.287353515625, 621.954833984375, -96.70022583007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000393.npy"} +{"epoch": 0.5770925110132159, "step": 394, "batch_size": 64, "mean": 234.3231964111328, "std": 306.7842102050781, "min": -351.5914001464844, "p10": -113.06614456176757, "median": 166.17357635498047, "p90": 640.5941345214845, "max": 1128.50048828125, "pos_frac": 0.796875, "sample": [104.43675231933594, 331.6502685546875, -201.2057647705078, 427.1675720214844, 184.15682983398438, -151.41510009765625, 47.7872200012207, 92.28495788574219, 530.6717529296875, 78.61874389648438, 549.2027587890625, -17.29131317138672, -351.5914001464844, 86.95597839355469, -51.70713806152344, -180.1328125, -115.56409454345703, 781.3744506835938, 677.1370849609375, -170.04637145996094, 557.9364013671875, -123.73126220703125, 299.53076171875, 276.9918518066406, 172.5709686279297, 371.70916748046875, 142.11598205566406, 91.18904113769531, -52.75493621826172, 605.9017944335938, 578.9160766601562, 751.53173828125, -107.23759460449219, 362.9803161621094, 110.93893432617188, 46.0357666015625, 147.57318115234375, 49.78789520263672, 133.5328369140625, 655.4622802734375, 23.83771514892578, 538.510009765625, 2.337512969970703, 209.4259490966797, 101.29026794433594, 78.20452880859375, 505.8414611816406, 877.6790771484375, 250.8811798095703, 175.94500732421875, 452.6968994140625, 283.01214599609375, 1128.50048828125, 1071.2552490234375, 352.1962890625, 42.995933532714844, 76.31669616699219, -70.0396728515625, 202.11026000976562, 411.5657043457031, 193.99691772460938, -73.49305725097656, 159.77618408203125, 278.3706359863281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000394.npy"} +{"epoch": 0.57856093979442, "step": 395, "batch_size": 64, "mean": 250.46954345703125, "std": 322.7596740722656, "min": -464.3038635253906, "p10": -118.84705352783202, "median": 223.5328369140625, "p90": 738.2884338378907, "max": 999.7794799804688, "pos_frac": 0.765625, "sample": [468.6614074707031, -124.2667236328125, 221.20916748046875, 58.664154052734375, 263.36480712890625, 319.1596374511719, 348.7772521972656, 223.74008178710938, -269.1724548339844, 257.3231506347656, -132.0080108642578, 423.14288330078125, 372.4194030761719, -21.13477325439453, 157.61666870117188, -9.032093048095703, 717.1422729492188, 312.6981506347656, 932.37646484375, -57.83570098876953, 640.302734375, 26.282325744628906, 537.1184692382812, -135.1108856201172, 223.32559204101562, 9.433868408203125, 164.30093383789062, 398.876708984375, -57.63488006591797, 61.224021911621094, 805.94580078125, 999.7794799804688, 478.015869140625, 139.38580322265625, 358.3771057128906, 51.35577392578125, 158.35195922851562, 327.8277587890625, 905.8201904296875, 157.878173828125, 509.20318603515625, 505.4344177246094, -464.3038635253906, -38.44990539550781, -266.0074768066406, 349.98779296875, -81.14793395996094, 810.2059936523438, 35.353919982910156, -106.20115661621094, 103.66998291015625, -185.69100952148438, 71.86946868896484, 977.056884765625, 747.35107421875, 247.18821716308594, 279.513916015625, 527.5911865234375, 235.7212371826172, 468.1365661621094, 475.0685119628906, 66.05370330810547, 106.3882064819336, -57.64393997192383], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000395.npy"} +{"epoch": 0.580029368575624, "step": 396, "batch_size": 64, "mean": 222.18008422851562, "std": 262.3597717285156, "min": -473.63922119140625, "p10": -21.007541275024412, "median": 160.66656494140625, "p90": 552.2312316894532, "max": 1005.7598876953125, "pos_frac": 0.859375, "sample": [165.8890838623047, 634.4820556640625, 104.06417083740234, 186.31341552734375, 98.82327270507812, 568.0836791992188, 141.148681640625, 170.30587768554688, 155.5865478515625, 285.6465759277344, 384.0951232910156, 301.8455810546875, 223.1446533203125, 430.0435485839844, -135.19302368164062, 154.1389617919922, 191.50711059570312, 3.4055328369140625, 90.98614501953125, 104.97716522216797, 82.02600860595703, 377.85748291015625, 874.0798950195312, 290.17022705078125, 46.103851318359375, 460.2439270019531, 283.52423095703125, 111.00347900390625, 195.4559326171875, 656.2540283203125, -18.650806427001953, 365.80462646484375, 165.74658203125, 274.2720031738281, 63.99092483520508, 153.7733917236328, 515.2421875, -202.10018920898438, 484.9268798828125, 695.6156005859375, 306.978271484375, 5.3548583984375, -75.38623046875, 506.5171203613281, -3.88726806640625, -64.19052124023438, 506.96856689453125, -22.01757049560547, 20.402677536010742, 249.63784790039062, 892.4174194335938, 99.42127227783203, 16.16131591796875, -80.23699951171875, 70.02763366699219, 260.85906982421875, 306.126708984375, 131.17404174804688, 135.27908325195312, -473.63922119140625, 102.65275573730469, 1005.7598876953125, 35.46923828125, 153.04229736328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000396.npy"} +{"epoch": 0.5814977973568282, "step": 397, "batch_size": 64, "mean": 291.03643798828125, "std": 300.57586669921875, "min": -300.70391845703125, "p10": -41.13445091247558, "median": 254.6646270751953, "p90": 698.1957153320313, "max": 1055.1917724609375, "pos_frac": 0.875, "sample": [20.70761489868164, 568.5009765625, 9.005157470703125, 369.4015197753906, 169.694091796875, 95.17887878417969, 261.9015197753906, 322.2669982910156, 493.08349609375, 178.4636688232422, 688.91162109375, -43.41328430175781, 641.8585815429688, -86.25518798828125, 145.2301025390625, 355.9087219238281, 210.43450927734375, -229.40029907226562, 461.9578857421875, 689.0458984375, 266.02313232421875, 753.6655883789062, 333.08087158203125, 447.62481689453125, 1055.1917724609375, 4.7892303466796875, 722.4332275390625, 454.5137939453125, -123.50595092773438, 132.16954040527344, 561.937744140625, 287.95355224609375, 116.58163452148438, 883.5802001953125, 448.7090148925781, 177.9840087890625, 67.79679870605469, 339.42877197265625, 205.2813720703125, 172.09747314453125, -300.70391845703125, -164.07833862304688, 43.36736297607422, -228.72933959960938, 531.029052734375, 163.2978057861328, 18.83899688720703, 133.97451782226562, -35.81717300415039, 533.3922119140625, 37.50400924682617, 702.1170654296875, 184.43411254882812, 1030.738037109375, 149.23983764648438, 879.4898681640625, 61.81785583496094, 247.427734375, 521.65478515625, 151.3343505859375, 294.9918212890625, 322.060302734375, 415.501220703125, 303.630615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000397.npy"} +{"epoch": 0.5829662261380323, "step": 398, "batch_size": 64, "mean": 261.36553955078125, "std": 347.5559387207031, "min": -360.8876953125, "p10": -103.1951965332031, "median": 158.23501586914062, "p90": 638.6141357421875, "max": 1333.5487060546875, "pos_frac": 0.765625, "sample": [151.625732421875, 90.33967590332031, 433.4425354003906, 696.8462524414062, -123.14781188964844, 57.213294982910156, 297.96502685546875, -334.62713623046875, 91.83793640136719, -110.45330047607422, 570.2716674804688, -360.8876953125, 632.2337646484375, 336.16180419921875, 463.0249328613281, 1039.208984375, 164.9073028564453, 28.28185272216797, 626.106201171875, 312.80010986328125, 567.2985229492188, 146.40682983398438, 529.760498046875, 475.7884826660156, 1183.4969482421875, -12.94268798828125, 175.799560546875, 144.36680603027344, 635.456787109375, 417.32330322265625, 69.71112060546875, 193.57504272460938, -113.25201416015625, -67.80192565917969, 331.8273010253906, 102.86152648925781, -5.637367248535156, 415.969970703125, 25.155128479003906, 447.86572265625, 141.71934509277344, 448.9021911621094, 138.4049072265625, 69.51154327392578, 135.97988891601562, -51.49585723876953, 408.46240234375, 10.169647216796875, -64.64497375488281, 58.065399169921875, 164.84429931640625, 1333.5487060546875, 639.96728515625, 794.1593017578125, -332.0350036621094, 544.953125, 446.12701416015625, 508.4321594238281, 842.2601318359375, 149.9373779296875, -52.43804931640625, -28.633056640625, -86.2596206665039, -218.72412109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000398.npy"} +{"epoch": 0.5844346549192364, "step": 399, "batch_size": 64, "mean": 234.83462524414062, "std": 333.4617614746094, "min": -433.8165283203125, "p10": -203.25854492187494, "median": 199.69983673095703, "p90": 643.2350463867189, "max": 1081.234130859375, "pos_frac": 0.75, "sample": [130.82443237304688, 291.251708984375, 93.09716033935547, -276.347900390625, 623.6239013671875, -73.59261322021484, 650.877685546875, 59.04716110229492, 147.52706909179688, -41.8973388671875, 281.3778991699219, -339.3339538574219, 708.2813110351562, -54.93609619140625, 625.4022216796875, 186.22096252441406, 333.0123291015625, -137.38153076171875, -21.1601619720459, 67.25961303710938, 703.59814453125, 140.58932495117188, 382.6221923828125, 228.01869201660156, 573.9860229492188, 798.446533203125, 198.60708618164062, 414.79559326171875, -301.9312744140625, 253.10702514648438, 23.383132934570312, 485.0942077636719, 187.29916381835938, 161.7119598388672, 423.0662536621094, -228.0008544921875, 1081.234130859375, 530.8145751953125, 581.1339721679688, -282.09881591796875, 321.6894226074219, 480.0764465332031, 190.25184631347656, -433.8165283203125, -54.342708587646484, 426.791015625, 36.004608154296875, 930.1058349609375, 55.32744598388672, -145.5264892578125, 446.4872741699219, 938.0762329101562, 488.10455322265625, 200.79258728027344, 559.3641967773438, 201.4669647216797, 186.36898803710938, 236.17617797851562, -360.4336853027344, 281.4293518066406, 469.2156982421875, -80.19093322753906, 162.62521362304688, -115.25817108154297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000399.npy"} +{"epoch": 0.5859030837004405, "step": 400, "batch_size": 64, "mean": 333.2471008300781, "std": 307.8584899902344, "min": -553.0962524414062, "p10": -45.20373001098633, "median": 318.1609344482422, "p90": 691.7175659179687, "max": 1122.758544921875, "pos_frac": 0.84375, "sample": [264.34478759765625, 469.9415283203125, 413.78955078125, 248.77761840820312, 573.9605712890625, -113.49929809570312, 510.51568603515625, 224.58428955078125, -42.74071502685547, 577.1532592773438, 200.40121459960938, 94.83600616455078, 506.7387390136719, 279.841552734375, -553.0962524414062, 718.5036010742188, 658.492919921875, 184.08750915527344, 406.1177978515625, 411.39520263671875, 824.3449096679688, 519.4637451171875, 3.067535400390625, 368.5710144042969, 72.91735076904297, 320.09539794921875, 113.63444519042969, 556.90087890625, 310.742919921875, 397.5587158203125, 377.4891662597656, 219.04962158203125, -93.66471099853516, 416.9796142578125, 224.95242309570312, -259.307373046875, -8.154273986816406, 764.4022216796875, 316.2264709472656, 550.2304077148438, -42.14277648925781, 214.2261962890625, 303.8043212890625, 975.8126220703125, 31.033388137817383, 637.2333374023438, 294.9900207519531, 197.2807159423828, 19.947101593017578, 1122.758544921875, 674.80615234375, -46.259307861328125, 222.28448486328125, 467.6524658203125, 322.634765625, 654.4340209960938, -211.3732452392578, 694.2008056640625, 655.3745727539062, 425.232421875, 295.91754150390625, 685.92333984375, 750.159912109375, -47.76280212402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000400.npy"} +{"epoch": 0.5873715124816447, "step": 401, "batch_size": 64, "mean": 190.42982482910156, "std": 398.49053955078125, "min": -878.6638793945312, "p10": -247.5817749023437, "median": 154.94528198242188, "p90": 769.1032775878908, "max": 1126.476806640625, "pos_frac": 0.6875, "sample": [156.1666717529297, 514.6387939453125, -74.05412292480469, 224.6944580078125, 531.4614868164062, 76.50618743896484, -335.8830871582031, -379.71600341796875, -878.6638793945312, -67.98155212402344, 99.28887176513672, 215.22250366210938, -207.42901611328125, 73.5391616821289, -28.265533447265625, -39.74341583251953, 334.516357421875, 286.3454895019531, 163.37832641601562, 13.881729125976562, -105.57803344726562, -190.0723114013672, -77.54206085205078, 1026.965576171875, 28.06122398376465, 690.9221801757812, 1002.4436645507812, 467.47235107421875, 795.3634033203125, 153.72389221191406, 84.07015991210938, 280.6824951171875, -29.66353988647461, 53.40717315673828, -117.4070053100586, -512.0767822265625, 976.920166015625, -63.35493087768555, 220.33425903320312, 567.6861572265625, 192.0840301513672, 528.9927978515625, 368.78326416015625, -65.5208740234375, 299.472900390625, 191.67010498046875, 187.07630920410156, -264.79010009765625, 1126.476806640625, -522.0569458007812, 648.72998046875, 479.70703125, 344.0799255371094, 78.42211151123047, 867.797607421875, 871.4588623046875, 45.178627014160156, 375.7969055175781, 4.84820556640625, 64.732421875, 207.25433349609375, 707.8296508789062, -279.8056945800781, -200.9703826904297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000401.npy"} +{"epoch": 0.5888399412628488, "step": 402, "batch_size": 64, "mean": 350.07647705078125, "std": 310.92864990234375, "min": -286.915283203125, "p10": 13.901937103271486, "median": 291.2509307861328, "p90": 781.9913330078126, "max": 1378.5826416015625, "pos_frac": 0.90625, "sample": [413.14605712890625, 27.525007247924805, 13.303916931152344, 470.03997802734375, 225.17123413085938, 437.950439453125, -6.7924346923828125, 305.5844421386719, -51.553428649902344, 655.320556640625, 384.76153564453125, 581.5332641601562, 658.25927734375, 200.1415557861328, -58.79718017578125, 596.655517578125, 35.22581481933594, 295.3526916503906, 240.78086853027344, 432.3703308105469, 560.3001098632812, 208.37490844726562, 573.3370971679688, -286.915283203125, 377.6435546875, 545.9636840820312, 71.2808837890625, 942.214599609375, 15.297317504882812, 924.0960693359375, 222.62371826171875, 344.044921875, 175.1098175048828, 506.38629150390625, 89.98310089111328, 275.2429504394531, 905.4481201171875, 133.24142456054688, 155.83226013183594, 692.8585205078125, 27.974075317382812, -68.90318298339844, 791.330810546875, 922.21923828125, 259.7587585449219, 473.6138000488281, 760.19921875, 150.45790100097656, 286.7706604003906, 1378.5826416015625, 427.15234375, 589.39501953125, 25.348356246948242, 881.1107177734375, 250.7064208984375, 302.748046875, 336.8082275390625, 283.2319030761719, 124.42941284179688, -181.44979858398438, 383.03765869140625, 194.38970947265625, 230.489501953125, 287.149169921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000402.npy"} +{"epoch": 0.5903083700440529, "step": 403, "batch_size": 64, "mean": 271.0248107910156, "std": 309.0369873046875, "min": -350.03155517578125, "p10": -115.38826980590818, "median": 244.04527282714844, "p90": 674.981463623047, "max": 1226.6429443359375, "pos_frac": 0.84375, "sample": [245.92385864257812, 74.65848541259766, 88.77985382080078, 192.49560546875, 310.5081481933594, 36.3416633605957, 242.16668701171875, 373.30987548828125, 2.8540706634521484, -244.2008514404297, 182.40057373046875, -9.092010498046875, -350.03155517578125, 582.267578125, 153.89048767089844, 890.7431640625, 257.12127685546875, 39.833961486816406, 408.826416015625, 225.04310607910156, 421.45623779296875, 81.55484008789062, 719.623779296875, -88.21558380126953, -127.68233489990234, 565.2658081054688, 476.40545654296875, 697.0963745117188, 283.3726501464844, 902.6312255859375, -127.03370666503906, 487.8942565917969, 623.3800048828125, 61.573665618896484, 11.272209167480469, 495.2685852050781, -150.05941772460938, 262.9589538574219, 619.5907592773438, 501.68011474609375, -171.98643493652344, 289.7355041503906, 1226.6429443359375, 16.382619857788086, 564.5010375976562, 136.45823669433594, 387.0745544433594, 758.6612548828125, 403.00799560546875, 910.02978515625, 319.9859313964844, 444.52081298828125, 362.1622009277344, 129.92576599121094, -34.92950439453125, 36.50019836425781, 35.142234802246094, 350.0599365234375, 46.47087097167969, 94.31657409667969, 350.5151062011719, 227.17886352539062, -127.81564331054688, 169.17184448242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000403.npy"} +{"epoch": 0.591776798825257, "step": 404, "batch_size": 64, "mean": 267.9746398925781, "std": 372.95635986328125, "min": -550.493408203125, "p10": -222.3148452758789, "median": 274.17657470703125, "p90": 732.0711730957031, "max": 1328.7203369140625, "pos_frac": 0.765625, "sample": [-287.34698486328125, 294.48681640625, 65.50267791748047, 622.6806640625, 554.3267211914062, 187.54476928710938, 216.1732940673828, 1024.1942138671875, 175.903564453125, 819.36181640625, 1328.7203369140625, 242.77342224121094, 314.09051513671875, 860.840087890625, -108.04926300048828, -378.179443359375, 902.1763305664062, 110.93157196044922, -298.07550048828125, 146.1975555419922, -54.43320083618164, -28.084678649902344, -85.93818664550781, 430.1328125, 265.9083251953125, 316.8669128417969, -198.13677978515625, 457.03741455078125, 282.44482421875, 365.4678039550781, 528.4534912109375, -274.9346618652344, 411.4967041015625, 112.44364166259766, 615.917724609375, 109.4330062866211, -187.7212371826172, 175.40977478027344, 333.3555908203125, 508.37646484375, -550.493408203125, 436.04351806640625, 93.88374328613281, 664.8273315429688, 242.14974975585938, -300.383056640625, -217.2865753173828, 520.8378295898438, 611.8355712890625, -122.27412414550781, 412.9123229980469, 3.4461288452148438, 409.2147216796875, 410.0732421875, 525.3634033203125, 734.0941772460938, 313.8700866699219, 83.15986633300781, 23.897924423217773, -224.46981811523438, 932.0772094726562, 158.31884765625, 727.350830078125, 384.179443359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000404.npy"} +{"epoch": 0.593245227606461, "step": 405, "batch_size": 64, "mean": 220.505615234375, "std": 387.6534118652344, "min": -513.6572265625, "p10": -265.24851226806635, "median": 176.62430572509766, "p90": 810.3806945800783, "max": 1164.2232666015625, "pos_frac": 0.71875, "sample": [840.9589233398438, 829.7310791015625, 232.60385131835938, -218.9597625732422, -464.9722900390625, -43.727142333984375, -431.380126953125, 95.70773315429688, 398.96142578125, -208.326416015625, 51.88860321044922, 1126.85302734375, 546.44091796875, 125.12472534179688, 87.11522674560547, 1164.2232666015625, -46.5892333984375, -50.491920471191406, 43.12493896484375, -156.020751953125, 116.4321060180664, 236.12220764160156, 114.91325378417969, 542.5643920898438, 176.24563598632812, 180.49017333984375, -154.07789611816406, 580.89599609375, -334.350341796875, -44.704856872558594, 419.1943664550781, 5.791387557983398, 448.69744873046875, 53.95911407470703, 415.00885009765625, 138.614501953125, 276.7876281738281, -320.38629150390625, 38.055641174316406, 42.20230484008789, 561.0447387695312, 193.51077270507812, 686.506103515625, 350.4693298339844, -55.04452896118164, 182.83840942382812, 616.031494140625, -209.64447021484375, 247.28115844726562, 302.674072265625, 390.43914794921875, -285.0865478515625, 177.0029754638672, 915.714111328125, 75.51932525634766, -513.6572265625, 499.833740234375, 329.9866638183594, -57.22320556640625, 969.337890625, 880.8406982421875, -297.18829345703125, 531.2208251953125, 765.2297973632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000405.npy"} +{"epoch": 0.5947136563876652, "step": 406, "batch_size": 64, "mean": 287.32806396484375, "std": 351.7142333984375, "min": -378.40216064453125, "p10": -73.98970794677734, "median": 215.78164672851562, "p90": 736.6491516113282, "max": 1270.327880859375, "pos_frac": 0.78125, "sample": [486.55230712890625, 146.3798828125, 275.2457580566406, 76.61575317382812, 146.98974609375, 277.02593994140625, 168.74105834960938, -371.1181640625, 1192.0693359375, 153.8802490234375, 527.738037109375, 204.4186248779297, 1270.327880859375, 563.4049072265625, 284.60699462890625, -51.5375862121582, 551.3724365234375, 309.3091735839844, 566.2672119140625, -136.8605194091797, 900.49169921875, 360.1121520996094, 343.27557373046875, -107.60279846191406, 109.9540786743164, 174.0177764892578, 10.80902099609375, -74.13177490234375, 515.6406860351562, 10.087799072265625, -73.65821838378906, 222.77781677246094, 512.9388427734375, -294.73681640625, -176.32171630859375, 469.17535400390625, 888.841064453125, 215.8262939453125, 150.7374725341797, -12.18482780456543, 161.19281005859375, 205.61642456054688, 476.1393127441406, -35.162960052490234, -67.75942993164062, 17.742591857910156, 710.0953369140625, 351.3290100097656, 34.62937927246094, 215.73699951171875, 636.6580200195312, 693.2188720703125, 480.64776611328125, 63.34452819824219, -378.40216064453125, -0.15331649780273438, 804.2437133789062, 520.8425903320312, 1067.3475341796875, 420.722412109375, 353.39495849609375, 748.0293579101562, 134.84397888183594, -12.781944274902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000406.npy"} +{"epoch": 0.5961820851688693, "step": 407, "batch_size": 64, "mean": 177.00746154785156, "std": 330.871337890625, "min": -879.1386108398438, "p10": -249.5128616333008, "median": 155.22250366210938, "p90": 581.2775146484375, "max": 840.6063842773438, "pos_frac": 0.71875, "sample": [-309.63287353515625, 537.818603515625, 572.6851806640625, -338.6717834472656, -268.3294677734375, -93.31368255615234, 240.8921356201172, -252.98104858398438, -261.5535583496094, 260.859375, 402.9456787109375, 106.70256042480469, 380.90582275390625, 776.760986328125, 626.0592041015625, 38.05281066894531, -97.37612915039062, 474.11639404296875, 103.37818145751953, -106.53843688964844, 429.2570495605469, 131.27587890625, 71.35865783691406, 116.29739379882812, 86.15879821777344, 491.9908447265625, 251.59567260742188, 240.5286865234375, 100.17298889160156, 141.1291046142578, -225.86660766601562, 203.50607299804688, 13.099578857421875, -98.31168365478516, 47.484886169433594, 138.40480041503906, 488.5469665527344, 805.5657958984375, 95.46064758300781, -879.1386108398438, 736.3140258789062, 368.30364990234375, 392.50140380859375, -33.280296325683594, 415.033935546875, 188.5054168701172, 183.8087158203125, 840.6063842773438, 328.2004699707031, 353.7787170410156, 574.6029052734375, -187.8848876953125, -93.74609375, 584.1380615234375, -28.54230499267578, 123.1031723022461, 169.31590270996094, 489.1383361816406, -89.42976379394531, -410.392333984375, 677.1944580078125, 239.6044464111328, -241.42042541503906, 307.7260437011719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000407.npy"} +{"epoch": 0.5976505139500734, "step": 408, "batch_size": 64, "mean": 205.55117797851562, "std": 269.80572509765625, "min": -282.23492431640625, "p10": -147.2392837524414, "median": 221.0392837524414, "p90": 597.2308349609377, "max": 801.7899169921875, "pos_frac": 0.75, "sample": [36.02021789550781, 694.9895629882812, 801.7899169921875, -0.291015625, 406.8426513671875, 361.32525634765625, -63.26708984375, 180.75978088378906, -93.464599609375, -160.1418914794922, -24.22230339050293, -273.6051025390625, 110.7087173461914, -67.754150390625, 16.873512268066406, -282.23492431640625, 126.41294860839844, 278.9280090332031, 241.51077270507812, 78.14811706542969, 179.6875762939453, 354.87908935546875, -112.311279296875, 419.3454895019531, -138.8547821044922, 235.37063598632812, -9.607950210571289, 249.57901000976562, 78.91890716552734, 733.1842651367188, 403.054931640625, 235.27188110351562, 521.7633056640625, -5.5869293212890625, -259.92425537109375, -234.48208618164062, 261.5597229003906, 276.6142578125, 59.59111022949219, 219.63735961914062, 545.9425048828125, 438.2306213378906, 369.1278076171875, 475.62469482421875, 165.5209503173828, -223.63723754882812, 682.845947265625, 34.798179626464844, 91.73912048339844, 82.99349975585938, 107.6988525390625, 661.9921264648438, 271.1365661621094, -150.8326416015625, 320.20257568359375, 329.9158630371094, 45.651405334472656, 461.6690673828125, 222.4412078857422, 282.80352783203125, 691.2996826171875, 619.2115478515625, 471.9222412109375, 319.95819091796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000408.npy"} +{"epoch": 0.5991189427312775, "step": 409, "batch_size": 64, "mean": 201.48634338378906, "std": 295.236572265625, "min": -427.4471130371094, "p10": -97.96447067260742, "median": 152.90426635742188, "p90": 549.7907165527344, "max": 1020.2586669921875, "pos_frac": 0.75, "sample": [-39.152496337890625, -368.25567626953125, 314.3778381347656, 224.45436096191406, 198.84703063964844, 350.6834716796875, -98.02057647705078, -97.83355712890625, -57.957122802734375, 23.146163940429688, 517.9061889648438, 547.9171142578125, -335.5121154785156, 159.90609741210938, 454.00238037109375, 133.47085571289062, 206.00369262695312, 267.1385803222656, 145.90243530273438, 812.572265625, 312.665283203125, -167.66421508789062, 283.64208984375, 26.698162078857422, 694.2183227539062, -112.35692596435547, -7.673377990722656, 51.96290588378906, -83.56295776367188, 142.1759490966797, 426.80157470703125, 771.1817626953125, 517.1396484375, -427.4471130371094, 58.60254669189453, 1020.2586669921875, -13.502647399902344, 97.41604614257812, -348.1221923828125, 141.3125, 536.6618041992188, -46.903038024902344, 550.5936889648438, 97.54823303222656, 259.2742919921875, 230.00830078125, 80.2065200805664, 133.6182403564453, 757.0858154296875, 511.19635009765625, -36.54840850830078, 189.3552703857422, -56.10084533691406, 176.6197967529297, 211.2155303955078, 452.6468505859375, 362.8457336425781, 319.75634765625, 52.405303955078125, 76.17301940917969, 754.0859375, 276.5916748046875, 130.059326171875, 133.3875732421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000409.npy"} +{"epoch": 0.6005873715124816, "step": 410, "batch_size": 64, "mean": 247.55059814453125, "std": 306.2297058105469, "min": -579.2340087890625, "p10": -110.66102981567381, "median": 272.3592071533203, "p90": 635.5991455078126, "max": 896.58251953125, "pos_frac": 0.8125, "sample": [188.92633056640625, 318.46051025390625, -114.07847595214844, 79.92716217041016, 552.3150634765625, 345.8554992675781, 528.7701416015625, 475.8417053222656, 499.0319519042969, 8.080389022827148, 124.25013732910156, 281.77850341796875, 12.060836791992188, 454.10931396484375, -102.6869888305664, -191.794677734375, 647.1859130859375, -451.31475830078125, -94.02908325195312, 349.25628662109375, 486.642333984375, 769.2391357421875, -116.78721618652344, 41.60957336425781, 600.1861572265625, 697.1895751953125, 512.2190551757812, 33.718196868896484, 304.02337646484375, 896.58251953125, 309.2027587890625, 31.38143539428711, 346.3968505859375, 37.047821044921875, 402.7340087890625, -84.26773071289062, -51.15515899658203, -13.207174301147461, 134.453369140625, -135.03497314453125, 216.3352508544922, 549.547607421875, 163.70318603515625, 379.51446533203125, 201.27725219726562, -579.2340087890625, 379.3644104003906, 245.79554748535156, 825.5021362304688, 341.7490234375, 107.18600463867188, 31.754430770874023, 477.64434814453125, 678.8931884765625, 353.27752685546875, 756.384765625, 150.35060119628906, 364.0002746582031, -374.6868896484375, 164.02879333496094, 262.9399108886719, 39.170188903808594, 608.5633544921875, 386.0577087402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000410.npy"} +{"epoch": 0.6020558002936858, "step": 411, "batch_size": 64, "mean": 234.196533203125, "std": 351.9522399902344, "min": -575.6903686523438, "p10": -120.88887786865232, "median": 162.95513916015625, "p90": 715.8546752929689, "max": 1392.0137939453125, "pos_frac": 0.75, "sample": [245.20208740234375, -7.225227355957031, -135.12802124023438, 119.82576751708984, 678.6229248046875, 106.48882293701172, 94.16891479492188, 322.01373291015625, 355.77069091796875, 402.127685546875, 134.36581420898438, -22.241342544555664, 154.1796875, -8.973751068115234, 19.790319442749023, -2.436830520629883, 538.310546875, 269.50787353515625, 85.44305419921875, 418.7723693847656, 877.7608032226562, 364.31976318359375, -244.551025390625, -22.558584213256836, 171.89022827148438, 157.41075134277344, 55.19789123535156, 186.25804138183594, 112.68966674804688, 1074.2210693359375, 144.68093872070312, 446.7810363769531, 861.0170288085938, -73.91454315185547, 516.4921264648438, 39.70771026611328, 312.72003173828125, 691.3140869140625, 765.3696899414062, 179.91839599609375, 10.827901840209961, 366.0824890136719, 114.70478057861328, 273.25177001953125, 152.0394287109375, 314.4179992675781, -57.10053634643555, -575.6903686523438, 1392.0137939453125, 270.4627380371094, 168.49952697753906, 361.6409606933594, 632.9580078125, 726.3720703125, 661.5037231445312, -261.67431640625, -29.52649688720703, -508.55535888671875, 182.64755249023438, -99.10140991210938, 780.8028564453125, -143.36895751953125, 0.2870941162109375, -130.2263641357422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000411.npy"} +{"epoch": 0.6035242290748899, "step": 412, "batch_size": 64, "mean": 250.42259216308594, "std": 323.71832275390625, "min": -524.0042114257812, "p10": -120.01763534545896, "median": 212.79204559326172, "p90": 638.8585632324221, "max": 1237.79638671875, "pos_frac": 0.796875, "sample": [-148.03524780273438, 183.47174072265625, 459.7098388671875, 463.30987548828125, 403.32513427734375, -94.60655975341797, 692.1907348632812, -34.88086700439453, 157.46890258789062, 539.1175537109375, 122.8829345703125, 356.0776672363281, 47.0369873046875, 371.90667724609375, 13.767011642456055, 546.7988891601562, 381.59954833984375, 84.9145278930664, -524.0042114257812, 202.89886474609375, 53.88434600830078, 14.915321350097656, 580.755615234375, 203.09767150878906, 82.01344299316406, 381.9548034667969, 10.984771728515625, 360.2105407714844, -177.77835083007812, 222.48641967773438, -81.52227020263672, 81.10474395751953, 243.89620971679688, 416.9413146972656, 119.55393981933594, -79.60392761230469, -329.3773193359375, -130.90809631347656, 136.35745239257812, 949.0408935546875, 13.42034912109375, -407.402099609375, 367.02069091796875, 606.0840454101562, 705.527587890625, -136.14613342285156, 521.8436889648438, 820.3369750976562, 136.8914794921875, 521.4332275390625, 355.9642639160156, -69.88088989257812, 115.46769714355469, 652.90478515625, 560.8133544921875, 1237.79638671875, 16.214488983154297, 353.57635498046875, 417.880126953125, -29.390260696411133, 459.4886474609375, 375.57098388671875, 654.6566162109375, 494.0160217285156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000412.npy"} +{"epoch": 0.604992657856094, "step": 413, "batch_size": 64, "mean": 239.5261688232422, "std": 316.60430908203125, "min": -765.2942504882812, "p10": -149.26843414306632, "median": 249.4036102294922, "p90": 598.4300598144531, "max": 934.4389038085938, "pos_frac": 0.828125, "sample": [48.68756103515625, -18.058300018310547, 224.143798828125, 387.62664794921875, 192.5804443359375, 119.41722106933594, 244.1177978515625, 247.318603515625, 80.24063873291016, 186.97396850585938, 585.7133178710938, 912.4024658203125, 26.23455047607422, 222.8284912109375, -6.474021911621094, -16.73676872253418, 327.11859130859375, 131.34146118164062, -45.89183044433594, 824.7447509765625, 123.96578979492188, 482.3504638671875, 433.8413391113281, -765.2942504882812, 77.17416381835938, 298.1253662109375, 303.09014892578125, 143.97802734375, 555.90966796875, 934.4389038085938, 369.21649169921875, 326.9971008300781, 623.7142333984375, 331.89288330078125, -198.2115478515625, 489.2293701171875, 385.1421813964844, 158.81674194335938, 299.4366760253906, 328.37493896484375, 76.40013122558594, 176.22181701660156, 648.541015625, -327.741455078125, 597.6237182617188, 371.7431335449219, 263.1424255371094, -271.7390441894531, -282.146728515625, -630.7737426757812, 166.93101501464844, 463.517822265625, 439.11737060546875, 768.3782958984375, 28.392498016357422, 598.775634765625, 251.48861694335938, 541.705810546875, 272.27203369140625, 305.5804443359375, 83.486572265625, 214.2457733154297, 391.56597900390625, -193.57269287109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000413.npy"} +{"epoch": 0.6064610866372981, "step": 414, "batch_size": 64, "mean": 256.2596435546875, "std": 312.81951904296875, "min": -688.4190673828125, "p10": -52.44013118743894, "median": 241.35254669189453, "p90": 675.9684631347658, "max": 941.3126220703125, "pos_frac": 0.84375, "sample": [352.0544128417969, 334.7887268066406, 471.47589111328125, 219.72657775878906, 44.64377212524414, 913.3582153320312, 213.3922119140625, -688.4190673828125, 29.167770385742188, 187.43890380859375, 348.1656799316406, 296.0670166015625, 695.6154174804688, 869.4827880859375, 239.57949829101562, -23.18794059753418, 319.27850341796875, 93.61376953125, -124.85509490966797, 109.37348175048828, 201.35406494140625, 532.0279541015625, -80.81248474121094, 112.63558959960938, 470.31396484375, 321.87322998046875, -292.4339904785156, -31.899187088012695, 452.4482421875, -61.24339294433594, 563.5106201171875, 513.5771484375, 17.737323760986328, 251.48167419433594, 690.5813598632812, 56.351863861083984, 391.06011962890625, -24.974655151367188, -336.2279052734375, 198.66090393066406, 342.2835693359375, 267.543701171875, 641.8717041015625, -316.5856628417969, 83.07496643066406, 570.7058715820312, 30.769546508789062, 632.306396484375, 130.42160034179688, 25.489574432373047, 243.12559509277344, 83.81993103027344, 164.15032958984375, 117.7304916381836, 265.9228515625, 283.203125, 802.1571044921875, 15.520965576171875, 440.9766845703125, 878.1817016601562, 941.3126220703125, 430.2574462890625, 131.6695098876953, 347.9255676269531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000414.npy"} +{"epoch": 0.6079295154185022, "step": 415, "batch_size": 64, "mean": 198.34219360351562, "std": 301.28546142578125, "min": -863.8922729492188, "p10": -115.32838439941406, "median": 206.86713409423828, "p90": 610.0275024414063, "max": 944.673583984375, "pos_frac": 0.765625, "sample": [271.8617858886719, -104.2083740234375, 347.8663024902344, 772.1407470703125, -46.43998718261719, 779.8704833984375, 580.7261962890625, 164.8421173095703, -66.92205047607422, 199.9141387939453, 512.6166381835938, 365.2052307128906, 459.90130615234375, 417.32525634765625, -112.9088363647461, -127.59614562988281, 209.41122436523438, -247.097900390625, 270.68682861328125, 213.28611755371094, 6.8314056396484375, 54.870880126953125, 5.678436279296875, 624.4818115234375, -141.18283081054688, 143.15841674804688, 730.834228515625, -36.418800354003906, 118.11048126220703, 326.58697509765625, -56.18463134765625, 364.1530456542969, 248.82501220703125, 82.46994018554688, 944.673583984375, -249.2105255126953, -79.96516418457031, 130.8048095703125, 445.99493408203125, 662.4857177734375, 59.3785285949707, 332.40789794921875, 209.63714599609375, 80.8265380859375, 120.44512939453125, 204.3230438232422, -33.793067932128906, 281.06634521484375, 435.4549865722656, -116.3653335571289, 226.2042236328125, 424.9907531738281, 291.3531494140625, 10.31646728515625, 413.09283447265625, 12.63859748840332, 220.09744262695312, 1.023651123046875, 488.54119873046875, -261.6840515136719, 622.585205078125, 73.86427307128906, -863.8922729492188, 273.90924072265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000415.npy"} +{"epoch": 0.6093979441997063, "step": 416, "batch_size": 64, "mean": 310.9082946777344, "std": 290.04217529296875, "min": -151.33755493164062, "p10": -57.46387901306152, "median": 256.5294952392578, "p90": 708.8130310058594, "max": 978.2022705078125, "pos_frac": 0.828125, "sample": [835.8713989257812, 107.27411651611328, 148.673583984375, 415.59228515625, -94.97933197021484, 556.1322631835938, 265.3983154296875, 128.70382690429688, 521.5895385742188, 683.6105346679688, -116.99515533447266, -61.432037353515625, 67.01996612548828, 302.6018371582031, 247.66067504882812, 496.55145263671875, 396.2001647949219, -15.214576721191406, 579.543701171875, 166.3425750732422, 422.83050537109375, 523.2650756835938, 98.87672424316406, 171.54275512695312, 675.732666015625, 246.98367309570312, 331.7649841308594, 728.7103881835938, 92.43026733398438, 736.2247314453125, 188.58433532714844, 580.2691040039062, 978.2022705078125, -151.33755493164062, -141.42041015625, 69.24434661865234, 557.4235229492188, 495.07147216796875, 11.420219421386719, 857.05419921875, -8.245368957519531, 959.2382202148438, 268.8201904296875, 221.3469696044922, 500.19708251953125, 500.6453857421875, 315.8455810546875, 612.076904296875, 222.8304443359375, 70.10590362548828, 38.90000534057617, 214.73045349121094, -120.20733642578125, 698.6795654296875, 362.3332824707031, -54.460357666015625, 335.8922119140625, -58.751102447509766, 237.47396850585938, -47.85026550292969, 136.65206909179688, 228.47146606445312, 447.2310791015625, 713.1559448242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000416.npy"} +{"epoch": 0.6108663729809104, "step": 417, "batch_size": 64, "mean": 263.2933044433594, "std": 254.3087615966797, "min": -231.77117919921875, "p10": -74.10251541137694, "median": 215.9885711669922, "p90": 638.176580810547, "max": 891.605224609375, "pos_frac": 0.859375, "sample": [66.7718276977539, 22.904800415039062, 1.0103397369384766, 366.09210205078125, 38.53951644897461, 522.1260375976562, 158.8865509033203, 173.70809936523438, -155.10586547851562, 682.8075561523438, 27.47378921508789, 615.884033203125, 405.0926513671875, 115.447265625, 247.10130310058594, -83.25743103027344, -97.17900085449219, 652.1322631835938, 653.29052734375, 45.398406982421875, 571.9356689453125, 202.77188110351562, 747.3836059570312, 163.0491943359375, 572.3687133789062, 891.605224609375, 303.9395751953125, 241.02432250976562, 172.59439086914062, 320.7917785644531, 77.80216217041016, 138.83192443847656, -95.0114517211914, 134.3623046875, 338.3971862792969, 603.588623046875, 68.41238403320312, 331.54571533203125, 393.0782775878906, 208.6048126220703, 86.16911315917969, 369.6442565917969, 223.37232971191406, 336.58477783203125, -63.94673156738281, 298.0130615234375, 155.59820556640625, 144.87704467773438, 484.24005126953125, 306.78265380859375, 488.79449462890625, -231.77117919921875, 363.2831115722656, 367.8371887207031, -27.760093688964844, -161.21823120117188, 206.70855712890625, 179.2281494140625, 647.1839599609375, 471.5484619140625, 617.1593627929688, 160.94920349121094, 659.7467041015625, -78.45499420166016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000417.npy"} +{"epoch": 0.6123348017621145, "step": 418, "batch_size": 64, "mean": 271.077880859375, "std": 371.2981872558594, "min": -415.42596435546875, "p10": -136.8207565307617, "median": 203.96804809570312, "p90": 715.9693725585938, "max": 1316.34912109375, "pos_frac": 0.75, "sample": [221.42050170898438, -193.30979919433594, 226.55108642578125, -18.27017593383789, 143.90989685058594, 929.5897827148438, 63.75122833251953, 75.63751220703125, 719.759765625, 581.4028930664062, -1.03692626953125, -32.32470703125, 479.71319580078125, -73.59197235107422, 1136.933349609375, 560.522705078125, 334.92584228515625, -144.3129425048828, 478.3869323730469, 66.060546875, -252.82864379882812, 707.1251220703125, -119.3389892578125, 529.2965087890625, 226.677490234375, 74.23609924316406, 231.4664306640625, 554.2350463867188, 540.9415893554688, 186.51559448242188, 531.0504150390625, 67.10050964355469, 107.20386505126953, 867.87646484375, -415.42596435546875, -110.5354232788086, -185.19674682617188, 417.36517333984375, 261.94720458984375, 476.515380859375, 1316.34912109375, -54.98334503173828, 62.30401611328125, 138.04678344726562, 276.0491638183594, 171.77426147460938, 167.1201171875, 155.4679412841797, -310.19561767578125, -311.693359375, 365.15484619140625, 47.76399230957031, 581.5087890625, -82.45817565917969, 627.945556640625, 297.7057189941406, 650.3280029296875, 1082.778564453125, 121.29727935791016, 26.894908905029297, 604.4244384765625, 961.417236328125, 313.0086364746094, -110.96929931640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000418.npy"} +{"epoch": 0.6138032305433186, "step": 419, "batch_size": 64, "mean": 297.6322021484375, "std": 323.2002258300781, "min": -448.2620849609375, "p10": -62.80826721191404, "median": 286.07728576660156, "p90": 717.0528686523438, "max": 1004.8865966796875, "pos_frac": 0.84375, "sample": [803.2567749023438, -359.80108642578125, 335.01885986328125, 552.1209106445312, 85.35193634033203, 821.10595703125, -7.372524261474609, -32.89250564575195, 29.58642578125, 668.890625, 1004.8865966796875, 197.90597534179688, 312.6055603027344, 544.8826904296875, 450.73565673828125, 535.1370849609375, 197.4822998046875, 149.833984375, -448.2620849609375, 49.73969268798828, -103.81182098388672, 723.583984375, -239.3646697998047, 281.0312805175781, 548.1175537109375, -310.131591796875, 173.23052978515625, 150.4095001220703, 516.0828857421875, 858.6677856445312, 369.1147766113281, 102.4271240234375, 443.9801940917969, 291.123291015625, 458.700927734375, 118.25521087646484, 125.82020568847656, 66.22250366210938, 521.5941162109375, 340.01947021484375, 618.8572998046875, 185.995361328125, 701.8135986328125, 19.792461395263672, 81.10453033447266, 76.52091979980469, 32.551239013671875, -71.33950805664062, 665.1754150390625, 568.9600830078125, 601.809814453125, 429.12890625, 84.74346923828125, 980.375732421875, 430.232421875, 432.8476257324219, 469.80010986328125, 273.36663818359375, 126.05926513671875, 735.3886108398438, -214.1179656982422, -42.90203857421875, 160.51510620117188, 376.49627685546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000419.npy"} +{"epoch": 0.6152716593245228, "step": 420, "batch_size": 64, "mean": 179.68783569335938, "std": 392.33544921875, "min": -834.665771484375, "p10": -228.80961608886713, "median": 143.80525970458984, "p90": 592.3000610351562, "max": 1213.5389404296875, "pos_frac": 0.65625, "sample": [-185.828125, 170.17807006835938, 26.561119079589844, -26.58517837524414, 587.7742919921875, -76.40190887451172, -247.23025512695312, -20.757659912109375, 212.7397918701172, 590.6685791015625, -51.73829650878906, -42.93199920654297, -60.6408576965332, 183.29258728027344, -22.04949951171875, 104.07038116455078, 570.91357421875, 272.2748107910156, 80.97477722167969, 271.3369445800781, 21.703916549682617, 266.0101013183594, -149.39508056640625, 494.8864440917969, -82.06582641601562, 155.50930786132812, -22.162918090820312, 396.2822265625, 140.5794677734375, -102.73956298828125, 543.6307373046875, 477.641357421875, 441.167236328125, -248.8625946044922, 147.0310516357422, 224.37120056152344, -304.351318359375, 15.490821838378906, 704.5339965820312, 736.0072021484375, -34.687095642089844, -834.665771484375, 663.1586303710938, 519.796630859375, 592.999267578125, 147.71337890625, 502.3230895996094, 474.470703125, 365.100341796875, 335.7917175292969, 11.017038345336914, -533.0274658203125, 173.2612762451172, 32.51279067993164, -37.29753494262695, 1213.5389404296875, -745.1215209960938, 65.99006652832031, 125.17582702636719, 325.46826171875, 1198.5826416015625, -114.61791229248047, -336.6946716308594, 1197.34423828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000420.npy"} +{"epoch": 0.6167400881057269, "step": 421, "batch_size": 64, "mean": 204.13345336914062, "std": 308.9214782714844, "min": -894.52978515625, "p10": -158.94231338500973, "median": 258.5935821533203, "p90": 578.8029418945314, "max": 856.6429443359375, "pos_frac": 0.78125, "sample": [335.8915710449219, 88.02059936523438, 116.55608367919922, -117.42768096923828, 258.0636901855469, 471.0860595703125, 799.7211303710938, -25.16900634765625, 109.47270202636719, 83.89549255371094, 325.85382080078125, -10.12625503540039, 558.771484375, 649.5357666015625, 469.7071533203125, 638.7640380859375, 311.4921875, -226.7058563232422, 110.61951446533203, 147.97586059570312, 763.1885986328125, 466.464111328125, 159.11712646484375, -223.96926879882812, 234.10292053222656, 263.14483642578125, 411.49468994140625, 259.12347412109375, 36.141456604003906, 207.44801330566406, 558.9658203125, 334.7115173339844, 588.3483276367188, 285.50909423828125, 415.67877197265625, -519.0563354492188, 429.42333984375, 364.97265625, -396.3625793457031, 587.3045654296875, 338.07379150390625, -18.8314266204834, 308.46356201171875, 856.6429443359375, 273.7892150878906, 292.9276123046875, 139.3118133544922, -2.1058216094970703, 21.20530128479004, 426.629150390625, 98.72142028808594, 3.3199539184570312, 269.6951904296875, -894.52978515625, -215.6904754638672, 65.61485290527344, 10.77471923828125, -115.96572875976562, -92.1058349609375, 266.650390625, 245.61875915527344, -176.7342987060547, 283.3056335449219, 358.01019287109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000421.npy"} +{"epoch": 0.618208516886931, "step": 422, "batch_size": 64, "mean": 242.64328002929688, "std": 339.99072265625, "min": -720.4974365234375, "p10": -91.71028747558593, "median": 226.37308502197266, "p90": 646.4427612304688, "max": 1220.555908203125, "pos_frac": 0.78125, "sample": [286.66485595703125, 653.4700927734375, 812.8734741210938, 628.6075439453125, 347.80023193359375, -213.8470001220703, -560.2924194335938, 729.80322265625, -16.91005516052246, 1220.555908203125, -96.06056213378906, 166.13987731933594, 220.01268005371094, 48.51690673828125, 489.01220703125, 187.24472045898438, 70.47466278076172, -175.30844116210938, -33.630672454833984, 223.3037872314453, 51.3760986328125, 792.4459838867188, 334.8858642578125, 88.31369018554688, 413.90863037109375, 153.53811645507812, 186.31185913085938, 630.045654296875, 337.45159912109375, 615.117431640625, 764.0767211914062, 356.19415283203125, 417.5897521972656, 172.2633514404297, 310.5035705566406, 328.2825927734375, 328.09368896484375, -380.34246826171875, 286.7450256347656, 16.331146240234375, -39.75386428833008, 320.5122375488281, 229.4423828125, -720.4974365234375, -49.16958236694336, 562.5838623046875, 469.707763671875, 50.99922180175781, 706.49462890625, -59.02201843261719, 465.74884033203125, 206.7989959716797, 28.19085693359375, 105.87294006347656, 546.802490234375, 432.4952697753906, 285.40423583984375, -81.55964660644531, 460.0191955566406, 515.7611083984375, -58.146759033203125, 221.6717987060547, 180.67410278320312, -443.4249267578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000422.npy"} +{"epoch": 0.6196769456681351, "step": 423, "batch_size": 64, "mean": 269.53515625, "std": 315.45367431640625, "min": -395.78094482421875, "p10": -122.72834320068358, "median": 236.45469665527344, "p90": 731.1557006835939, "max": 1096.186767578125, "pos_frac": 0.796875, "sample": [264.0618896484375, 86.82659149169922, 188.3196563720703, 60.23797607421875, -136.00991821289062, -312.2400207519531, 791.1408081054688, 254.24154663085938, 622.6700439453125, 519.93505859375, 741.1305541992188, -129.65359497070312, 856.35791015625, 204.7579345703125, 503.0863952636719, -76.25970458984375, 821.38037109375, 854.9627075195312, 803.0806884765625, 707.8810424804688, -112.669677734375, -23.5726318359375, 385.497314453125, 181.38656616210938, 526.5179443359375, 283.72784423828125, -12.026128768920898, 39.830474853515625, -395.78094482421875, -165.07485961914062, 386.925048828125, -127.03919982910156, 284.8093566894531, 316.35675048828125, 626.9837646484375, 270.1678466796875, 4.221235275268555, 447.4378662109375, -143.7971954345703, 101.40200805664062, 350.9774475097656, 632.6176147460938, 233.30239868164062, 361.9143981933594, 1096.186767578125, 140.6947479248047, 360.9783020019531, 19.940185546875, 476.02587890625, 45.81471252441406, 64.25843811035156, 201.62229919433594, 472.96783447265625, 486.1483154296875, 239.60699462890625, 200.9539031982422, -58.3697509765625, 190.49667358398438, 175.95474243164062, -103.06759643554688, 608.3738403320312, 44.101966857910156, 134.25784301757812, 373.2811584472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000423.npy"} +{"epoch": 0.6211453744493393, "step": 424, "batch_size": 64, "mean": 250.56887817382812, "std": 352.5650634765625, "min": -361.3214111328125, "p10": -210.34189453124998, "median": 234.7029800415039, "p90": 700.8061828613282, "max": 1250.06591796875, "pos_frac": 0.78125, "sample": [-222.54214477539062, 338.8777770996094, 52.44303894042969, -142.32061767578125, -94.24365234375, 630.6431884765625, 412.6286315917969, 318.4145202636719, 103.61780548095703, -361.3214111328125, 281.6503601074219, 277.3232727050781, -75.18360900878906, -238.83982849121094, 238.44447326660156, 707.4198608398438, 457.7587585449219, 962.41796875, 131.45533752441406, 424.9835510253906, 535.5590209960938, 220.9104766845703, 714.9595947265625, 572.527587890625, 303.9883728027344, -126.75257110595703, 210.95352172851562, 22.343055725097656, -219.61001586914062, -188.71627807617188, 150.33226013183594, 1185.4278564453125, -271.7002258300781, 267.9373474121094, 351.58111572265625, -110.015380859375, 342.9617614746094, -248.35414123535156, 239.1640625, 80.55593872070312, -71.47777557373047, 121.62601470947266, -306.2066650390625, 34.374698638916016, 324.8292236328125, 348.6375427246094, 230.96148681640625, 87.25775909423828, 303.8656921386719, 1250.06591796875, 386.6893615722656, 685.374267578125, 411.8565368652344, 193.93130493164062, 377.94049072265625, 90.10578918457031, 864.5125122070312, 172.79299926757812, 178.06735229492188, 1154.8184814453125, 473.8558044433594, 250.3271942138672, 124.76924133300781, 109.7529296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000424.npy"} +{"epoch": 0.6226138032305433, "step": 425, "batch_size": 64, "mean": 187.0858612060547, "std": 332.3014831542969, "min": -805.51611328125, "p10": -143.3329605102539, "median": 198.3335723876953, "p90": 600.7332580566407, "max": 861.7115478515625, "pos_frac": 0.78125, "sample": [-119.47503662109375, 20.818769454956055, 377.5012512207031, -361.48919677734375, -805.51611328125, 213.26400756835938, 138.59437561035156, 302.8493347167969, 105.66232299804688, 249.66212463378906, -27.29625701904297, 539.7578125, -789.98583984375, -144.7097625732422, 190.25318908691406, 112.37274169921875, 137.58609008789062, 178.0115966796875, 605.8336791992188, 113.65306091308594, 291.82861328125, 145.46319580078125, -49.64370346069336, 206.41395568847656, 665.2314453125, 7.929756164550781, 525.8685302734375, 106.76509094238281, -97.99772644042969, 733.5400390625, 153.34860229492188, 588.832275390625, 652.2297973632812, -179.2561492919922, 168.4019317626953, 294.1004943847656, 634.8357543945312, 142.0110321044922, 17.67681884765625, 683.995849609375, -596.7979125976562, 390.18048095703125, 141.60987854003906, -392.5075988769531, 409.1110534667969, 382.75396728515625, 464.462158203125, 221.7850341796875, 6.1356353759765625, 345.88372802734375, 209.16143798828125, 259.24114990234375, -114.61964416503906, -140.12042236328125, 265.50579833984375, 377.9183349609375, 508.2755126953125, 861.7115478515625, 524.6410522460938, 271.4290771484375, 514.5905151367188, -97.4742660522461, 266.6914978027344, 165.0032958984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000425.npy"} +{"epoch": 0.6240822320117474, "step": 426, "batch_size": 64, "mean": 238.563720703125, "std": 307.7294616699219, "min": -578.775390625, "p10": -57.92626342773437, "median": 167.80438995361328, "p90": 684.3603149414064, "max": 844.4959716796875, "pos_frac": 0.796875, "sample": [269.80413818359375, 105.98285675048828, 101.93616485595703, 167.85397338867188, 160.52670288085938, 81.46583557128906, 94.2789306640625, 213.49961853027344, 699.4610595703125, 133.97647094726562, 167.7548065185547, 226.073974609375, -46.547122955322266, -32.40709686279297, 649.125244140625, -214.9139404296875, 38.74070739746094, 192.40328979492188, 491.9070129394531, 198.66571044921875, -53.357582092285156, 712.4862670898438, -153.06854248046875, 218.84249877929688, 155.0754852294922, 100.66363525390625, -164.7483673095703, 364.59429931640625, 778.736328125, 523.73486328125, 154.15196228027344, 604.4717407226562, 68.49536895751953, 363.7682800292969, 156.94837951660156, 801.4347534179688, 15.81793212890625, -299.6405029296875, 116.7990951538086, 446.0213623046875, -35.890716552734375, 321.47076416015625, 844.4959716796875, 12.009757995605469, 471.22650146484375, 45.137596130371094, 352.524169921875, 556.8663940429688, -40.52398681640625, 95.09538269042969, -334.879638671875, 308.4739074707031, -11.812274932861328, 456.9428405761719, 607.9674072265625, 288.37921142578125, -59.88426971435547, 477.082275390625, 831.58203125, -578.775390625, 621.1731567382812, 812.0177001953125, 68.75112915039062, 547.8335571289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000426.npy"} +{"epoch": 0.6255506607929515, "step": 427, "batch_size": 64, "mean": 189.3392333984375, "std": 290.7200927734375, "min": -375.8910827636719, "p10": -149.3357391357422, "median": 155.38015747070312, "p90": 471.47783508300796, "max": 1007.0493774414062, "pos_frac": 0.765625, "sample": [370.4399108886719, 38.19511032104492, 353.9605407714844, 399.13519287109375, 832.87841796875, 93.21269989013672, -56.72948455810547, 40.545082092285156, 416.2109069824219, 204.74819946289062, -18.020965576171875, 60.24385070800781, 133.80050659179688, 163.59881591796875, 87.36589050292969, 333.2333984375, 411.7373352050781, 869.4176025390625, 94.10285186767578, -213.69976806640625, 2.2264633178710938, 147.1614990234375, 351.544677734375, 312.2613220214844, 131.29995727539062, 270.851318359375, 380.22259521484375, 890.9035034179688, 40.69627380371094, 135.78431701660156, -238.45254516601562, 486.0047912597656, 437.58160400390625, -126.50305938720703, -44.77248001098633, 359.1181640625, 381.83843994140625, -57.827816009521484, 548.672607421875, 203.14669799804688, 218.7523193359375, 124.58604431152344, 106.92815399169922, 176.15122985839844, -373.7708740234375, 307.39666748046875, 382.71710205078125, 393.282958984375, -363.46966552734375, -279.3774108886719, 505.4437255859375, 253.1900634765625, 411.7676086425781, 19.94316864013672, 1007.0493774414062, -150.0076904296875, 61.55732727050781, 317.8071594238281, 368.3059997558594, 58.24140167236328, -147.76785278320312, -375.8910827636719, -28.041709899902344, -103.21830749511719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000427.npy"} +{"epoch": 0.6270190895741556, "step": 428, "batch_size": 64, "mean": 255.57199096679688, "std": 389.7872009277344, "min": -799.4129638671875, "p10": -175.10899200439448, "median": 250.8757781982422, "p90": 742.2724365234376, "max": 1193.638671875, "pos_frac": 0.734375, "sample": [-23.196636199951172, -230.46212768554688, 2.0543460845947266, 429.9582214355469, 856.088623046875, 377.6510009765625, 124.91642761230469, 610.5471801757812, 468.57568359375, 1193.638671875, -278.4408874511719, 97.15606689453125, 509.3649597167969, 204.72323608398438, 468.1255798339844, -53.93890380859375, -545.9903564453125, 782.1144409179688, 437.75482177734375, 595.9990234375, 503.753173828125, 399.3345947265625, -105.57028198242188, -30.95236587524414, 418.3186340332031, 799.4375, 344.6624755859375, 246.425537109375, -122.72323608398438, 340.59307861328125, -72.34484100341797, 367.32989501953125, 862.8880615234375, -100.43773651123047, 255.32601928710938, 54.02693176269531, 622.8900146484375, 468.433837890625, 29.764881134033203, 175.58563232421875, 103.10223388671875, 153.46258544921875, -46.51819610595703, -131.42684936523438, -799.4129638671875, -119.40016174316406, 72.14289855957031, 185.96627807617188, 1074.521240234375, 174.0731201171875, 454.4253845214844, 461.43731689453125, 650.9150390625, -685.3593139648438, 580.9317626953125, 284.78948974609375, 712.3501586914062, 751.9564819335938, 399.2374267578125, 156.7220916748047, 719.6763305664062, -193.8299102783203, 128.61167907714844, -215.1483154296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000428.npy"} +{"epoch": 0.6284875183553598, "step": 429, "batch_size": 64, "mean": 206.82952880859375, "std": 359.9501953125, "min": -520.5164794921875, "p10": -242.40450592041014, "median": 204.4083251953125, "p90": 613.7321899414063, "max": 1107.6165771484375, "pos_frac": 0.734375, "sample": [38.672874450683594, 190.89266967773438, -431.73223876953125, 334.335693359375, -75.6586685180664, 599.70263671875, -114.23131561279297, 118.40645599365234, 143.92547607421875, 377.49761962890625, -461.07623291015625, 183.79580688476562, 94.26155853271484, 109.60503387451172, -251.53565979003906, 417.2943420410156, 127.02053833007812, -249.69606018066406, 848.354248046875, -245.3629913330078, 1107.6165771484375, 925.7019653320312, 0.8909912109375, 414.5664978027344, 265.94586181640625, 36.295413970947266, 330.49090576171875, 6.783916473388672, 645.208251953125, 617.821044921875, 276.30572509765625, 361.06414794921875, 407.6278076171875, -519.8951416015625, -120.1011734008789, -214.7854461669922, -162.09603881835938, 235.20680236816406, 258.21466064453125, -22.55110740661621, 543.66162109375, 412.6287841796875, 282.99493408203125, -18.768157958984375, 793.59912109375, 370.74432373046875, 578.1663208007812, -220.94436645507812, 163.14732360839844, -235.50137329101562, 217.92398071289062, 173.42323303222656, 536.0804443359375, 240.89678955078125, -77.97138214111328, -520.5164794921875, 12.142318725585938, 282.98077392578125, 68.6783218383789, 604.1915283203125, 420.0848083496094, 1066.227294921875, 520.9799194335938, 417.45660400390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000429.npy"} +{"epoch": 0.6299559471365639, "step": 430, "batch_size": 64, "mean": 206.62753295898438, "std": 287.2034912109375, "min": -392.3701171875, "p10": -103.9875419616699, "median": 173.93126678466797, "p90": 673.8929321289063, "max": 1039.86572265625, "pos_frac": 0.734375, "sample": [222.68075561523438, 197.73167419433594, -10.413047790527344, -31.45528793334961, 682.7952880859375, 393.8990173339844, 37.818328857421875, 268.1669921875, 182.54132080078125, 534.9597778320312, -11.46114730834961, -31.608131408691406, 77.36066436767578, -113.9125747680664, 184.2261505126953, 696.6712036132812, -282.533203125, 424.25177001953125, 76.83119201660156, 102.57620239257812, 142.5326690673828, 109.75040435791016, 164.5299530029297, 186.83468627929688, -41.82244110107422, 165.3212127685547, 408.77783203125, 381.7763366699219, -80.82913208007812, 192.34361267089844, 102.67695617675781, -161.00791931152344, 1039.86572265625, -18.915603637695312, 83.34085845947266, 679.5597534179688, -18.23175811767578, -392.3701171875, 21.4908504486084, 234.9667510986328, 418.0084228515625, 757.064453125, 423.6175231933594, 143.14178466796875, 258.5589904785156, 706.6851806640625, -178.50685119628906, -242.49163818359375, 585.3353271484375, 127.25318908691406, 94.56416320800781, -34.84305953979492, 428.30816650390625, 245.68028259277344, -26.488605499267578, 306.5384521484375, 357.2646484375, -232.70040893554688, 355.12615966796875, 817.0230712890625, 660.6703491210938, 220.48529052734375, 27.626808166503906, 204.52342224121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000430.npy"} +{"epoch": 0.631424375917768, "step": 431, "batch_size": 64, "mean": 297.0145568847656, "std": 320.4397888183594, "min": -440.065185546875, "p10": -94.56382064819334, "median": 264.4649353027344, "p90": 653.73466796875, "max": 1004.4764404296875, "pos_frac": 0.84375, "sample": [-263.49761962890625, 229.0268096923828, -196.43910217285156, 484.5334777832031, 249.41232299804688, 110.9877700805664, 825.638427734375, 158.3970947265625, 339.9207763671875, 533.7242431640625, 235.03570556640625, 455.8324890136719, 85.21566772460938, -70.9620361328125, -104.67887115478516, 124.18565368652344, 15.113250732421875, -440.065185546875, 497.1868896484375, -245.70591735839844, 656.7523193359375, 646.6934814453125, 1004.4764404296875, 1000.4146728515625, 141.46253967285156, 633.570556640625, 415.01287841796875, 399.3584289550781, 224.3223114013672, 392.28759765625, 732.5723876953125, 337.0937805175781, 294.30419921875, 98.03688049316406, 449.8458251953125, -403.2462158203125, 925.0050048828125, 8.026535034179688, 279.5175476074219, -66.77559661865234, 493.2283020019531, 235.8854522705078, 956.7603759765625, -12.289138793945312, 128.95753479003906, 220.13485717773438, 443.56573486328125, 21.75486183166504, 231.82733154296875, 346.6597900390625, 350.5804748535156, 575.701416015625, 627.6199340820312, 372.28411865234375, 530.469482421875, 432.217041015625, 206.539794921875, 102.01026153564453, -159.74493408203125, 233.24913024902344, 637.2258911132812, 170.87994384765625, 147.0607452392578, 524.7593383789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000431.npy"} +{"epoch": 0.6328928046989721, "step": 432, "batch_size": 64, "mean": 290.58984375, "std": 293.08392333984375, "min": -273.5431213378906, "p10": -14.13495712280272, "median": 247.97593688964844, "p90": 701.8621032714844, "max": 1317.9202880859375, "pos_frac": 0.875, "sample": [91.38752746582031, 213.40087890625, 265.36383056640625, 258.99847412109375, 212.5140838623047, 232.67820739746094, 109.17789459228516, 74.73616027832031, 222.60516357421875, 81.86146545410156, 49.567691802978516, 19.196788787841797, 577.658935546875, 438.52752685546875, -53.22591018676758, 1317.9202880859375, 691.5863647460938, 282.3153381347656, -1.5282440185546875, 178.03631591796875, 107.81057739257812, 116.37960815429688, 228.49185180664062, 46.924102783203125, 466.85174560546875, 285.07989501953125, 756.6013793945312, 210.8575897216797, 435.892822265625, 74.25738525390625, 253.769287109375, -19.53783416748047, -273.5431213378906, 286.5189208984375, -157.64425659179688, 635.4495239257812, 178.42430114746094, -135.0766143798828, 366.5148010253906, 97.51023864746094, 242.82745361328125, -110.09310913085938, 283.5086669921875, 499.7557373046875, 306.7465515136719, 401.8951721191406, 656.0388793945312, 428.0533447265625, 410.40020751953125, 391.64361572265625, 147.5566864013672, 520.717041015625, 47.356719970703125, 847.4330444335938, 253.12442016601562, 178.59564208984375, 731.9678955078125, 399.2531433105469, 676.8466796875, 839.21484375, -230.76803588867188, 729.4998168945312, 15.597999572753906, 706.2659912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000432.npy"} +{"epoch": 0.6343612334801763, "step": 433, "batch_size": 64, "mean": 188.51522827148438, "std": 337.96759033203125, "min": -881.205322265625, "p10": -172.2292907714844, "median": 219.57298278808594, "p90": 581.9626281738282, "max": 730.8955078125, "pos_frac": 0.75, "sample": [13.970916748046875, 511.0572509765625, 515.502197265625, 424.07586669921875, 408.0149230957031, -111.03919982910156, 341.55462646484375, -776.7498779296875, 544.7594604492188, 611.3289184570312, -881.205322265625, 10.368270874023438, 670.6236572265625, 393.3728942871094, 192.3787841796875, -98.36015319824219, 160.9612579345703, 350.70806884765625, 624.4944458007812, -767.3458862304688, -18.630592346191406, 571.0934448242188, 586.620849609375, 281.19525146484375, 152.75921630859375, 174.19981384277344, 75.82133483886719, 213.48159790039062, -70.65885925292969, 402.3205871582031, 79.7760009765625, 730.8955078125, -306.1537780761719, 423.64794921875, 327.47021484375, 444.91552734375, 500.7567138671875, -266.8402099609375, 297.6226806640625, 17.084022521972656, 647.2061767578125, 566.385986328125, 217.47964477539062, -115.28185272216797, 230.941650390625, 65.6402816772461, 340.3006286621094, -5.912498474121094, -170.7470245361328, -190.30084228515625, 592.9439086914062, 427.7851867675781, 221.66632080078125, 39.75838088989258, 109.704833984375, -172.8645477294922, -24.938438415527344, 386.9478454589844, -51.19744873046875, 7.548057556152344, 301.03558349609375, 426.455078125, 181.55421447753906, 277.0157470703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000433.npy"} +{"epoch": 0.6358296622613803, "step": 434, "batch_size": 64, "mean": 225.47265625, "std": 357.62359619140625, "min": -309.36431884765625, "p10": -99.6663459777832, "median": 165.14928436279297, "p90": 691.5034790039064, "max": 2019.46923828125, "pos_frac": 0.75, "sample": [495.9696350097656, 169.88856506347656, 2019.46923828125, 172.55604553222656, -148.62364196777344, 113.77371215820312, 188.98822021484375, 475.7235412597656, 34.29267120361328, -42.65238952636719, 252.93531799316406, -18.942550659179688, 95.5790023803711, 703.7735595703125, -25.699321746826172, 69.06436157226562, -88.29247283935547, -309.36431884765625, 452.9741516113281, 662.873291015625, 711.8331298828125, -189.98251342773438, 156.5860595703125, 223.7340087890625, 284.842041015625, 204.48464965820312, -28.34804344177246, -40.41387939453125, 245.1105194091797, 237.83206176757812, 82.29165649414062, 219.30801391601562, 402.296630859375, -84.50105285644531, 45.58473205566406, 405.199951171875, 116.92868041992188, 13.935182571411133, 174.15045166015625, 229.49404907226562, -38.073699951171875, 394.3860778808594, 569.8170166015625, 61.18684387207031, 160.41000366210938, 237.01217651367188, 388.9670104980469, 89.79853820800781, 119.8153305053711, 801.2999877929688, 281.6319580078125, -51.47589111328125, 809.4622802734375, -211.18482971191406, 120.19309997558594, 110.72830200195312, 797.09716796875, 196.09356689453125, -104.54086303710938, -216.23199462890625, 921.5592651367188, 517.9247436523438, 5.5838165283203125, -215.86256408691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000434.npy"} +{"epoch": 0.6372980910425844, "step": 435, "batch_size": 64, "mean": 245.1993408203125, "std": 311.724609375, "min": -642.1610107421875, "p10": -68.93781013488767, "median": 215.7286148071289, "p90": 637.5289062500001, "max": 1004.6622314453125, "pos_frac": 0.828125, "sample": [969.287353515625, 325.74658203125, -642.1610107421875, 330.7864990234375, 561.9165649414062, 180.61911010742188, 167.76541137695312, 390.59381103515625, 352.5667419433594, 152.09823608398438, 526.6511840820312, 30.814498901367188, 88.45616149902344, 286.6231384277344, -80.85011291503906, 580.7318725585938, 1004.6622314453125, 901.7537841796875, 234.4599609375, -92.21409606933594, -32.644622802734375, 6.873870849609375, -31.967117309570312, 177.76744079589844, -113.92352294921875, 131.93190002441406, 252.1055145263672, 208.42080688476562, -159.904052734375, 201.55999755859375, 262.3230895996094, 32.78892517089844, 122.5830078125, 480.7873840332031, 565.5068359375, 379.9054260253906, 342.8929748535156, -458.93927001953125, 421.7835388183594, -314.8530578613281, 228.53414916992188, 276.1452941894531, 646.4521484375, 7.3427581787109375, 93.01888275146484, -41.14243698120117, 616.7080078125, 549.0619506835938, 9.170413970947266, -17.610170364379883, 842.351806640625, 45.33122253417969, 299.11004638671875, 124.01754760742188, 197.73721313476562, 700.7958984375, 653.7499389648438, 255.41580200195312, 223.0364227294922, 546.8067016601562, 172.59593200683594, 2.4449920654296875, 335.65435791015625, 180.72201538085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000435.npy"} +{"epoch": 0.6387665198237885, "step": 436, "batch_size": 64, "mean": 220.5616912841797, "std": 290.15338134765625, "min": -578.1475219726562, "p10": -136.1795310974121, "median": 178.23751831054688, "p90": 566.2327697753907, "max": 908.6744384765625, "pos_frac": 0.796875, "sample": [-104.67491149902344, 175.2628631591797, -139.95712280273438, 88.7594223022461, 577.265625, 265.9139099121094, 363.7734680175781, 755.7425537109375, 562.6019897460938, -115.38143157958984, 73.64775085449219, 804.734619140625, -44.6103515625, -150.91981506347656, 296.26739501953125, 233.06777954101562, -157.45924377441406, 43.71234130859375, 895.2701416015625, -267.9715576171875, 451.30987548828125, 519.9169921875, 277.12005615234375, 370.74554443359375, 474.8962097167969, 111.4405517578125, 408.42718505859375, 567.788818359375, 64.62834167480469, 97.81794738769531, 908.6744384765625, 85.44348907470703, 535.7453002929688, 168.05397033691406, 287.25091552734375, 141.61544799804688, -31.017044067382812, -15.505271911621094, 259.98052978515625, 127.4107666015625, 367.33856201171875, 77.33267211914062, 228.11354064941406, 157.66806030273438, 243.02598571777344, 302.06512451171875, 242.52076721191406, -170.38189697265625, 181.21217346191406, 150.23883056640625, 283.4156188964844, 173.697509765625, 413.4731750488281, 556.4076538085938, 858.4790649414062, 4.0478363037109375, 115.96955108642578, 2.3323516845703125, -578.1475219726562, 268.23162841796875, -127.36515045166016, 102.35220336914062, 448.84185791015625, -151.71060180664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000436.npy"} +{"epoch": 0.6402349486049926, "step": 437, "batch_size": 64, "mean": 226.75625610351562, "std": 286.6820983886719, "min": -526.2520751953125, "p10": -87.55421829223633, "median": 206.86681365966797, "p90": 567.7707702636719, "max": 990.3386840820312, "pos_frac": 0.78125, "sample": [123.52711486816406, -298.4039306640625, 281.37359619140625, 549.9862670898438, 383.5453796386719, 3.414846420288086, 597.6718139648438, 173.73483276367188, 304.951904296875, -43.698333740234375, -526.2520751953125, 153.34774780273438, -276.6391906738281, 483.9843444824219, -25.27374267578125, 162.3505096435547, 209.74081420898438, -247.7161865234375, 146.14797973632812, 623.0459594726562, 70.44603729248047, 511.7767333984375, 237.18772888183594, -84.91466522216797, -136.68817138671875, 670.989501953125, 148.84939575195312, 111.82015228271484, 418.8471374511719, 437.75457763671875, 779.1260375976562, 437.29193115234375, 765.0364990234375, -97.20866394042969, 990.3386840820312, -88.68545532226562, -32.91771697998047, 3.252408981323242, 251.34890747070312, 178.82156372070312, 14.531639099121094, 13.045135498046875, 203.99281311035156, 283.865966796875, 403.713134765625, 122.11160278320312, 441.2350158691406, 467.4747009277344, 503.85821533203125, 537.0472412109375, 112.63539123535156, 486.4712219238281, 384.00238037109375, -45.874969482421875, -41.609642028808594, 575.3927001953125, 358.8702087402344, 376.15484619140625, -65.57878112792969, 269.151611328125, 52.017215728759766, 447.0046691894531, 257.1131896972656, 4.462532043457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000437.npy"} +{"epoch": 0.6417033773861968, "step": 438, "batch_size": 64, "mean": 261.35968017578125, "std": 291.9039001464844, "min": -420.16357421875, "p10": -55.73866271972656, "median": 248.4976806640625, "p90": 659.2117309570314, "max": 967.6296997070312, "pos_frac": 0.84375, "sample": [59.5440673828125, 49.220008850097656, 406.5566711425781, -111.40602111816406, 243.34213256835938, 128.06735229492188, 753.3958740234375, -6.654191970825195, 194.69393920898438, 14.83270263671875, 272.2636413574219, 424.15234375, 4.035671234130859, 164.93310546875, 406.9921875, 458.91876220703125, 285.14874267578125, 967.6296997070312, 76.28260803222656, 262.635009765625, 200.52532958984375, 580.8717651367188, 425.92840576171875, 642.2637329101562, 161.27401733398438, 257.84710693359375, 41.587379455566406, 291.9886169433594, -371.0447082519531, -53.85333251953125, -56.546661376953125, 42.13883972167969, 567.849609375, 694.8446655273438, 621.1756591796875, 368.2451477050781, 253.65322875976562, -420.16357421875, 539.1712036132812, 315.81182861328125, 688.54150390625, 156.3309326171875, -52.29555892944336, 733.8414916992188, -116.8023681640625, 205.35354614257812, 580.6563720703125, 397.5653991699219, 589.8814086914062, 109.09008026123047, 80.22930908203125, 283.4020080566406, 728.7869873046875, 98.06547546386719, -68.7291259765625, 192.769775390625, 156.827392578125, -413.40948486328125, 10.045158386230469, 589.526611328125, 185.31483459472656, 666.4751586914062, 285.485107421875, 481.9140930175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000438.npy"} +{"epoch": 0.6431718061674009, "step": 439, "batch_size": 64, "mean": 210.98980712890625, "std": 335.1656188964844, "min": -459.57623291015625, "p10": -139.1959106445312, "median": 141.97432708740234, "p90": 706.5486022949219, "max": 1071.1954345703125, "pos_frac": 0.734375, "sample": [221.11856079101562, 798.95458984375, 116.86784362792969, 343.40155029296875, 22.38543701171875, -102.30624389648438, 709.8272705078125, 56.13926696777344, 144.43968200683594, 94.87947845458984, -352.17169189453125, 746.0670166015625, 84.99031066894531, -19.50725555419922, -60.83123016357422, 274.0227966308594, 337.82989501953125, 576.7904052734375, 671.8612060546875, 181.46392822265625, 100.29582214355469, 972.023681640625, 255.58547973632812, -25.481658935546875, 203.1715087890625, -209.1583251953125, 93.78973388671875, -112.49702453613281, -3.3451385498046875, 23.54689598083496, 328.8151550292969, -193.06655883789062, 309.8017272949219, 516.2877197265625, 1071.1954345703125, 340.5938415527344, 554.7990112304688, 252.96420288085938, 87.87849426269531, 193.3729248046875, 139.50897216796875, -90.3205795288086, -432.11761474609375, -38.545562744140625, -13.302085876464844, 218.94671630859375, 132.3121337890625, 253.14393615722656, -323.245849609375, 575.56787109375, 698.8983764648438, 38.45951461791992, 214.2776641845703, 750.2157592773438, 108.45464324951172, -150.63829040527344, 478.1290283203125, 356.2835388183594, 59.161006927490234, 85.91090393066406, 1030.104736328125, 318.56915283203125, -459.57623291015625, -53.64640426635742], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000439.npy"} +{"epoch": 0.644640234948605, "step": 440, "batch_size": 64, "mean": 245.49815368652344, "std": 343.5228271484375, "min": -276.71246337890625, "p10": -100.7785430908203, "median": 149.77825927734375, "p90": 729.3162292480469, "max": 1438.187744140625, "pos_frac": 0.765625, "sample": [341.4913024902344, 345.25103759765625, 554.5869140625, 383.7547607421875, -90.73956298828125, 5.522148132324219, 636.1632690429688, 221.6181640625, 73.40306091308594, 829.48681640625, 73.10503387451172, -22.148345947265625, 261.3931884765625, 135.00540161132812, 726.2168579101562, -165.12283325195312, 145.63473510742188, -174.48768615722656, 33.430755615234375, 5.548118591308594, 150.68319702148438, 641.3997192382812, 71.59423828125, 59.45545196533203, 295.9984130859375, 198.11607360839844, -126.34569549560547, 326.22235107421875, 380.2740478515625, 187.15274047851562, 134.26156616210938, 774.5751342773438, -105.08096313476562, 265.43768310546875, 1.7063922882080078, -63.711517333984375, 1150.2720947265625, -258.4266052246094, 213.00015258789062, -4.569543838500977, 792.9334106445312, 238.55706787109375, 947.8253173828125, 553.9273071289062, 213.7308349609375, 105.9506607055664, -183.54531860351562, 69.50511169433594, 445.5444030761719, 544.7144165039062, 193.74017333984375, 730.64453125, -33.46527862548828, 107.3497085571289, 24.989070892333984, 536.6561279296875, 40.92628860473633, -40.77516174316406, -51.724342346191406, 148.87332153320312, 1438.187744140625, 570.6416625976562, -276.71246337890625, -17.720916748046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000440.npy"} +{"epoch": 0.6461086637298091, "step": 441, "batch_size": 64, "mean": 242.96311950683594, "std": 445.7601013183594, "min": -765.0991821289062, "p10": -241.3094573974609, "median": 233.01223754882812, "p90": 746.7207031250001, "max": 2014.7120361328125, "pos_frac": 0.6875, "sample": [327.47259521484375, 229.2400665283203, 281.8438720703125, 527.1697387695312, 485.0193786621094, 810.2904663085938, -60.455657958984375, 28.150970458984375, 133.41262817382812, -375.74310302734375, -183.2812957763672, -81.66255950927734, 727.1609497070312, 1076.5616455078125, 325.7325439453125, -479.2142028808594, 230.71131896972656, 685.8665771484375, 99.05084228515625, -28.996368408203125, 39.99052429199219, 112.40281677246094, -12.334564208984375, 795.8919677734375, 755.1034545898438, 55.398834228515625, 235.3131561279297, 393.09765625, 401.5242919921875, 2014.7120361328125, -10.006555557250977, -566.096923828125, 248.9150848388672, 572.252685546875, -448.74517822265625, -222.17333984375, 144.25933837890625, 255.4049072265625, 600.8411254882812, -453.9437561035156, -77.36904907226562, 520.82421875, -52.85618591308594, -765.0991821289062, 377.820068359375, 411.0501708984375, 417.90106201171875, 468.2348937988281, 360.5049133300781, -77.65908813476562, 184.3043975830078, -249.51065063476562, -66.791748046875, 447.3677978515625, 370.8175048828125, 64.2884521484375, 631.6604614257812, 362.07373046875, 589.8518676757812, 1185.452880859375, -19.688663482666016, -108.24358367919922, 131.83575439453125, 772.7320556640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000441.npy"} +{"epoch": 0.6475770925110133, "step": 442, "batch_size": 64, "mean": 295.62847900390625, "std": 396.7184143066406, "min": -515.55029296875, "p10": -72.33919715881345, "median": 167.91915130615234, "p90": 818.1462829589843, "max": 1262.7523193359375, "pos_frac": 0.78125, "sample": [-82.62984466552734, 169.17149353027344, 525.4805908203125, -11.8677978515625, 314.8663330078125, 146.27664184570312, -47.82421112060547, 202.56568908691406, 66.351318359375, 90.6365966796875, 27.440685272216797, 479.7967834472656, -26.13898468017578, 548.8713989257812, 370.4575500488281, 668.81640625, 1026.2926025390625, 429.04949951171875, -18.988616943359375, 164.93333435058594, 166.66680908203125, 531.8351440429688, 41.1719970703125, 269.8178405761719, -290.0799255371094, 214.06369018554688, 819.3482055664062, 314.49822998046875, 969.6766357421875, 248.39248657226562, 62.4671630859375, -515.55029296875, 148.18588256835938, -114.64732360839844, -7.499153137207031, 644.961181640625, -48.32768630981445, 495.2378845214844, 53.87755584716797, 1227.2294921875, -274.49237060546875, 387.67535400390625, 798.3040771484375, 33.218849182128906, 20.2530517578125, 1262.7523193359375, 815.341796875, 288.34832763671875, 675.9948120117188, 92.40804290771484, 97.5159912109375, -319.1745910644531, 15.96771240234375, 130.4723663330078, -1.6513214111328125, -234.15293884277344, 346.56536865234375, 428.40130615234375, 1208.826416015625, 645.5386352539062, 1163.1904296875, 772.7063598632812, 164.13099670410156, 127.19789123535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000442.npy"} +{"epoch": 0.6490455212922174, "step": 443, "batch_size": 64, "mean": 187.8720703125, "std": 431.3890686035156, "min": -966.292236328125, "p10": -243.0147674560547, "median": 167.51332092285156, "p90": 697.1250427246097, "max": 1541.1722412109375, "pos_frac": 0.75, "sample": [1238.8900146484375, 81.42505645751953, 899.0298461914062, 19.481185913085938, 294.83905029296875, 115.89988708496094, -102.47150421142578, -84.99272155761719, -429.33673095703125, 120.34485626220703, 69.98448181152344, 24.942276000976562, 229.4515838623047, 214.405517578125, 58.397857666015625, -728.7346801757812, 99.05763244628906, -132.70689392089844, -114.00789642333984, -52.191192626953125, 1224.8446044921875, 358.87481689453125, 613.3052978515625, 328.6365661621094, -682.2098388671875, -631.3081665039062, 180.37319946289062, 248.7248077392578, 733.0477905273438, -966.292236328125, 430.8844299316406, 368.595458984375, 261.7911376953125, 147.33580017089844, 109.80635070800781, -125.59882354736328, 133.9193115234375, 458.9910888671875, 492.454833984375, 189.56663513183594, -244.0602264404297, -40.84312438964844, 468.4988708496094, -311.9557189941406, 390.012451171875, 147.84376525878906, 287.75494384765625, 1541.1722412109375, 313.2720947265625, 110.74083709716797, 578.3115844726562, 103.95883178710938, 213.3142547607422, 263.7689208984375, -157.52903747558594, 319.3968200683594, -240.5753631591797, 234.81068420410156, 153.7227783203125, 160.09783935546875, 895.9053955078125, 767.8350830078125, 174.92880249023438, 195.97848510742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000443.npy"} +{"epoch": 0.6505139500734214, "step": 444, "batch_size": 64, "mean": 300.8608093261719, "std": 403.4767761230469, "min": -606.9053344726562, "p10": -168.52476501464838, "median": 289.3016815185547, "p90": 766.6613891601568, "max": 1545.5203857421875, "pos_frac": 0.765625, "sample": [325.31109619140625, -253.12677001953125, 95.98584747314453, 633.6517333984375, 14.458852767944336, 350.95013427734375, -220.62916564941406, 1467.54150390625, 861.4454345703125, -8.004444122314453, -194.87918090820312, 294.3590393066406, 226.75030517578125, 524.5790405273438, 622.4087524414062, 161.862548828125, 823.66552734375, 495.44384765625, -228.65719604492188, 151.54652404785156, -37.80699920654297, -1.5916080474853516, 384.7259216308594, 893.8651733398438, -4.93621826171875, 408.57958984375, 329.2405090332031, -36.63812255859375, 321.4371032714844, -427.5013427734375, -10.213708877563477, 1545.5203857421875, 314.3628234863281, 392.0971984863281, 329.9525146484375, 499.92486572265625, 1194.5048828125, 424.7248840332031, 550.2645263671875, 250.01657104492188, -107.0311279296875, 1107.370361328125, 583.935546875, 13.981651306152344, 600.8377075195312, 524.2498168945312, 591.9822387695312, 18.54007911682129, 211.55970764160156, 158.24163818359375, 284.24432373046875, 217.0763702392578, 312.4319152832031, 203.83248901367188, 401.26611328125, 252.06103515625, 12.850217819213867, -25.000198364257812, -285.9148254394531, 37.036956787109375, 618.7425537109375, -606.9053344726562, 204.31094360351562, 460.19854736328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000444.npy"} +{"epoch": 0.6519823788546255, "step": 445, "batch_size": 64, "mean": 280.84063720703125, "std": 363.599609375, "min": -457.11383056640625, "p10": -126.38410186767574, "median": 218.09463500976562, "p90": 712.2947937011719, "max": 1274.9434814453125, "pos_frac": 0.796875, "sample": [225.95761108398438, 94.52316284179688, 25.036766052246094, 890.6241455078125, 13.932022094726562, 645.310546875, 512.1825561523438, 152.630126953125, 49.01287841796875, 161.2695770263672, 101.434814453125, 622.056640625, 12.351821899414062, 410.96185302734375, 145.71334838867188, -457.11383056640625, 99.7037353515625, 481.9492492675781, 352.537353515625, 231.53465270996094, 456.5420227050781, 364.82427978515625, 591.4877319335938, 717.42333984375, 6.782398223876953, 141.06387329101562, 1274.9434814453125, 477.4204406738281, 464.84710693359375, 651.6192626953125, 700.3281860351562, 594.8864135742188, 202.53260803222656, 739.3531494140625, 17.634384155273438, -213.0423583984375, 197.73721313476562, 280.1136779785156, -141.83621215820312, 572.59521484375, 41.835655212402344, 1076.7607421875, 498.1895446777344, -226.7157745361328, 834.4661865234375, 378.0313720703125, 193.69149780273438, -243.57489013671875, -49.697105407714844, -90.32917785644531, 267.5419616699219, -52.059608459472656, -44.10710525512695, 430.6693420410156, 96.87921142578125, 306.8719482421875, -34.928924560546875, -316.927978515625, 1236.87255859375, -75.70591735839844, 650.3277587890625, 210.23165893554688, 297.42144775390625, -280.8088684082031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000445.npy"} +{"epoch": 0.6534508076358296, "step": 446, "batch_size": 64, "mean": 299.6471862792969, "std": 329.9698486328125, "min": -656.377197265625, "p10": -48.08669815063476, "median": 268.6734313964844, "p90": 751.6568359375001, "max": 1138.68603515625, "pos_frac": 0.84375, "sample": [-40.08373260498047, 298.499267578125, 8.684768676757812, 725.2003173828125, 817.5171508789062, 374.5561218261719, 19.118701934814453, -656.377197265625, 210.15855407714844, 670.255126953125, 864.3883666992188, 403.14794921875, -51.855979919433594, 727.8259887695312, 546.3873901367188, 609.1428833007812, 626.9532470703125, 352.3531188964844, 148.648193359375, 383.83123779296875, -51.51654052734375, 166.80477905273438, 150.15196228027344, 208.67396545410156, 235.03855895996094, -400.72955322265625, 110.05702209472656, 157.52731323242188, 514.1567993164062, 259.34918212890625, 617.75, -117.30714416503906, 21.382293701171875, 192.6558074951172, 23.773298263549805, 189.30438232421875, -14.62420654296875, 415.02783203125, 279.322265625, 871.9199829101562, 329.7474365234375, 194.8842010498047, 854.5258178710938, 1138.68603515625, 547.135986328125, 3.7266311645507812, 869.8240966796875, 489.6330871582031, 305.94293212890625, 273.04022216796875, 364.4823913574219, -1.8708648681640625, -77.8126220703125, 145.08181762695312, 27.47020721435547, 264.306640625, 156.26817321777344, 401.36846923828125, 761.8700561523438, 211.16030883789062, -242.16580200195312, 702.4114990234375, 311.42169189453125, 279.2107238769531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000446.npy"} +{"epoch": 0.6549192364170338, "step": 447, "batch_size": 64, "mean": 316.29620361328125, "std": 411.7406921386719, "min": -499.46978759765625, "p10": -125.51631469726561, "median": 273.6072540283203, "p90": 754.9641723632812, "max": 2202.04541015625, "pos_frac": 0.8125, "sample": [231.53077697753906, 43.90174102783203, 501.78143310546875, 229.32606506347656, 51.802398681640625, -499.46978759765625, 570.708251953125, 267.0506896972656, 318.59832763671875, -100.90475463867188, -13.405254364013672, 762.4378051757812, 270.2162170410156, 204.21144104003906, 749.591552734375, 596.9872436523438, 803.8534545898438, 50.60069274902344, 2202.04541015625, 812.66259765625, 183.22799682617188, 1184.8829345703125, 459.03033447265625, 394.5303955078125, -247.84547424316406, 276.998291015625, 559.38330078125, -151.01437377929688, 315.8758850097656, 169.1663055419922, -171.35214233398438, -69.95146179199219, -28.670433044433594, -120.84591674804688, 394.0146484375, 4.09132194519043, 278.9206848144531, 593.4496459960938, 91.593017578125, 32.485252380371094, 585.8558349609375, 82.41495513916016, 114.43794250488281, -422.4830322265625, 757.2667236328125, 521.3716430664062, 475.2343444824219, -243.38760375976562, 264.5259094238281, 28.7772159576416, 473.05718994140625, 409.6504211425781, 423.93988037109375, 354.452392578125, 153.79193115234375, 630.2357788085938, 352.44793701171875, 231.3146209716797, 1062.646240234375, 627.961669921875, 87.86876678466797, 652.672119140625, 544.9269409179688, -127.51791381835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000447.npy"} +{"epoch": 0.6563876651982379, "step": 448, "batch_size": 64, "mean": 293.324462890625, "std": 357.9877014160156, "min": -562.0764770507812, "p10": -108.18391876220701, "median": 241.89767456054688, "p90": 672.2722717285158, "max": 1416.95166015625, "pos_frac": 0.828125, "sample": [13.690055847167969, 44.528316497802734, -361.3143615722656, 162.27420043945312, 592.826904296875, 37.345947265625, 167.8839874267578, -73.59290313720703, 329.1842956542969, -562.0764770507812, 472.9425048828125, 97.30890655517578, 647.10546875, 110.64508056640625, 227.92755126953125, 553.912109375, 39.85045623779297, -5.50898551940918, 629.586669921875, -155.43331909179688, -116.17445373535156, 57.059810638427734, 743.1988525390625, 471.26708984375, 238.9874267578125, 343.1521911621094, 244.80792236328125, 177.5277099609375, 497.59521484375, 912.5927734375, 321.64129638671875, 644.40869140625, -34.914100646972656, 377.1611328125, 1416.95166015625, 39.92286682128906, 84.75880432128906, 217.55076599121094, 89.92777252197266, 309.26727294921875, 565.9121704101562, 618.8658447265625, 266.1852111816406, -212.158203125, 1157.49755859375, 355.3839111328125, 228.5294189453125, -89.53933715820312, 427.8221740722656, 129.52334594726562, 327.4794616699219, -231.10984802246094, 992.772216796875, -164.8243408203125, 622.1871337890625, 918.4805297851562, 56.232025146484375, 368.0982666015625, 131.19375610351562, 323.6097106933594, 595.63330078125, 683.0580444335938, 509.67864990234375, 186.4769287109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000448.npy"} +{"epoch": 0.657856093979442, "step": 449, "batch_size": 64, "mean": 300.69012451171875, "std": 455.12646484375, "min": -852.9403076171875, "p10": -203.86676025390622, "median": 253.3141632080078, "p90": 778.5385375976563, "max": 2321.828857421875, "pos_frac": 0.765625, "sample": [179.46446228027344, 23.930641174316406, 314.0873107910156, -19.442214965820312, 766.7900390625, 760.2388916015625, 205.1463623046875, 117.56499481201172, 288.51861572265625, 281.3219299316406, 895.8974609375, -185.5531768798828, -28.38037872314453, 226.93240356445312, 665.053955078125, 24.223888397216797, 484.56451416015625, 179.84982299804688, 111.99681091308594, -328.29620361328125, 783.5736083984375, 288.1158447265625, 565.5340576171875, 747.6331787109375, 334.3580322265625, 741.5178833007812, -329.0153503417969, 42.70726013183594, 425.808837890625, 406.7041320800781, 182.22833251953125, 217.99478149414062, 35.6986083984375, 611.3143310546875, 586.4736938476562, 743.040283203125, -155.48306274414062, 189.63377380371094, -158.353759765625, -211.71543884277344, 2321.828857421875, 654.6390380859375, -234.23109436035156, 335.60711669921875, 377.37994384765625, 166.52914428710938, -84.776611328125, -852.9403076171875, -44.494937896728516, 1097.480712890625, 279.6959228515625, 854.5094604492188, 815.23388671875, -270.1819763183594, 465.8714599609375, 182.30557250976562, 445.62762451171875, 1076.102294921875, 282.3240966796875, -117.03962707519531, -215.1695098876953, 52.271881103515625, 197.7077178955078, 446.2066345214844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000449.npy"} +{"epoch": 0.6593245227606461, "step": 450, "batch_size": 64, "mean": 329.4759216308594, "std": 330.1685485839844, "min": -767.2117919921875, "p10": 18.185406875610358, "median": 292.62298583984375, "p90": 756.4460266113282, "max": 1235.1019287109375, "pos_frac": 0.90625, "sample": [419.8829345703125, 605.6182861328125, 806.6822509765625, 359.7026062011719, 58.39544677734375, 650.0132446289062, 1235.1019287109375, -56.02781677246094, 381.0700378417969, 25.13697052001953, 657.768310546875, 527.0830688476562, 701.4298706054688, 374.0531921386719, 409.20745849609375, 744.602294921875, 79.65472412109375, 84.5478515625, 232.496337890625, 49.092716217041016, 388.08160400390625, 761.5219116210938, 557.3567504882812, 786.460693359375, 246.80101013183594, -60.744319915771484, 145.9295196533203, 906.814208984375, 392.7998046875, 685.2245483398438, 82.69023132324219, 384.12432861328125, 278.59417724609375, 217.47508239746094, 294.4222412109375, 15.206165313720703, 238.0869598388672, 399.0967102050781, 222.896484375, 126.19148254394531, 936.1651000976562, -234.37718200683594, 124.72002410888672, 465.9999084472656, 145.5897216796875, 344.3999938964844, 56.12792205810547, 665.3387451171875, 223.26348876953125, 40.8338737487793, 479.38507080078125, -68.15301513671875, 567.9931030273438, 348.88519287109375, 57.07721710205078, -767.2117919921875, 299.65478515625, 290.82373046875, 80.86017608642578, 207.9832305908203, 152.625, 1077.879150390625, -34.91254425048828, 210.96713256835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000450.npy"} +{"epoch": 0.6607929515418502, "step": 451, "batch_size": 64, "mean": 231.30206298828125, "std": 442.19921875, "min": -1319.343017578125, "p10": -205.6855697631836, "median": 258.77110290527344, "p90": 691.6066040039065, "max": 1197.018310546875, "pos_frac": 0.796875, "sample": [63.216094970703125, -169.52877807617188, 21.091064453125, 346.4849853515625, 536.1513061523438, 282.0144958496094, 421.7794189453125, 615.21240234375, -59.170509338378906, 301.8070068359375, 501.3899230957031, 1197.018310546875, 492.5938720703125, 851.6514892578125, 139.51727294921875, 202.18685913085938, 136.99850463867188, -1319.343017578125, -811.8064575195312, -209.600830078125, 137.59609985351562, 55.85071563720703, 439.1749572753906, 589.4068603515625, 709.6878662109375, 260.55438232421875, 473.83465576171875, 192.57826232910156, 313.0917663574219, 649.4169921875, 206.73907470703125, 1063.33935546875, 962.8075561523438, 402.2912902832031, 587.5255126953125, 598.76171875, 434.07489013671875, -389.5562744140625, 256.9878234863281, 300.75787353515625, -41.77986145019531, -166.13015747070312, 434.66082763671875, -1088.7032470703125, 160.88558959960938, 177.82720947265625, 179.8766326904297, 168.14810180664062, 284.9901428222656, 96.22440338134766, 631.4772338867188, -184.43894958496094, -202.53927612304688, 758.54052734375, 545.2534790039062, 99.17523193359375, 404.092529296875, -495.0771789550781, 198.7884521484375, 8.71271800994873, 797.7760009765625, -207.0339813232422, 312.3945617675781, 145.62680053710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000451.npy"} +{"epoch": 0.6622613803230544, "step": 452, "batch_size": 64, "mean": 286.1051330566406, "std": 304.45660400390625, "min": -365.5533752441406, "p10": -102.64582061767577, "median": 282.75360107421875, "p90": 698.2640869140627, "max": 970.2665405273438, "pos_frac": 0.828125, "sample": [880.7276611328125, -289.6528015136719, 538.4030151367188, 635.8822631835938, -108.40196990966797, 485.7069091796875, 444.8955078125, 550.3418579101562, 541.0925903320312, -89.21480560302734, 575.7698364257812, 293.7159423828125, 143.41693115234375, 117.62980651855469, 418.43194580078125, 313.0528259277344, 652.2457275390625, 271.791259765625, 472.47528076171875, 14.167219161987305, 375.182861328125, -12.389472961425781, 490.78192138671875, 100.49732208251953, 340.49993896484375, 710.7832641601562, 107.6455078125, -227.35183715820312, 587.597412109375, 217.58270263671875, -89.02474975585938, 418.258056640625, 79.79356384277344, 192.5260009765625, 731.9717407226562, 111.95272827148438, 237.35972595214844, 669.0526733398438, 87.17253875732422, 198.19927978515625, 307.18414306640625, -249.853271484375, 252.92697143554688, 743.188720703125, 236.2840576171875, 432.2947082519531, 133.55718994140625, 862.433349609375, 970.2665405273438, 79.38725280761719, 95.08100891113281, 367.57037353515625, 388.70318603515625, 9.21609115600586, -365.5533752441406, 300.80718994140625, -132.03668212890625, 480.6639709472656, 490.02685546875, 726.0626220703125, -156.56820678710938, 189.95443725585938, -65.35719299316406, 23.9211368560791], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000452.npy"} +{"epoch": 0.6637298091042585, "step": 453, "batch_size": 64, "mean": 271.54254150390625, "std": 367.0718994140625, "min": -337.310791015625, "p10": -111.85130386352539, "median": 188.31182861328125, "p90": 747.9303649902347, "max": 1661.365966796875, "pos_frac": 0.734375, "sample": [672.429443359375, 586.0264892578125, -102.73882293701172, 87.42620086669922, 190.5250244140625, 120.70979309082031, 255.32254028320312, 160.5919647216797, 247.56201171875, 30.88958740234375, 220.44882202148438, 582.3671264648438, 505.5205383300781, 598.98095703125, -7.269588470458984, 164.30787658691406, 311.36822509765625, 145.451416015625, -174.25392150878906, 147.44728088378906, 564.1107788085938, -189.44618225097656, 239.96719360351562, 388.22210693359375, 671.6011962890625, -267.90545654296875, -94.48149108886719, 780.2879028320312, -337.310791015625, 346.7410888671875, -36.48155212402344, 205.15817260742188, 542.407958984375, 858.9644165039062, 872.3421630859375, 4.637012481689453, -95.47715759277344, 151.61050415039062, 1661.365966796875, 819.5184326171875, 418.9884338378906, -101.0400619506836, 950.8959350585938, -18.749427795410156, 266.6431884765625, 104.11100769042969, 557.2699584960938, 186.0986328125, 380.41961669921875, 444.14495849609375, -199.45277404785156, 498.34271240234375, -115.75665283203125, 545.9672241210938, 45.94830322265625, -102.33881378173828, -186.5078582763672, 117.44161224365234, 1051.2239990234375, 465.67938232421875, -28.157642364501953, -28.36577606201172, 138.67823791503906, 158.29269409179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000453.npy"} +{"epoch": 0.6651982378854625, "step": 454, "batch_size": 64, "mean": 302.390625, "std": 394.3345642089844, "min": -847.0496826171875, "p10": -91.86246643066404, "median": 245.78482818603516, "p90": 757.0154052734376, "max": 1473.2222900390625, "pos_frac": 0.84375, "sample": [265.42218017578125, 26.697998046875, 1016.2410278320312, 284.613525390625, 543.5143432617188, 60.17353820800781, 713.702392578125, 359.93408203125, 19.14950942993164, 239.4524383544922, -631.01953125, 674.768310546875, 372.2051086425781, 331.637451171875, 205.86862182617188, 218.26507568359375, 456.86248779296875, 381.345458984375, -16.38829803466797, -847.0496826171875, 12.56169319152832, 526.1318359375, -73.39840698242188, 796.6625366210938, -327.6615295410156, -105.29421997070312, 775.578125, 97.1393814086914, 122.46638488769531, 223.21707153320312, 559.1273803710938, 135.17430114746094, 375.9483642578125, 129.20303344726562, 457.8924560546875, 130.49884033203125, 216.99755859375, 190.04751586914062, 366.80218505859375, 167.42356872558594, 519.4319458007812, 489.4370422363281, 62.45051574707031, 388.3713073730469, -7.3218994140625, 652.5374145507812, 1473.2222900390625, 398.84637451171875, -103.30391693115234, 175.56796264648438, 1439.7088623046875, 1111.9000244140625, 252.11721801757812, 498.58251953125, 78.56270599365234, 468.1145935058594, 678.3333129882812, -99.775634765625, 443.6896667480469, -102.79588317871094, 161.5426483154297, 87.45912170410156, 817.2952880859375, 17.113941192626953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000454.npy"} +{"epoch": 0.6666666666666666, "step": 455, "batch_size": 64, "mean": 361.5738220214844, "std": 421.0751037597656, "min": -753.1860961914062, "p10": -99.0847145080566, "median": 253.58267974853516, "p90": 1100.6028808593753, "max": 1371.8192138671875, "pos_frac": 0.875, "sample": [116.28813171386719, 164.2843017578125, 762.7968139648438, 1125.306884765625, -381.4869384765625, 345.9407653808594, 709.6527709960938, -115.70907592773438, 1188.430908203125, -200.15631103515625, 1129.004150390625, 249.60755920410156, 1042.960205078125, -269.1232604980469, 1154.3089599609375, -111.4267349243164, 222.37457275390625, 666.3836669921875, 271.1894836425781, 225.9605255126953, 83.66677856445312, 535.52197265625, 257.55780029296875, 11.143442153930664, 438.99658203125, 1160.50830078125, 754.0383911132812, -138.44287109375, 320.43597412109375, 495.673828125, 222.20721435546875, 517.265625, 232.560302734375, 165.43014526367188, 225.2461700439453, 204.18759155273438, 271.9771728515625, 331.8404846191406, 117.28793334960938, -753.1860961914062, 367.3900451660156, 54.656612396240234, 221.2272491455078, 589.2747802734375, 45.83685302734375, 50.255226135253906, 52.08349609375, 488.876708984375, 821.6832885742188, 229.56240844726562, 696.9804077148438, 111.91241455078125, 431.02728271484375, 516.0382080078125, 95.09174346923828, 109.52421569824219, 14.941211700439453, 833.475830078125, 474.43524169921875, 436.11871337890625, 1230.96875, 217.32705688476562, -70.28666687011719, 1371.8192138671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000455.npy"} +{"epoch": 0.6681350954478708, "step": 456, "batch_size": 64, "mean": 341.6557312011719, "std": 365.0517272949219, "min": -198.82437133789062, "p10": -19.246002197265618, "median": 244.1196746826172, "p90": 887.5202087402345, "max": 1454.42041015625, "pos_frac": 0.84375, "sample": [126.79962158203125, 165.9325408935547, 800.6246948242188, 76.03643798828125, -22.402099609375, 330.1128845214844, 59.75240707397461, -3.547515869140625, 230.29220581054688, 747.6513061523438, 457.82891845703125, -1.5327739715576172, 72.13710021972656, 1454.42041015625, 288.88165283203125, -116.08057403564453, -54.09471130371094, 234.96725463867188, 291.3562316894531, 398.8462219238281, 286.21435546875, 63.115928649902344, -69.95925903320312, 1079.7401123046875, -82.9898681640625, 429.0364990234375, 1403.744384765625, 374.7223205566406, 644.6802978515625, 30.336334228515625, 374.75958251953125, 510.7001647949219, 671.9236450195312, 179.6322021484375, 169.0574951171875, 239.33238220214844, 87.98857116699219, 451.2724609375, 1018.9888305664062, 97.27877807617188, 899.7079467773438, 479.027099609375, 305.43804931640625, 66.12810516357422, -198.82437133789062, 423.81396484375, 72.2354507446289, -69.63265991210938, 859.0821533203125, 54.955894470214844, 952.8629760742188, 1042.1580810546875, 349.8684997558594, -11.88177490234375, 201.76065063476562, 93.49699401855469, 340.5051574707031, 85.48165893554688, 434.1912841796875, 190.0562744140625, 221.35202026367188, 528.7176513671875, 799.0018310546875, 248.90696716308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000456.npy"} +{"epoch": 0.6696035242290749, "step": 457, "batch_size": 64, "mean": 334.63519287109375, "std": 350.2214050292969, "min": -340.7384033203125, "p10": -72.36526718139648, "median": 392.60040283203125, "p90": 777.6267883300782, "max": 1293.507568359375, "pos_frac": 0.828125, "sample": [-57.66661834716797, 467.8211975097656, 436.6343994140625, 383.32781982421875, 589.764892578125, 692.3865966796875, 796.7479858398438, 786.111328125, 97.39742279052734, 54.09228515625, 1069.420166015625, 402.818603515625, 20.392948150634766, 229.14344787597656, 81.16603088378906, 236.81736755371094, 525.8312377929688, 204.20985412597656, -48.849143981933594, 546.9783935546875, 757.8295288085938, -11.557109832763672, 178.06398010253906, 508.98846435546875, 432.8822021484375, 1036.9647216796875, -340.7384033203125, -281.3984375, 582.8621826171875, 621.0372314453125, 97.05656433105469, 585.6505126953125, 439.7930603027344, 540.138916015625, 319.47137451171875, -78.66468811035156, 650.4738159179688, 455.0255432128906, 85.29763793945312, 20.02911376953125, 415.8963623046875, 827.8037719726562, 511.9053955078125, 117.42137145996094, 4.684192657470703, 127.71882629394531, 562.3057861328125, -129.1368408203125, 478.33209228515625, 380.9913024902344, -291.9955749511719, 704.3899536132812, 266.56781005859375, 851.9669189453125, -311.35516357421875, 438.52239990234375, 75.06443786621094, -151.39736938476562, 401.87298583984375, -23.757946014404297, 214.8433837890625, 505.19183349609375, 31.55828094482422, 1293.507568359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000457.npy"} +{"epoch": 0.671071953010279, "step": 458, "batch_size": 64, "mean": 307.7383117675781, "std": 386.9480285644531, "min": -451.6098327636719, "p10": -136.56593170166013, "median": 282.4566955566406, "p90": 678.3927612304688, "max": 1849.35205078125, "pos_frac": 0.78125, "sample": [-451.6098327636719, 828.0601196289062, 477.1475830078125, 346.1850280761719, 634.5269775390625, -70.96836853027344, 1849.35205078125, -104.61820983886719, 287.44635009765625, 524.3157958984375, 154.40521240234375, 208.5213623046875, 470.35150146484375, 333.6457214355469, 731.1144409179688, -84.18804931640625, -173.3830108642578, -1.999155044555664, 211.18826293945312, 681.5423583984375, 458.9644470214844, 516.1729736328125, 89.11064910888672, 534.0446166992188, 353.67364501953125, 372.79571533203125, 348.254150390625, -78.13739013671875, 277.467041015625, 543.7692260742188, 263.66571044921875, 623.2470703125, 563.3646240234375, 855.59228515625, 78.64697265625, -20.8162841796875, 390.5968933105469, 73.69664001464844, 347.30731201171875, 400.4764709472656, 712.3175048828125, 380.1193542480469, -94.64998626708984, -329.3311462402344, 181.47430419921875, -220.796875, 84.0902099609375, -150.2578125, 148.2613525390625, 1615.2642822265625, 582.5164184570312, 137.75601196289062, 269.6117858886719, 91.03807067871094, 191.47854614257812, 671.043701171875, 333.7274169921875, 458.0475769042969, 249.01817321777344, 273.6297302246094, -179.19325256347656, 449.37005615234375, -237.9456787109375, 235.7332305908203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000458.npy"} +{"epoch": 0.6725403817914831, "step": 459, "batch_size": 64, "mean": 339.218505859375, "std": 434.5468444824219, "min": -698.048583984375, "p10": -203.12803649902344, "median": 273.3023376464844, "p90": 914.4524658203127, "max": 1513.4561767578125, "pos_frac": 0.8125, "sample": [70.02104187011719, 131.1697998046875, 376.80706787109375, 119.0673828125, 245.31878662109375, 424.7197570800781, -229.68426513671875, 114.29820251464844, 667.4583129882812, -358.3658752441406, 559.6982421875, 390.9974365234375, 466.38629150390625, 568.1162719726562, -19.244279861450195, 482.68487548828125, -91.23440551757812, 679.85107421875, 344.45172119140625, 931.5040283203125, 31.433319091796875, 971.5980834960938, 614.8897705078125, -121.37834167480469, 365.6293640136719, -324.5653991699219, 878.0576782226562, -207.65293884277344, 87.69221496582031, 10.518377304077148, 246.2117156982422, -37.723411560058594, 858.4422607421875, -698.048583984375, 1513.4561767578125, 314.45172119140625, 1273.6181640625, 568.1024169921875, 185.18589782714844, -279.1663818359375, 184.64584350585938, 229.49569702148438, 442.24346923828125, 211.62548828125, 80.79449462890625, 266.40374755859375, -409.68292236328125, 202.63088989257812, 466.8848571777344, 689.5173950195312, 813.257080078125, 95.42805480957031, 331.183837890625, 1288.5411376953125, 262.14508056640625, 115.12786102294922, 1050.380859375, -192.56993103027344, 872.489013671875, 756.590087890625, 930.0502319335938, 280.200927734375, 58.84866714477539, 558.9780883789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000459.npy"} +{"epoch": 0.6740088105726872, "step": 460, "batch_size": 64, "mean": 372.6073913574219, "std": 416.76416015625, "min": -420.4366455078125, "p10": -155.191731262207, "median": 317.98651123046875, "p90": 929.4992065429689, "max": 1667.1707763671875, "pos_frac": 0.828125, "sample": [860.12451171875, 723.0352172851562, 612.9554443359375, -195.81842041015625, 106.93952941894531, 996.8741455078125, 936.6767578125, 281.49359130859375, 141.72232055664062, 228.9670867919922, -156.94308471679688, 708.7259521484375, 154.98912048339844, 527.0406494140625, 165.9467010498047, 224.36614990234375, 324.2654113769531, -420.4366455078125, 158.01026916503906, 1401.334716796875, 1372.9744873046875, 210.0211181640625, 419.0596618652344, 60.42572021484375, 1034.7208251953125, 266.9972229003906, 1667.1707763671875, 369.3831787109375, -54.905296325683594, 25.977264404296875, 311.7076110839844, -171.30233764648438, 575.1805419921875, -127.43305969238281, 584.663330078125, 583.2471313476562, -151.10523986816406, 417.96484375, 257.9907531738281, -23.608116149902344, 912.7515869140625, 617.63525390625, 518.7216186523438, 443.0876770019531, 557.0026245117188, 154.24505615234375, 142.63865661621094, 84.08180236816406, 339.456787109375, 308.50323486328125, -208.2510223388672, 104.77535247802734, 162.77764892578125, 437.1747131347656, -234.7305450439453, 608.557373046875, 948.5999755859375, 486.23138427734375, -226.87545776367188, 821.3177490234375, 550.57470703125, 458.5971984863281, 5.131614685058594, 445.46942138671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000460.npy"} +{"epoch": 0.6754772393538914, "step": 461, "batch_size": 64, "mean": 295.65155029296875, "std": 433.9674072265625, "min": -567.2540283203125, "p10": -195.18454132080075, "median": 211.42481994628906, "p90": 805.4691650390625, "max": 1755.4818115234375, "pos_frac": 0.765625, "sample": [112.49282836914062, 13.262619018554688, 808.7501220703125, 393.4448547363281, -232.23272705078125, 552.93408203125, -567.2540283203125, 1432.0220947265625, 86.28131103515625, 637.5215454101562, 108.57930755615234, -99.73249053955078, 331.48016357421875, 677.8333129882812, 1056.03955078125, 458.2052001953125, 646.468017578125, -107.99554443359375, 207.43687438964844, 348.3853759765625, 326.7095947265625, 733.5007934570312, 797.8135986328125, 994.971923828125, -236.19361877441406, -59.8321533203125, 1257.5963134765625, 1755.4818115234375, 207.94381713867188, 377.646484375, 463.4516906738281, 183.0615692138672, 239.68621826171875, 20.816375732421875, 66.12255859375, 278.4775390625, -258.71392822265625, -13.49835205078125, -100.61795043945312, 30.347564697265625, 863.6055297851562, -83.87860870361328, 670.7362060546875, 685.0494384765625, -203.3241424560547, 328.5209655761719, 230.64659118652344, 620.8865356445312, 17.94940185546875, -176.192138671875, 627.932373046875, 106.1861801147461, 105.15231323242188, 114.0087890625, 464.0851135253906, -102.29126739501953, -343.5395202636719, 115.99923706054688, 381.55426025390625, 206.6651611328125, 162.39984130859375, -310.34832763671875, 296.29449462890625, 214.90582275390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000461.npy"} +{"epoch": 0.6769456681350955, "step": 462, "batch_size": 64, "mean": 244.05181884765625, "std": 434.33453369140625, "min": -526.9102172851562, "p10": -257.35097198486324, "median": 211.36275482177734, "p90": 702.4861877441409, "max": 1443.679931640625, "pos_frac": 0.703125, "sample": [424.4847412109375, 70.30606842041016, 2.3930740356445312, -234.4580841064453, 439.3924865722656, 573.2196655273438, 24.847795486450195, 516.573486328125, 636.7680053710938, 934.2268676757812, 494.785400390625, -214.82705688476562, -400.37188720703125, 73.55029296875, 159.76321411132812, 499.30584716796875, 550.54931640625, 123.10610961914062, -223.31427001953125, 730.651123046875, -265.0523376464844, 92.93594360351562, 466.3080139160156, -526.9102172851562, -190.1862030029297, 76.90896606445312, 589.3547973632812, -438.7569580078125, 14.042469024658203, -73.69239807128906, 1310.3414306640625, 483.2684631347656, -344.7411804199219, 252.5489044189453, 573.7361450195312, 1100.6824951171875, -190.82705688476562, -78.37784576416016, 1406.0439453125, 257.5201416015625, 190.08522033691406, -118.67027282714844, -239.38111877441406, 218.23492431640625, -327.5409240722656, -83.31947326660156, 368.3000793457031, 559.2536010742188, -304.9921569824219, 207.94393920898438, -66.92556762695312, 447.21484375, 1443.679931640625, 564.1978759765625, 406.608642578125, -142.55804443359375, 272.38690185546875, 489.3578796386719, 791.69384765625, 551.4061279296875, 96.82759094238281, 157.22927856445312, 214.7815704345703, 227.4031524658203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000462.npy"} +{"epoch": 0.6784140969162996, "step": 463, "batch_size": 64, "mean": 327.8647766113281, "std": 465.2637634277344, "min": -511.038330078125, "p10": -284.09768218994134, "median": 256.7659149169922, "p90": 920.2683532714847, "max": 1459.193359375, "pos_frac": 0.796875, "sample": [59.42826843261719, 569.0010986328125, 78.76329803466797, 249.92987060546875, -180.54290771484375, 391.7397155761719, -126.22779083251953, 85.20616912841797, 469.56866455078125, 47.99043273925781, 641.1515502929688, -312.6264343261719, 633.194580078125, 678.9461669921875, 184.87326049804688, 363.1101989746094, 354.8066711425781, 681.9392700195312, 948.6903686523438, 12.467174530029297, 26.445131301879883, 1459.193359375, 477.4169616699219, 219.57345581054688, 1422.6007080078125, 701.0803833007812, 143.33883666992188, 328.2466735839844, 21.499603271484375, 234.07864379882812, -331.1313781738281, -13.972877502441406, -468.2843933105469, 150.3179931640625, 800.7959594726562, -342.9718933105469, 6.7496795654296875, -141.6865234375, 451.9618225097656, -226.66831970214844, 602.6815185546875, 321.5815734863281, 738.8475952148438, 263.6019592285156, 838.7267456054688, 1117.64306640625, -83.71408081054688, 210.11785888671875, 1070.0438232421875, -308.71026611328125, 142.5609893798828, 1385.140625, 279.7693786621094, 853.9503173828125, -393.5792236328125, 58.29052734375, 431.3007507324219, 211.97293090820312, -511.038330078125, 415.1277770996094, 463.8326721191406, 550.9161376953125, 1392.75341796875, 181.53504943847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000463.npy"} +{"epoch": 0.6798825256975036, "step": 464, "batch_size": 64, "mean": 494.1038818359375, "std": 493.57781982421875, "min": -501.2527770996094, "p10": -79.62045669555663, "median": 500.16510009765625, "p90": 952.6771606445313, "max": 2146.0693359375, "pos_frac": 0.875, "sample": [-287.6328125, 956.8585205078125, 373.7027282714844, 234.45639038085938, 59.36944580078125, 850.3048095703125, 708.5238647460938, 172.5402374267578, 79.11721801757812, 448.06915283203125, 746.1209716796875, 621.9625244140625, -427.4883117675781, 742.8011474609375, 344.6239013671875, 578.9126586914062, 678.19091796875, 216.9393768310547, 547.7015991210938, 775.5624389648438, 815.32958984375, 487.89678955078125, 809.1543579101562, -88.02134704589844, 1056.56298828125, 305.8592834472656, 615.6477661132812, 512.4334106445312, -471.1640319824219, 275.42523193359375, 913.2730712890625, 236.5169677734375, 557.50927734375, 59.42636489868164, 382.2857360839844, 671.154541015625, 844.9274291992188, 322.3072509765625, 533.198974609375, 209.3603515625, 215.347412109375, -140.60369873046875, 942.920654296875, 686.73486328125, 131.8329620361328, 347.2237854003906, -100.67768096923828, 302.24578857421875, 16.196434020996094, 678.5670166015625, 559.1401977539062, 1572.4896240234375, 2146.0693359375, 1430.020263671875, 432.74163818359375, 1061.27587890625, 809.0944213867188, -60.01837921142578, 1889.8736572265625, 767.0230712890625, 522.1654052734375, -501.2527770996094, 361.7491455078125, 82.76969146728516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000464.npy"} +{"epoch": 0.6813509544787077, "step": 465, "batch_size": 64, "mean": 392.1781005859375, "std": 536.2352905273438, "min": -1505.8883056640625, "p10": -179.3786956787109, "median": 436.9705047607422, "p90": 911.7634582519532, "max": 1868.1697998046875, "pos_frac": 0.859375, "sample": [694.3238525390625, 1570.0972900390625, 579.8464965820312, 268.7207946777344, -305.8169250488281, 85.88259887695312, 592.646728515625, 347.0877380371094, 14.045234680175781, 23.181976318359375, 945.2554321289062, 272.199951171875, 329.7881164550781, 122.85894012451172, 605.462890625, 624.9469604492188, 456.56707763671875, 487.3229064941406, 233.92237854003906, 92.57347869873047, -867.9180297851562, 1020.7164916992188, 551.1112060546875, 201.74703979492188, 1868.1697998046875, -134.54396057128906, 582.6139526367188, 145.47625732421875, 428.92095947265625, 487.47882080078125, 755.5616455078125, 484.75994873046875, -201.39962768554688, 109.24594116210938, 268.77703857421875, 576.5736083984375, 23.841957092285156, 316.22808837890625, -405.1749572753906, 797.395263671875, 957.6257934570312, 256.5840759277344, 598.970703125, -974.1533813476562, 752.6685791015625, 651.136962890625, 185.26734924316406, 865.7178344726562, 112.96424865722656, 894.5665893554688, 851.089599609375, -198.5935821533203, 1191.466064453125, -1505.8883056640625, 864.781494140625, 879.6111450195312, -75.85951232910156, 431.424072265625, 705.471923828125, 442.5169372558594, 60.33000183105469, 387.2735595703125, 919.133544921875, 794.7999267578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000465.npy"} +{"epoch": 0.6828193832599119, "step": 466, "batch_size": 64, "mean": 346.8487854003906, "std": 514.1136474609375, "min": -1233.105712890625, "p10": -363.1240783691406, "median": 309.0800323486328, "p90": 979.65908203125, "max": 1488.062255859375, "pos_frac": 0.78125, "sample": [651.9598999023438, -377.31890869140625, 133.8702392578125, -427.34271240234375, 875.0018920898438, 307.71484375, 613.0594482421875, 160.3328094482422, 768.7825317382812, 154.1973114013672, 213.96063232421875, 1106.16259765625, -462.1494140625, 736.8634033203125, -42.76479721069336, 150.17169189453125, 387.5439453125, 7.137348175048828, 450.3047790527344, 158.54461669921875, 584.90625, 185.63653564453125, 569.0662841796875, 408.08123779296875, 892.2335815429688, 167.20468139648438, 63.470802307128906, 827.12060546875, 209.3889617919922, 530.7079467773438, 385.429931640625, 161.87930297851562, 931.2020263671875, 1488.062255859375, -1233.105712890625, 971.6422729492188, 457.8091735839844, 742.153564453125, 1213.726806640625, 455.826416015625, 974.337646484375, 4.137088775634766, 150.351806640625, 172.98117065429688, 626.912841796875, 1202.2767333984375, -240.06954956054688, -446.4403991699219, -58.364295959472656, 10.818727493286133, 1277.0216064453125, 324.0521240234375, 981.939697265625, -34.773841857910156, 943.974853515625, 471.5971984863281, -468.2232971191406, -23.734039306640625, 1038.9666748046875, -330.0028076171875, 310.4452209472656, -108.99108123779297, -419.5088806152344, 260.1427307128906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000466.npy"} +{"epoch": 0.684287812041116, "step": 467, "batch_size": 64, "mean": 373.8319091796875, "std": 721.4420166015625, "min": -1501.4473876953125, "p10": -500.08205871582027, "median": 399.5828399658203, "p90": 1261.3607910156252, "max": 2082.515625, "pos_frac": 0.765625, "sample": [843.7258911132812, 1985.4373779296875, 752.6032104492188, 1429.9283447265625, -736.095703125, -1501.4473876953125, 637.99853515625, 411.0616149902344, 485.1272277832031, 388.10406494140625, 274.3965759277344, 439.99249267578125, 1874.977294921875, 180.35182189941406, 480.5322265625, 509.94073486328125, 88.08667755126953, 1328.9608154296875, 1722.6412353515625, 535.5926513671875, 431.8963623046875, -45.65419387817383, 14.893009185791016, 791.0770263671875, -369.5579833984375, -481.1653747558594, -696.96337890625, 543.7242431640625, 1223.494140625, 431.14337158203125, 1109.2000732421875, 159.03868103027344, 436.1414794921875, -517.3489990234375, 1074.5596923828125, 24.616363525390625, 1163.3699951171875, 251.10894775390625, -140.3892822265625, 980.5590209960938, 276.4285888671875, -1201.1202392578125, 205.75738525390625, -274.556884765625, 71.42262268066406, 460.73394775390625, 114.592041015625, -366.3526611328125, -325.03485107421875, 746.9888916015625, 928.9031372070312, 633.81103515625, 1277.58935546875, 331.7690124511719, 906.755126953125, -5.144355773925781, 683.7332153320312, 76.57919311523438, -508.189208984375, 2082.515625, -1145.4056396484375, 4.755458831787109, 134.51934814453125, 298.53076171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000467.npy"} +{"epoch": 0.6857562408223201, "step": 468, "batch_size": 64, "mean": 470.14892578125, "std": 619.7252197265625, "min": -1108.3807373046875, "p10": -77.7124244689941, "median": 417.6086120605469, "p90": 1467.3194458007815, "max": 2551.257568359375, "pos_frac": 0.859375, "sample": [463.2832336425781, -3.2218379974365234, 1541.79541015625, 513.6487426757812, 1579.788818359375, 1058.30419921875, 478.7313232421875, 660.5029296875, 451.7273864746094, 584.6552734375, 407.89056396484375, 745.3258056640625, 142.81353759765625, 474.5649719238281, 557.55712890625, 116.0162353515625, 245.9919891357422, -170.78492736816406, 422.11590576171875, 265.6196594238281, 192.68414306640625, 7.862102508544922, 68.11046600341797, 370.7762756347656, 405.9285888671875, 1505.5894775390625, 440.34027099609375, 1595.904541015625, 1005.754638671875, 115.45150756835938, -399.7792663574219, 22.979202270507812, 671.0255737304688, 250.66314697265625, 508.3697204589844, 686.3294677734375, 128.592041015625, 184.91119384765625, 706.40478515625, -1108.3807373046875, 2045.1134033203125, 81.1756362915039, 597.3137817382812, 533.3430786132812, 413.101318359375, 60.855133056640625, -335.343994140625, 434.0841979980469, 7.3170013427734375, 1378.022705078125, 398.1289978027344, 2075.13916015625, 296.3839416503906, 722.8330078125, -42.47863006591797, 435.181884765625, 428.6307067871094, 2551.257568359375, 615.9054565429688, -92.8126220703125, 256.6136474609375, -423.92718505859375, 0.2263946533203125, -242.3768310546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000468.npy"} +{"epoch": 0.6872246696035242, "step": 469, "batch_size": 64, "mean": 470.13177490234375, "std": 554.444091796875, "min": -798.9064331054688, "p10": -195.25982055664056, "median": 502.8377227783203, "p90": 1252.3284423828125, "max": 2057.90380859375, "pos_frac": 0.828125, "sample": [1145.2135009765625, 652.6915283203125, 103.99004364013672, 609.5903930664062, 2057.90380859375, 514.71728515625, 1305.046630859375, 137.517333984375, -78.35154724121094, 60.28844451904297, 620.69873046875, 142.7959442138672, -546.0553588867188, 106.02798461914062, 1175.347412109375, 16.83000946044922, 530.0091552734375, 528.15185546875, 528.990478515625, 578.594970703125, 1258.48291015625, 207.43118286132812, 141.83750915527344, 202.31602478027344, 1258.791259765625, -0.488037109375, 897.3375854492188, 535.2611083984375, 1334.006591796875, -315.87139892578125, 836.1175537109375, -250.73622131347656, 878.1368408203125, 242.11683654785156, 706.0707397460938, 272.86151123046875, -129.16946411132812, -223.58425903320312, 5.686370849609375, -481.56170654296875, 620.8461303710938, 918.6780395507812, 1264.3922119140625, 44.24958419799805, -343.177490234375, 184.063232421875, 273.8867492675781, 1203.213134765625, 418.6435546875, 127.92847442626953, 1166.477294921875, 871.3167114257812, 490.9581604003906, 1237.968017578125, 1352.2689208984375, 700.9700927734375, 595.7596435546875, 374.27142333984375, 47.54478454589844, -80.5090103149414, 174.71725463867188, 851.6189575195312, 826.2129516601562, -798.9064331054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000469.npy"} +{"epoch": 0.6886930983847284, "step": 470, "batch_size": 64, "mean": 408.45806884765625, "std": 553.426025390625, "min": -1272.4635009765625, "p10": -298.4161743164062, "median": 435.3231201171875, "p90": 1066.2550537109375, "max": 1801.3975830078125, "pos_frac": 0.75, "sample": [370.7542724609375, 495.70989990234375, -34.78985595703125, 120.69330596923828, 226.69924926757812, -103.15276336669922, 344.46368408203125, 110.93169403076172, -24.358184814453125, -635.13623046875, 611.1150512695312, 931.0735473632812, 547.23486328125, 378.2007751464844, 685.2371826171875, 1074.8580322265625, -354.56005859375, 674.0298461914062, -1272.4635009765625, 709.5845336914062, -102.25204467773438, 776.05615234375, 229.048828125, -334.6494140625, 343.66888427734375, 221.03262329101562, -246.33682250976562, 421.9682922363281, 576.7545776367188, 448.6779479980469, 701.3938598632812, -297.51434326171875, 1553.758544921875, 89.80049133300781, -29.64740562438965, 679.1142578125, 540.3336791992188, 569.2911376953125, 1801.3975830078125, 707.6964721679688, 1394.857177734375, 102.08377075195312, 819.8521728515625, 1062.247314453125, -100.49037170410156, 1561.4510498046875, 526.0166625976562, 612.0135498046875, 678.31591796875, 1030.5257568359375, 333.1024169921875, -329.1044921875, -355.153076171875, 220.45999145507812, 543.400634765625, 822.337158203125, -298.80267333984375, 1302.8739013671875, 798.9498291015625, -137.6981964111328, 1067.97265625, 103.60140991210938, 382.4131164550781, 494.3733825683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000470.npy"} +{"epoch": 0.6901615271659325, "step": 471, "batch_size": 64, "mean": 336.6220397949219, "std": 558.6133422851562, "min": -922.4022216796875, "p10": -262.07717742919914, "median": 279.27015686035156, "p90": 904.0467163085938, "max": 1948.3660888671875, "pos_frac": 0.703125, "sample": [1344.1331787109375, -22.841766357421875, -44.877685546875, 702.592529296875, 337.294189453125, 747.8413696289062, 680.2832641601562, 1948.3660888671875, 909.2176513671875, 277.64697265625, -71.75965881347656, 297.46197509765625, -53.77912902832031, 38.446197509765625, 379.0671691894531, -116.51348876953125, -184.79718017578125, -188.32310485839844, 158.90521240234375, -41.58249282836914, 1816.07666015625, -491.9879150390625, 321.9813537597656, 338.9059753417969, 815.4768676757812, -27.59845542907715, -383.6924743652344, -70.97970581054688, -452.0910339355469, 527.4105834960938, -290.2594299316406, 1148.1923828125, 841.0169067382812, 880.9595336914062, 502.4894714355469, 686.5735473632812, 859.82080078125, 147.30677795410156, 183.8551483154297, 583.8563842773438, 145.91006469726562, 280.8933410644531, 85.81572723388672, 36.7325553894043, 786.7935791015625, 1909.5550537109375, 13.28509521484375, -922.4022216796875, 891.981201171875, 32.2694091796875, -373.6240234375, 121.39142608642578, 38.87824249267578, 285.29876708984375, 74.80279541015625, 654.3711547851562, -12.862386703491211, -196.31858825683594, 484.9122314453125, 345.1819152832031, 708.5064086914062, 945.19189453125, -377.9378356933594, 551.0886840820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000471.npy"} +{"epoch": 0.6916299559471366, "step": 472, "batch_size": 64, "mean": 391.5537109375, "std": 652.6005859375, "min": -1355.848876953125, "p10": -174.1043716430664, "median": 250.97806549072266, "p90": 1272.8382568359377, "max": 2388.244873046875, "pos_frac": 0.796875, "sample": [-1355.848876953125, -457.52142333984375, 765.871337890625, -509.6831359863281, 307.76519775390625, 144.85177612304688, 619.7042236328125, -163.57229614257812, -96.25164794921875, -803.0084838867188, 290.1247863769531, 10.769290924072266, 136.57701110839844, 378.8955383300781, 55.250518798828125, 905.4273681640625, -20.268783569335938, 505.86700439453125, 268.49420166015625, 28.6988525390625, 1182.662353515625, 93.84336853027344, 25.727294921875, 11.85516357421875, 219.42242431640625, 449.07684326171875, 353.8373718261719, -244.14974975585938, 220.99468994140625, 888.0685424804688, 34.729766845703125, 980.61962890625, 259.7099914550781, 1070.998046875, 2292.797607421875, 1606.292236328125, 106.8600845336914, 1302.7342529296875, 313.60711669921875, 584.680908203125, 397.42657470703125, 1018.61865234375, 98.32646179199219, 242.2461395263672, -148.6571807861328, 1416.379638671875, 1613.9951171875, 168.0769805908203, 1395.812255859375, 18.149307250976562, 2388.244873046875, -72.44419860839844, -178.6181182861328, -86.5188980102539, 1203.0809326171875, 823.0245971679688, 613.9229125976562, 195.14010620117188, 379.6504821777344, -310.30645751953125, 286.0211486816406, 429.7192687988281, 236.78366088867188, 164.85472106933594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000472.npy"} +{"epoch": 0.6930983847283406, "step": 473, "batch_size": 64, "mean": 185.667236328125, "std": 796.5276489257812, "min": -2017.946044921875, "p10": -515.6864440917968, "median": 195.5361785888672, "p90": 976.7841918945314, "max": 3566.90380859375, "pos_frac": 0.671875, "sample": [-308.5307312011719, 933.4876708984375, 314.497802734375, -231.26124572753906, -531.7155151367188, 1379.3331298828125, -135.74029541015625, 161.207275390625, 245.87611389160156, 367.30902099609375, 1237.0316162109375, 650.2155151367188, -115.54731750488281, 77.23223876953125, -478.2852783203125, -448.494140625, 75.18380737304688, -31.736557006835938, 197.6034393310547, 1290.3323974609375, -340.64697265625, -35.09639358520508, 697.9978637695312, 343.0135192871094, 123.48605346679688, 138.24462890625, 259.5750732421875, 220.49876403808594, 204.46267700195312, 101.55923461914062, -2017.946044921875, 76.22782135009766, -630.6790771484375, 101.48841857910156, -1592.211181640625, -1692.64697265625, 3566.90380859375, 244.59661865234375, 501.83416748046875, 1236.144287109375, 274.0069580078125, 235.52194213867188, 769.3355102539062, 776.638671875, 193.4689178466797, 713.1109008789062, -533.1578369140625, -406.7528991699219, 219.9453887939453, 918.9471435546875, 597.9784545898438, 995.33984375, -394.94476318359375, -242.30780029296875, 101.67288208007812, -128.64675903320312, 830.8855590820312, -1400.631591796875, 480.8689270019531, 211.48483276367188, -349.90533447265625, 363.06756591796875, 173.53167724609375, 1328.4400634765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000473.npy"} +{"epoch": 0.6945668135095447, "step": 474, "batch_size": 64, "mean": 338.5450439453125, "std": 612.193115234375, "min": -1601.80126953125, "p10": -278.9833862304687, "median": 344.6103820800781, "p90": 961.6655639648437, "max": 2141.251953125, "pos_frac": 0.765625, "sample": [-1138.5833740234375, 243.78939819335938, 191.91607666015625, 963.1300048828125, 878.46484375, -568.2671508789062, 958.24853515625, 140.0838623046875, 326.5431823730469, 534.7145385742188, 1158.8531494140625, 2141.251953125, 29.04725456237793, 356.9814758300781, 27.489463806152344, 802.589111328125, 343.4491882324219, -12.602561950683594, -422.802978515625, 435.3246765136719, 90.53248596191406, 301.4950866699219, -205.47296142578125, 174.34866333007812, 345.7715759277344, 681.6767578125, 765.074462890625, 220.34417724609375, 556.0707397460938, 436.63800048828125, 670.2105102539062, -67.82501220703125, 63.06035614013672, 352.05206298828125, 543.2031860351562, 280.8826904296875, 640.8006591796875, 103.93084716796875, -58.84251403808594, -309.36065673828125, 858.07080078125, -78.20394897460938, 655.2584228515625, 557.3447875976562, 162.89328002929688, 307.2916564941406, 1395.5072021484375, 1113.8773193359375, -105.22557067871094, -576.2406005859375, -208.10308837890625, 865.2535400390625, 437.15087890625, 596.7110595703125, 599.554931640625, 151.18141174316406, 413.64361572265625, -923.3259887695312, 781.3399658203125, -141.93450927734375, 350.85357666015625, 1346.9654541015625, -1601.80126953125, 1734.607421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000474.npy"} +{"epoch": 0.6960352422907489, "step": 475, "batch_size": 64, "mean": 427.5458679199219, "std": 451.60137939453125, "min": -361.1188049316406, "p10": -160.9650421142578, "median": 417.64085388183594, "p90": 1058.1525390625004, "max": 1521.1192626953125, "pos_frac": 0.78125, "sample": [96.45947265625, -91.48158264160156, 467.4467468261719, 197.46144104003906, -298.30145263671875, 583.2603759765625, 1473.658203125, 426.4094543457031, 1217.496826171875, 1105.4896240234375, 313.72528076171875, 239.00808715820312, 408.87225341796875, -6.692169189453125, 647.2719116210938, -361.1188049316406, 651.4945678710938, -0.6666088104248047, 841.1904907226562, -166.11029052734375, 846.9813232421875, 751.87548828125, 773.4998779296875, 238.65252685546875, 109.28128051757812, 326.42767333984375, 440.7686767578125, 437.1042785644531, 37.39265060424805, 501.3604736328125, 286.459228515625, 430.9451599121094, -121.005615234375, 357.0018615722656, 635.9414672851562, 924.896484375, 439.97900390625, 728.8132934570312, -161.96205139160156, 1253.6783447265625, 439.7892150878906, -19.74695587158203, 257.10205078125, -230.4897918701172, 860.3543090820312, 106.93928527832031, 585.1971435546875, -158.63868713378906, 300.85382080078125, 816.5304565429688, -119.15353393554688, 947.6993408203125, 618.4683837890625, 12.578094482421875, 1521.1192626953125, -210.6957244873047, 278.1175537109375, 1319.66455078125, 696.6808471679688, 1112.3897705078125, 334.4344177246094, 760.2216186523438, -228.7399444580078, 379.29541015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000475.npy"} +{"epoch": 0.697503671071953, "step": 476, "batch_size": 64, "mean": 329.54150390625, "std": 447.2165832519531, "min": -558.1226806640625, "p10": -159.27927093505858, "median": 274.6832580566406, "p90": 948.7928039550789, "max": 2044.6177978515625, "pos_frac": 0.796875, "sample": [673.259521484375, 108.53400421142578, -131.3092041015625, 628.4274291992188, 274.79547119140625, 560.1385498046875, 1186.2918701171875, 139.11843872070312, 164.23684692382812, -161.182373046875, 1069.7520751953125, 132.23223876953125, 140.2002410888672, 448.83465576171875, 770.6384887695312, 647.763671875, 711.0404052734375, 149.30654907226562, 260.8600769042969, 141.29049682617188, 527.5932006835938, 318.7723388671875, -28.477005004882812, 1212.3131103515625, 560.7944946289062, 581.760986328125, -508.32489013671875, -250.3336181640625, 425.77001953125, 80.7522201538086, 9.269935607910156, 144.239013671875, -261.5999755859375, -259.8625793457031, 534.4515991210938, -105.28013610839844, -125.5108871459961, -324.14666748046875, 636.0852661132812, 191.1982421875, 234.08367919921875, 538.0908203125, 287.05206298828125, -154.8386993408203, 141.5111541748047, 274.571044921875, 751.2234497070312, 1060.499755859375, 2044.6177978515625, 1025.1446533203125, 373.6121826171875, -31.667953491210938, 283.8609313964844, 265.70782470703125, 154.94090270996094, 105.40919494628906, 389.7276916503906, 435.79473876953125, 102.29032897949219, 332.0660705566406, 276.781005859375, 1072.529541015625, 412.0775146484375, -558.1226806640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000476.npy"} +{"epoch": 0.6989720998531571, "step": 477, "batch_size": 64, "mean": 353.25738525390625, "std": 586.5377197265625, "min": -1667.0718994140625, "p10": -276.34150085449215, "median": 293.6494903564453, "p90": 1029.646990966797, "max": 2142.24072265625, "pos_frac": 0.734375, "sample": [-462.71160888671875, 465.40203857421875, 994.1152954101562, -478.65460205078125, 1544.0123291015625, 253.85206604003906, 312.9518737792969, 838.8543701171875, 815.0255126953125, 999.5022583007812, 53.505348205566406, 87.24320220947266, 235.6717987060547, 821.0948486328125, 1042.566162109375, -133.2743377685547, 750.630859375, 479.7615661621094, 1191.3004150390625, -288.44073486328125, 557.208251953125, 168.12567138671875, -10.636444091796875, 230.76683044433594, -248.10995483398438, 713.7544555664062, 379.1246643066406, 117.12588500976562, -3.906766891479492, -2.734678268432617, -568.2581176757812, 489.2379455566406, -1667.0718994140625, -601.9730224609375, 1398.796142578125, 268.7789001464844, 280.4029846191406, 195.56192016601562, 537.07080078125, 85.86294555664062, 643.7276611328125, -86.29156494140625, 644.23828125, 1105.603515625, 257.6808776855469, 396.6878967285156, 309.4432067871094, 878.7676391601562, 491.6991271972656, 182.643310546875, 1155.602783203125, -245.57815551757812, -55.463035583496094, 306.89599609375, -5.195648193359375, 959.384033203125, 739.1097412109375, -149.69473266601562, 58.799407958984375, -361.59881591796875, 57.034446716308594, 700.34814453125, 2142.24072265625, 640.8515014648438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000477.npy"} +{"epoch": 0.7004405286343612, "step": 478, "batch_size": 64, "mean": 392.1885986328125, "std": 499.2119445800781, "min": -735.4822387695312, "p10": -201.9390151977539, "median": 351.7679748535156, "p90": 1158.8174682617191, "max": 1667.6767578125, "pos_frac": 0.8125, "sample": [-212.6004180908203, 135.51014709472656, 686.8975219726562, 192.48651123046875, 18.068748474121094, 450.92291259765625, 309.59478759765625, -355.9720458984375, 434.4436950683594, 23.224609375, 363.37420654296875, 533.3279418945312, -570.137451171875, -336.1380615234375, 282.9462585449219, 891.9248657226562, 805.3946533203125, 1667.6767578125, 400.25091552734375, 149.44207763671875, -272.7852783203125, -177.06240844726562, -383.8892822265625, 271.7707824707031, 391.8524169921875, 980.36572265625, 419.69586181640625, 494.6114196777344, -41.245758056640625, 198.9606475830078, 1477.9427490234375, 1053.374755859375, 340.1617431640625, 254.77337646484375, 434.1005859375, 324.9477233886719, 1303.4791259765625, 1386.269775390625, 621.010498046875, -153.24607849121094, 598.791015625, -61.29094696044922, 120.79983520507812, -101.99678802490234, 636.072021484375, 484.7660827636719, 156.84129333496094, 1204.0072021484375, 248.88255310058594, 420.23748779296875, 559.47705078125, 31.616661071777344, 321.11376953125, 755.75634765625, 310.00177001953125, 282.1036682128906, 1410.7286376953125, 669.9556274414062, 1298.671630859375, -735.4822387695312, 590.683837890625, 406.2939147949219, 690.5015258789062, 5.811187744140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000478.npy"} +{"epoch": 0.7019089574155654, "step": 479, "batch_size": 64, "mean": 376.192626953125, "std": 630.46484375, "min": -1506.421875, "p10": -290.602847290039, "median": 328.459716796875, "p90": 1109.4802001953126, "max": 2945.06787109375, "pos_frac": 0.71875, "sample": [18.938087463378906, 91.62776947021484, 1171.7322998046875, 626.7474365234375, 982.1981811523438, 1179.951904296875, -404.85455322265625, -240.40414428710938, 339.43212890625, 10.185819625854492, 822.0228271484375, -171.18185424804688, -191.16494750976562, 317.4873046875, 794.8807373046875, 257.14202880859375, 220.65235900878906, -94.50225830078125, 1035.089111328125, 493.6830139160156, 210.23892211914062, 544.8826904296875, -100.09492492675781, 432.3681640625, 2945.06787109375, 1196.7249755859375, 648.7008056640625, 757.0796508789062, 762.9605102539062, 429.6864013671875, 1127.6202392578125, 259.0703430175781, 232.94970703125, 836.9239501953125, -588.3688354492188, 1639.05517578125, -34.228973388671875, 482.49090576171875, 495.9902038574219, 138.80296325683594, 400.3768005371094, 1067.1534423828125, 742.4440307617188, 434.1432189941406, -69.16148376464844, -1506.421875, 408.91986083984375, -321.6585693359375, -312.1165771484375, -80.69287109375, 144.60317993164062, -9.110450744628906, -411.951171875, -527.136474609375, 165.22389221191406, -28.497764587402344, 551.5604248046875, -1.54864501953125, 142.98583984375, 1382.0855712890625, 383.011962890625, 1033.5657958984375, 158.1243133544922, 652.841552734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000479.npy"} +{"epoch": 0.7033773861967695, "step": 480, "batch_size": 64, "mean": 296.2763366699219, "std": 574.5997924804688, "min": -830.8179321289062, "p10": -433.01636657714835, "median": 286.8660888671875, "p90": 998.1537292480468, "max": 2306.4892578125, "pos_frac": 0.671875, "sample": [-461.5265197753906, 397.28021240234375, -121.67379760742188, 93.34979248046875, 369.5934143066406, 155.15652465820312, 298.32501220703125, 949.9603881835938, 608.5711669921875, 852.6743774414062, 414.5863037109375, 1318.14208984375, 448.3233642578125, -319.985107421875, -705.7297973632812, -598.6629638671875, 642.3060302734375, 770.1565551757812, -162.24728393554688, 350.241455078125, 86.81929779052734, -158.6689910888672, 22.008377075195312, 408.58123779296875, 506.1515808105469, 177.35191345214844, 2306.4892578125, 275.40716552734375, 582.755126953125, -157.11134338378906, 842.697998046875, 996.5553588867188, 998.8387451171875, -152.18846130371094, -561.147705078125, 263.3061828613281, 538.0297241210938, -222.60350036621094, 477.31903076171875, -830.8179321289062, -682.317626953125, -643.510009765625, 675.285400390625, 1074.869140625, 1461.0693359375, -145.11215209960938, 377.8473205566406, 632.1774291992188, -25.96657943725586, 459.2016906738281, -9.669357299804688, -366.49267578125, 173.98760986328125, 166.36685180664062, 176.62957763671875, -27.177654266357422, 120.96562957763672, -53.23819351196289, 604.4168090820312, 1021.1405029296875, 517.8120727539062, -191.8328094482422, 1066.24658203125, 880.3718872070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000480.npy"} +{"epoch": 0.7048458149779736, "step": 481, "batch_size": 64, "mean": 462.3815002441406, "std": 551.6420288085938, "min": -693.0579833984375, "p10": -155.968147277832, "median": 392.55328369140625, "p90": 1024.429168701172, "max": 1961.847900390625, "pos_frac": 0.765625, "sample": [1961.847900390625, -693.0579833984375, 587.0864868164062, 418.50225830078125, -218.67332458496094, -19.087448120117188, -106.84967041015625, 332.9029235839844, 518.2355346679688, 1572.5869140625, 119.86394500732422, -214.42581176757812, 128.8858184814453, -268.5623779296875, 454.87127685546875, 954.1239624023438, -109.46371459960938, 684.6056518554688, 1471.1146240234375, -55.30223083496094, -16.84625244140625, 850.7772827148438, 332.5658874511719, 792.0237426757812, 420.7169189453125, 198.34381103515625, 196.712158203125, 970.8860473632812, 706.0631103515625, -342.8678894042969, 67.13616943359375, 806.3698120117188, 366.60430908203125, -136.1888427734375, 749.5151977539062, 772.23583984375, -260.9373779296875, 21.849102020263672, 941.3790283203125, 534.7716064453125, -134.94427490234375, 143.50733947753906, 926.0338134765625, 920.4553833007812, 31.036231994628906, 188.19146728515625, 1791.3114013671875, 324.7619934082031, 1611.0081787109375, 1047.376220703125, 242.9197998046875, -7.6112518310546875, 453.7339782714844, 570.8468627929688, 543.0867919921875, 363.74560546875, 153.13015747070312, 880.1435546875, 789.7726440429688, 730.6771240234375, 1590.0296630859375, -164.4449920654297, 363.27020263671875, 744.0648193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000481.npy"} +{"epoch": 0.7063142437591777, "step": 482, "batch_size": 64, "mean": 213.993896484375, "std": 620.3377075195312, "min": -2125.4208984375, "p10": -430.1016662597656, "median": 272.96434020996094, "p90": 910.6365783691411, "max": 1801.79931640625, "pos_frac": 0.6875, "sample": [174.77407836914062, -45.14799499511719, 379.1991882324219, -872.921875, 449.65032958984375, 572.3421630859375, -1172.399658203125, 615.4345092773438, 659.7581787109375, 361.14154052734375, 339.8738098144531, 683.76806640625, -11.805221557617188, 360.84912109375, 712.6784057617188, 1152.640380859375, 229.16891479492188, -311.96868896484375, 340.2363586425781, 396.250244140625, 17.559703826904297, 371.8133239746094, 360.5838623046875, 964.5796508789062, -868.6409301757812, 356.1563720703125, 133.72491455078125, 962.0665283203125, 316.759765625, 463.7989196777344, 699.4962768554688, 653.7081298828125, 790.6333618164062, 208.64321899414062, -364.8540954589844, 154.22756958007812, 964.308349609375, -458.0649108886719, 1368.2979736328125, -343.662109375, 697.8472290039062, -17.013418197631836, -183.9607696533203, 50.76258087158203, 212.06614685058594, 77.39260864257812, -2125.4208984375, 342.8441162109375, 1801.79931640625, 43.20677947998047, -7.616363525390625, -83.86451721191406, -1.82354736328125, 465.73760986328125, -998.8471069335938, 367.8324279785156, 138.61929321289062, 1335.1302490234375, -193.3497314453125, 126.66654968261719, -200.54263305664062, -51.882606506347656, 725.6380615234375, -590.2698364257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000482.npy"} +{"epoch": 0.7077826725403817, "step": 483, "batch_size": 64, "mean": 478.30712890625, "std": 598.7930297851562, "min": -491.6479797363281, "p10": -235.89148864746087, "median": 399.04241943359375, "p90": 1227.114831542969, "max": 3140.325927734375, "pos_frac": 0.78125, "sample": [-148.1928253173828, 96.61801147460938, 1370.0767822265625, 755.6799926757812, -339.58453369140625, 758.7431030273438, 398.21405029296875, 327.15386962890625, 657.1382446289062, 662.6397094726562, 399.87078857421875, 795.7860717773438, 463.1856689453125, 91.67616271972656, 409.8036804199219, 248.9238739013672, 944.4617309570312, 1025.4051513671875, -379.48333740234375, 550.1942138671875, 343.54949951171875, -46.28862762451172, 647.58203125, 826.262939453125, 63.49169921875, -173.61184692382812, 1039.650146484375, 716.0554809570312, 31.943496704101562, 1071.1121826171875, -291.1407775878906, 1352.75439453125, -25.440284729003906, 930.1414794921875, -1.1569976806640625, 1346.7763671875, 363.6849060058594, 969.68505859375, 194.50277709960938, 1184.06201171875, 752.6475219726562, 714.7208251953125, 518.43310546875, -464.5052490234375, 170.14547729492188, 313.75390625, 497.0035705566406, 327.10107421875, 1245.5660400390625, -83.79664611816406, 188.249267578125, -491.6479797363281, 188.62924194335938, 3140.325927734375, -310.8929443359375, 250.19248962402344, 1353.84716796875, 303.539306640625, 496.23602294921875, -262.582763671875, 749.161376953125, 1311.5406494140625, 237.75091552734375, -165.68878173828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000483.npy"} +{"epoch": 0.7092511013215859, "step": 484, "batch_size": 64, "mean": 445.344970703125, "std": 536.588623046875, "min": -576.7673950195312, "p10": -104.58787841796874, "median": 370.3786926269531, "p90": 1177.0624877929688, "max": 1950.3570556640625, "pos_frac": 0.78125, "sample": [415.366943359375, -106.54960632324219, 1680.819580078125, 120.82359313964844, -100.01051330566406, -244.61256408691406, 493.00860595703125, 73.86351776123047, 1950.3570556640625, 181.81573486328125, 93.78079223632812, 523.9630737304688, 520.7604370117188, 630.9246826171875, 1050.559814453125, 571.7070922851562, 409.552001953125, 102.6060791015625, -173.9598388671875, -576.7673950195312, 478.3550109863281, 36.8746223449707, 1158.3074951171875, 1081.37890625, 1654.018798828125, 38.7684326171875, 360.47760009765625, 767.785888671875, 6.622280120849609, -73.4749755859375, -53.18294906616211, -51.131591796875, 912.5199584960938, 1343.5159912109375, 25.778541564941406, 228.916015625, 289.2015380859375, 141.48583984375, 169.31761169433594, -172.31886291503906, -8.687156677246094, 316.7468566894531, 744.232421875, 380.27978515625, 1287.736572265625, -26.1070556640625, 666.4013061523438, 1185.100341796875, 473.6079406738281, -258.4658508300781, -36.98869323730469, 855.4979858398438, 1129.8045654296875, 766.3935546875, 62.87516784667969, 1198.73193359375, 1087.24267578125, 423.30029296875, 464.43829345703125, 61.339698791503906, 873.0150146484375, 46.87898254394531, -131.25160217285156, 978.7278442382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000484.npy"} +{"epoch": 0.71071953010279, "step": 485, "batch_size": 64, "mean": 425.968505859375, "std": 598.8422241210938, "min": -954.0516967773438, "p10": -138.18357620239254, "median": 314.0285949707031, "p90": 1142.8302856445314, "max": 2323.55029296875, "pos_frac": 0.765625, "sample": [814.6837158203125, 747.4845581054688, -65.54374694824219, 213.89413452148438, 485.4739685058594, 74.61356353759766, 1149.8846435546875, 197.05557250976562, 411.05511474609375, 545.3883056640625, 208.2386474609375, -54.57166290283203, 1407.9342041015625, 1256.6328125, 180.7760009765625, 1068.110595703125, -559.7911987304688, -19.21708106994629, 1033.22705078125, 978.385498046875, 1027.895263671875, 1153.894775390625, 203.63418579101562, 539.1712646484375, 2323.55029296875, 1214.7393798828125, -34.26313018798828, 69.23273468017578, 810.5768432617188, -112.3758773803711, 34.238548278808594, -88.38540649414062, -566.727294921875, 147.72833251953125, 261.5620422363281, 968.9072875976562, 614.086669921875, 135.5289306640625, 167.45767211914062, 781.0950927734375, -42.85870361328125, 139.04812622070312, 2040.671142578125, 531.4542846679688, -149.2440185546875, 384.7975769042969, 435.9092102050781, 692.6726684570312, 347.46044921875, -684.798828125, 1126.3701171875, 1067.6375732421875, 53.57095718383789, -283.3042907714844, 27.831436157226562, -241.87684631347656, 929.203857421875, 300.0666809082031, 327.9905090332031, 223.45159912109375, -20.87804412841797, -954.0516967773438, 910.1847534179688, 375.4123229980469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000485.npy"} +{"epoch": 0.7121879588839941, "step": 486, "batch_size": 64, "mean": 476.5207824707031, "std": 726.9288330078125, "min": -1246.3443603515625, "p10": -370.0424163818359, "median": 450.60755920410156, "p90": 1374.1772094726564, "max": 3047.2646484375, "pos_frac": 0.796875, "sample": [1056.009765625, 527.8863525390625, 110.72016906738281, 494.1235656738281, 370.8614501953125, -989.8116455078125, 154.71597290039062, 215.88980102539062, 1009.1360473632812, -136.83065795898438, 624.39501953125, 1494.7415771484375, 671.6047973632812, -885.8290405273438, -386.7142333984375, 1607.4766845703125, 971.2652587890625, 981.337890625, -331.1415100097656, 1261.3248291015625, 1407.1365966796875, -316.93603515625, 1580.16064453125, 1227.257568359375, 207.38331604003906, -287.3914489746094, 540.943603515625, 449.4673767089844, 36.932037353515625, 1588.99755859375, 1178.14013671875, 451.74774169921875, -559.3660888671875, -759.4120483398438, 403.41748046875, 353.3592529296875, 140.67144775390625, 1385.6048583984375, 919.9241333007812, 893.5826416015625, 355.8353271484375, 815.64599609375, 486.87530517578125, -129.56472778320312, 575.6531982421875, 891.7877197265625, 949.274658203125, 649.2372436523438, 956.0573120117188, 1347.5126953125, 154.35330200195312, 350.6221618652344, 5.466703414916992, 359.90435791015625, 683.7798461914062, 223.32301330566406, 36.154518127441406, 513.9480590820312, -1246.3443603515625, 86.52815246582031, 378.39605712890625, -10.239532470703125, -646.9248046875, 3047.2646484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000486.npy"} +{"epoch": 0.7136563876651982, "step": 487, "batch_size": 64, "mean": 354.3516540527344, "std": 577.5410766601562, "min": -1879.5528564453125, "p10": -244.39152832031246, "median": 293.2348175048828, "p90": 1076.6990173339848, "max": 2035.2833251953125, "pos_frac": 0.796875, "sample": [283.4404296875, 1251.14892578125, 202.08914184570312, -263.1217346191406, 1156.0933837890625, 1113.4561767578125, 740.5206298828125, 478.9035339355469, 1599.170654296875, -57.74462127685547, 137.00750732421875, -504.752197265625, 1186.5555419921875, -200.68771362304688, 213.3851318359375, 2035.2833251953125, 263.79364013671875, -378.88055419921875, 825.1787109375, 813.6713256835938, 385.38189697265625, 663.5226440429688, -1879.5528564453125, 89.6564712524414, -91.86748504638672, 646.0821533203125, -3.9488296508789062, 938.3572998046875, 1420.482177734375, -27.61991310119629, 224.7465057373047, 342.3628234863281, 398.3120422363281, 499.32562255859375, 208.15713500976562, -608.1359252929688, 218.84579467773438, -542.4197998046875, 625.6842651367188, 324.2756652832031, 383.8140869140625, 50.97998046875, 688.254150390625, -152.2831573486328, 467.99322509765625, 171.80758666992188, 195.24940490722656, 301.52911376953125, 491.58172607421875, 244.128173828125, 150.58677673339844, 613.357421875, 240.8743133544922, 421.6793212890625, 39.46656799316406, -330.40753173828125, 284.9405212402344, 990.9323120117188, 806.019287109375, 673.9725341796875, 21.675384521484375, 454.5666198730469, 20.798919677734375, 720.83056640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000487.npy"} +{"epoch": 0.7151248164464024, "step": 488, "batch_size": 64, "mean": 370.16302490234375, "std": 504.1206970214844, "min": -557.7685546875, "p10": -130.77005615234376, "median": 351.1059265136719, "p90": 1115.7972412109377, "max": 1918.21240234375, "pos_frac": 0.703125, "sample": [-3.4936141967773438, -8.01766586303711, 1556.593505859375, 195.4104461669922, 1073.675048828125, 127.46018981933594, 693.6004028320312, -72.69229888916016, -120.40774536132812, 339.1687316894531, 498.970947265625, 210.7169647216797, 566.4389038085938, 1177.248779296875, 346.99700927734375, 869.7759399414062, -153.52474975585938, 45.533241271972656, -557.7685546875, -128.7791748046875, 616.572265625, -88.02278900146484, 463.24444580078125, 392.6507263183594, -0.3625335693359375, -284.2559814453125, 34.391231536865234, 580.909423828125, 1918.21240234375, 172.06898498535156, 462.39154052734375, 678.8724365234375, -421.6833801269531, 1133.849609375, 771.0877075195312, -28.196247100830078, 1392.4385986328125, 570.486572265625, 355.21484375, 125.31635284423828, 632.6937866210938, 979.80615234375, 218.25762939453125, 492.40570068359375, 210.16737365722656, 450.41937255859375, 596.1673583984375, 1407.943359375, -119.3558578491211, -107.7265396118164, -164.80491638183594, 451.7760314941406, 435.7129211425781, 36.559730529785156, 432.2032470703125, 496.9847717285156, -70.81547546386719, 466.9528503417969, -12.163043975830078, -131.623291015625, -518.3716430664062, 476.7648620605469, 258.8203125, 1269.5655517578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000488.npy"} +{"epoch": 0.7165932452276065, "step": 489, "batch_size": 64, "mean": 366.00018310546875, "std": 556.5921630859375, "min": -755.4341430664062, "p10": -192.54015808105464, "median": 195.11882781982422, "p90": 1229.2572509765625, "max": 2002.260009765625, "pos_frac": 0.703125, "sample": [909.0051879882812, 37.14949035644531, 237.92747497558594, 119.67970275878906, -11.799020767211914, 140.85296630859375, 221.5347442626953, -430.0750427246094, 99.69158935546875, 141.19461059570312, -216.3869171142578, 177.62399291992188, 612.9173583984375, 734.3745727539062, 501.9396667480469, 165.51194763183594, -262.69561767578125, 227.27606201171875, -134.732177734375, 957.3727416992188, -45.73857116699219, -54.591796875, 275.5506591796875, 183.26220703125, -49.74135971069336, 150.6139678955078, 206.97544860839844, -25.354976654052734, 156.51329040527344, 696.407958984375, -87.66888427734375, 48.44325256347656, -25.110750198364258, -286.4044189453125, 1348.95947265625, 619.8051147460938, 20.405315399169922, 107.28007507324219, 341.1978759765625, -755.4341430664062, 759.49560546875, -313.8084411621094, 1431.332763671875, -399.6053466796875, 1228.8717041015625, 1229.4224853515625, 720.4095458984375, 2002.260009765625, 363.1369934082031, 760.908203125, 1838.9312744140625, 522.60107421875, 507.55853271484375, 1487.640625, -136.89772033691406, -63.900978088378906, 784.6533203125, 499.3753967285156, 560.5775146484375, 497.158935546875, -0.461883544921875, 1426.0592041015625, 669.7725830078125, -5.2119140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000489.npy"} +{"epoch": 0.7180616740088106, "step": 490, "batch_size": 64, "mean": 385.1979064941406, "std": 708.7413330078125, "min": -1379.1064453125, "p10": -363.56342468261715, "median": 190.2949981689453, "p90": 1271.8459594726567, "max": 2170.629150390625, "pos_frac": 0.765625, "sample": [72.59967041015625, -1285.2222900390625, 1170.733642578125, -209.43797302246094, 152.69873046875, -13.124473571777344, 170.59393310546875, 945.6244506835938, 936.9642333984375, -645.7032470703125, 105.63748168945312, -47.0838623046875, 1428.9697265625, 78.15701293945312, 491.2117919921875, -321.2976989746094, -205.3095703125, 1117.4560546875, 643.3616333007812, -776.3885498046875, 65.93180847167969, 141.44436645507812, -781.5108642578125, 135.0905303955078, -1379.1064453125, 1419.678466796875, 107.38742065429688, 479.6785888671875, 473.1852111816406, 748.9461669921875, 328.15838623046875, 2170.629150390625, 103.12139892578125, 689.0119018554688, 38.17961883544922, -266.72705078125, 1724.3695068359375, 949.1556396484375, 1876.110107421875, 115.83000183105469, 136.78228759765625, 209.99606323242188, 1315.1798095703125, 787.4990234375, -15.35365104675293, 828.9705200195312, -381.67730712890625, 938.1073608398438, 42.46759033203125, 477.03497314453125, 422.24652099609375, -513.990478515625, 1994.0147705078125, -26.0035400390625, 1001.7852783203125, 1002.1253051757812, 50.21917724609375, 128.12997436523438, 636.3526611328125, 969.1034545898438, 58.73481369018555, 872.6868896484375, 505.65594482421875, 263.594482421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000490.npy"} +{"epoch": 0.7195301027900147, "step": 491, "batch_size": 64, "mean": 259.3618469238281, "std": 498.0423583984375, "min": -854.7910766601562, "p10": -295.1734649658203, "median": 197.8345489501953, "p90": 884.2482116699221, "max": 1734.04052734375, "pos_frac": 0.71875, "sample": [314.16839599609375, -262.7571716308594, 841.7811889648438, 578.59423828125, 1407.3843994140625, 152.6851043701172, 400.3301086425781, -31.58601188659668, 462.0853271484375, 110.45501708984375, 299.5987243652344, 84.19148254394531, 36.879737854003906, -598.9454345703125, 502.1401062011719, 89.71357727050781, 773.1356811523438, 393.6737060546875, -61.62406921386719, 432.4283752441406, 205.48263549804688, -20.893789291381836, 1033.87451171875, -421.4378356933594, 235.58103942871094, 280.4337158203125, 727.0680541992188, -247.6502227783203, 902.4483642578125, 82.95384979248047, 934.3895263671875, 444.5604553222656, -854.7910766601562, 161.47024536132812, -246.21876525878906, 248.87518310546875, 25.096458435058594, 1114.55517578125, 278.77978515625, 296.84808349609375, 676.4186401367188, -689.4727172851562, 1734.04052734375, 111.34896850585938, -178.18763732910156, 190.18646240234375, -309.066162109375, 218.9944305419922, -51.91834259033203, 654.1522827148438, 98.8388671875, -440.42352294921875, 771.4306030273438, -92.51719665527344, -73.89209747314453, 115.25090026855469, 354.865966796875, 807.552734375, 117.79237365722656, -49.119110107421875, 1400.609375, 131.02545166015625, 512.7503051757812, -517.2610473632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000491.npy"} +{"epoch": 0.7209985315712188, "step": 492, "batch_size": 64, "mean": 249.57762145996094, "std": 587.8971557617188, "min": -1006.5714721679688, "p10": -501.17475891113276, "median": 180.85095977783203, "p90": 1139.1705810546878, "max": 1451.4764404296875, "pos_frac": 0.734375, "sample": [213.1519775390625, 230.6920623779297, 605.431884765625, 23.5028018951416, 255.71035766601562, 37.05406188964844, 32.619232177734375, 680.7911376953125, -630.8626708984375, -175.5293426513672, 208.84219360351562, 702.1978759765625, 810.335205078125, -322.11297607421875, -990.6884155273438, 1092.9765625, -122.20780181884766, 1303.08349609375, -204.40255737304688, 1451.4764404296875, -527.9402465820312, 1282.3074951171875, 328.2228698730469, -378.3906555175781, 394.835693359375, 381.62261962890625, 280.5039367675781, -579.0909423828125, 103.92289733886719, 315.27294921875, 20.5606689453125, 20.104421615600586, 162.26901245117188, -1006.5714721679688, -47.15054702758789, 1206.489990234375, 877.0511474609375, 252.1656494140625, -806.0676879882812, 73.44274139404297, 1158.968017578125, -798.3548583984375, 1222.6279296875, 55.78235626220703, -177.2402801513672, 771.480712890625, 199.4329071044922, 79.82991027832031, 977.8236694335938, 16.45510482788086, 512.2352294921875, -89.37583923339844, 95.72832489013672, -438.7219543457031, 1080.6689453125, 887.604248046875, 600.4019165039062, 50.76982116699219, 62.37373733520508, 588.1214599609375, -262.755859375, 1369.82470703125, 48.728614807128906, 404.93707275390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000492.npy"} +{"epoch": 0.7224669603524229, "step": 493, "batch_size": 64, "mean": 295.969970703125, "std": 540.3995361328125, "min": -1916.8348388671875, "p10": -190.21840515136716, "median": 209.1218719482422, "p90": 1093.6062255859379, "max": 1409.9234619140625, "pos_frac": 0.734375, "sample": [1138.2421875, 107.05785369873047, 515.0906982421875, 314.6993713378906, -94.47781372070312, 1278.813232421875, 981.6594848632812, -15.617767333984375, 168.86245727539062, -369.76300048828125, 503.16552734375, 69.91407775878906, 1000.3855590820312, 441.88897705078125, 493.8077392578125, 143.89486694335938, -310.3514709472656, 330.9103088378906, 162.09542846679688, 42.05470275878906, 195.71519470214844, 393.4062805175781, 447.1496887207031, 401.85662841796875, -138.60430908203125, 334.05853271484375, 657.8268432617188, -156.91845703125, -197.7467041015625, -138.83584594726562, 44.92523193359375, -1916.8348388671875, 355.54827880859375, -23.943832397460938, 795.2013549804688, 4.933130264282227, 1349.7735595703125, 1387.623779296875, 188.94842529296875, -217.23851013183594, -34.422149658203125, 414.297119140625, 1170.320556640625, 826.6212158203125, 331.1136169433594, 1024.067138671875, -502.9437255859375, 413.7286376953125, 1409.9234619140625, 1123.40869140625, 156.303955078125, 674.8809814453125, -285.52325439453125, 226.61111450195312, 312.002685546875, -172.65237426757812, -100.69348907470703, 211.00489807128906, 703.2276611328125, 207.2388458251953, 181.27392578125, 13.81890869140625, -79.48495483398438, 48.77662658691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000493.npy"} +{"epoch": 0.723935389133627, "step": 494, "batch_size": 64, "mean": 459.6351318359375, "std": 509.01898193359375, "min": -643.2147216796875, "p10": -105.47897338867182, "median": 447.7499542236328, "p90": 1212.2382568359376, "max": 1885.8992919921875, "pos_frac": 0.828125, "sample": [-2.4168243408203125, 334.92266845703125, 607.28564453125, 394.8480529785156, 1885.8992919921875, 1442.482666015625, -643.2147216796875, 752.3253173828125, 728.9631958007812, 54.70465087890625, 452.9595031738281, -42.07533645629883, 597.19384765625, 413.23236083984375, -340.16790771484375, 1227.4561767578125, 748.8565673828125, -208.65179443359375, 468.0865173339844, 853.2095947265625, 141.43408203125, -301.019775390625, 282.1600341796875, 931.6911010742188, 799.2672119140625, 49.730628967285156, 879.0330810546875, 1211.833740234375, 244.99253845214844, 391.36822509765625, -522.485595703125, 35.65809631347656, 321.7012023925781, -298.3953857421875, 1690.5545654296875, 394.2203063964844, 466.14599609375, -3.1805191040039062, 536.0660400390625, 1400.7930908203125, 1238.780517578125, 480.9146728515625, -48.26612854003906, 32.065940856933594, 525.9689331054688, 697.20751953125, 737.7496948242188, 972.1484985351562, 471.2986145019531, 708.2623901367188, -129.99876403808594, 263.66455078125, 525.2084350585938, 139.40963745117188, 142.44012451171875, 540.474853515625, 824.908935546875, 588.3792114257812, 181.6746826171875, 442.5404052734375, 1212.41162109375, 72.95976257324219, 364.5766906738281, 54.39935302734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000494.npy"} +{"epoch": 0.7254038179148311, "step": 495, "batch_size": 64, "mean": 392.8917236328125, "std": 618.7874145507812, "min": -1089.258544921875, "p10": -210.3155563354492, "median": 294.92022705078125, "p90": 1128.3706298828124, "max": 2515.85888671875, "pos_frac": 0.765625, "sample": [365.2462158203125, 83.85105895996094, -178.34271240234375, -216.63616943359375, 246.0858154296875, 328.22479248046875, -542.789306640625, 167.02154541015625, 295.73638916015625, 32.010101318359375, 156.47386169433594, 720.9326171875, 959.6860961914062, -699.5560913085938, 447.20794677734375, 444.1463317871094, -316.3931579589844, 903.8399047851562, 78.93508911132812, 758.2923583984375, 137.8472442626953, -130.81417846679688, 261.1771545410156, 62.030147552490234, 378.6784362792969, 1129.844482421875, 977.6957397460938, 876.364501953125, 260.4961242675781, 504.7484130859375, 293.45013427734375, 312.9139404296875, 134.61911010742188, 489.43267822265625, 1124.931640625, 1156.4432373046875, 878.9185791015625, -195.5674591064453, 2515.85888671875, 1393.32861328125, 436.4463806152344, -17.471847534179688, 213.70083618164062, 653.0985107421875, 349.41644287109375, -687.7188720703125, -24.690994262695312, 294.10406494140625, 72.68636322021484, -334.9566345214844, -168.91094970703125, -8.412353515625, 371.09539794921875, 597.503173828125, 1459.22119140625, 596.2359619140625, 265.0946960449219, 877.4723510742188, -1089.258544921875, 1314.800537109375, 1017.9097900390625, 293.49371337890625, 2162.8271484375, -94.98531341552734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000495.npy"} +{"epoch": 0.7268722466960352, "step": 496, "batch_size": 64, "mean": 197.38121032714844, "std": 484.35321044921875, "min": -1170.425048828125, "p10": -357.6348480224609, "median": 154.64617919921875, "p90": 886.187048339844, "max": 1200.5552978515625, "pos_frac": 0.71875, "sample": [160.67257690429688, -553.511962890625, 1137.0252685546875, 902.4625244140625, -870.8290405273438, 763.3357543945312, 95.20365142822266, 270.553466796875, 800.0537719726562, 148.61978149414062, 1200.5552978515625, 307.61968994140625, 387.9552917480469, 245.18954467773438, -267.24072265625, -26.367549896240234, -250.2252197265625, 483.864501953125, -829.8733520507812, 1158.6234130859375, 917.69140625, 328.7642517089844, 848.2109375, 371.3739929199219, 951.2305297851562, 176.97222900390625, -79.93565368652344, -100.46510314941406, 73.40046691894531, -378.8509216308594, 199.41461181640625, 144.67771911621094, 192.03060913085938, 465.3019104003906, 107.11125946044922, -91.21449279785156, 533.3766479492188, 212.73251342773438, -716.7417602539062, 227.8615264892578, -469.66552734375, -308.13067626953125, 678.73828125, 30.810401916503906, 145.94068908691406, 544.06787109375, 102.99732971191406, 464.1263732910156, 620.8036499023438, 474.9718933105469, 132.21951293945312, -230.6719207763672, 401.87353515625, 14.945180892944336, 124.79470825195312, -1170.425048828125, 24.123462677001953, 963.6741333007812, 23.577476501464844, -18.796470642089844, 462.9920959472656, -52.28440475463867, -7.843513488769531, 32.92974853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000496.npy"} +{"epoch": 0.7283406754772394, "step": 497, "batch_size": 64, "mean": 207.71688842773438, "std": 495.0066223144531, "min": -955.7325439453125, "p10": -322.1953491210937, "median": 143.17077255249023, "p90": 919.5088867187501, "max": 1404.6500244140625, "pos_frac": 0.6875, "sample": [-341.5212707519531, 933.373779296875, 622.4695434570312, 718.6083374023438, 11.287099838256836, -255.65008544921875, 527.0885009765625, -202.84071350097656, -87.20689392089844, -226.9839630126953, 164.31907653808594, 1014.261474609375, 1260.05517578125, 462.83013916015625, 46.57334899902344, 119.25469970703125, 387.0940856933594, 204.74388122558594, 667.0326538085938, 492.9971618652344, -106.24790954589844, 3.8729934692382812, 661.8917236328125, 966.4669189453125, -40.68211364746094, 71.96379089355469, 887.157470703125, -190.03675842285156, 41.15238952636719, -775.7993774414062, 498.07208251953125, 73.10020446777344, -6.873332977294922, 368.89300537109375, 69.59356689453125, 1302.541259765625, 26.99658203125, 1404.6500244140625, 14.092910766601562, 76.86380004882812, 122.6728744506836, 282.87359619140625, 330.667236328125, 687.864013671875, -948.0170288085938, 191.04229736328125, 589.2483520507812, -419.9007873535156, 335.4239807128906, 163.66867065429688, 325.0193176269531, 267.19781494140625, 962.499755859375, -277.1015319824219, -164.5925750732422, 252.56765747070312, -109.37400817871094, -408.5347900390625, -75.61665344238281, -955.7325439453125, 652.20556640625, -519.678466796875, 312.7001037597656, -168.67820739746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000497.npy"} +{"epoch": 0.7298091042584435, "step": 498, "batch_size": 64, "mean": 352.5992431640625, "std": 619.0040893554688, "min": -1379.2598876953125, "p10": -280.7842559814453, "median": 350.4651794433594, "p90": 1066.2609375000002, "max": 2094.2802734375, "pos_frac": 0.71875, "sample": [-957.8635864257812, 237.80783081054688, 648.0178833007812, 149.32699584960938, -196.67391967773438, 643.5081787109375, 655.330322265625, 1083.500244140625, 292.26153564453125, 730.5841674804688, 479.88262939453125, 697.2978515625, 288.8871154785156, 957.3648071289062, -1379.2598876953125, 725.8504638671875, 1017.1870727539062, 1516.2320556640625, 46.19928741455078, 600.2639770507812, 125.57170867919922, 753.5374755859375, -123.5350341796875, 1026.035888671875, -262.5988464355469, 316.8954162597656, 388.98004150390625, 85.83496856689453, -463.97796630859375, -5.128044128417969, 290.8055114746094, 384.0349426269531, -1106.3277587890625, 1306.95556640625, 515.9656982421875, 5.8038330078125, -153.25247192382812, 538.3560180664062, 612.4917602539062, 495.25982666015625, 1570.0345458984375, -0.8145065307617188, 0.1478271484375, 45.72346115112305, 593.8341674804688, 641.9501342773438, -399.6899719238281, -38.57476806640625, 1355.6112060546875, 993.6987915039062, 261.6744384765625, -288.5780029296875, 390.7061462402344, -241.1973876953125, -210.85450744628906, -37.111724853515625, 501.12408447265625, 161.39300537109375, 982.3886108398438, 484.65277099609375, -307.9024353027344, 1176.993408203125, 2094.2802734375, -130.5526123046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000498.npy"} +{"epoch": 0.7312775330396476, "step": 499, "batch_size": 64, "mean": 430.9838562011719, "std": 577.1480712890625, "min": -1002.7814331054688, "p10": -191.10684356689453, "median": 461.8444061279297, "p90": 1052.2104370117188, "max": 2342.245849609375, "pos_frac": 0.796875, "sample": [1047.247802734375, 89.48121643066406, 199.10150146484375, -1002.7814331054688, -56.555137634277344, 537.4251708984375, 538.552734375, 659.2630004882812, -79.94721221923828, -95.44802856445312, 533.5247802734375, 639.2406616210938, 169.528564453125, 1054.3372802734375, 617.0344848632812, 185.86453247070312, 734.3896484375, -689.730224609375, 2342.245849609375, 281.05401611328125, 197.925048828125, 1038.4808349609375, 770.785400390625, 30.552528381347656, -552.0579223632812, 512.1795654296875, 288.9795227050781, 995.9991455078125, -251.49037170410156, 1207.001953125, 233.01174926757812, 52.44293212890625, 597.0679321289062, -179.38211059570312, 281.4685363769531, 29.211923599243164, 574.7269897460938, 909.447265625, 937.558349609375, -231.75267028808594, 38.273529052734375, 592.3666381835938, 637.5677490234375, 614.0653076171875, 620.79345703125, 122.57982635498047, 708.2343139648438, 1748.71142578125, 1460.7144775390625, 168.01303100585938, 411.5092468261719, 721.9366455078125, 379.68170166015625, 882.317138671875, -63.59519577026367, 819.7703857421875, 1281.095703125, 372.64935302734375, -609.348876953125, -159.4316864013672, -196.13172912597656, 560.255859375, 50.231651306152344, 1274.722412109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000499.npy"} +{"epoch": 0.7327459618208517, "step": 500, "batch_size": 64, "mean": 352.3294677734375, "std": 564.7052612304688, "min": -1080.2764892578125, "p10": -379.49642333984366, "median": 276.84129333496094, "p90": 956.5584350585939, "max": 1723.73876953125, "pos_frac": 0.78125, "sample": [152.84054565429688, -209.18922424316406, -647.8468627929688, 1723.73876953125, -594.5127563476562, 556.5709228515625, 971.5223999023438, 479.1544189453125, 834.279052734375, 74.01598358154297, 894.07763671875, 674.0467529296875, 798.2965698242188, -411.2840270996094, 29.596342086791992, 514.5596923828125, 32.89869689941406, 140.569091796875, 287.7557678222656, 1372.8922119140625, 598.480712890625, -43.00492858886719, 747.4580688476562, 1416.94140625, -39.62952423095703, 739.2802124023438, -459.48626708984375, 921.6425170898438, 896.5335083007812, -734.1600952148438, 792.148681640625, 824.4873046875, -57.88111877441406, 117.80223846435547, 62.73822784423828, 876.2421875, -663.53369140625, 157.82369995117188, 981.4095458984375, -1080.2764892578125, -131.1619415283203, 65.43711853027344, 117.57471466064453, 686.9151000976562, 54.598365783691406, -96.80902862548828, 590.8161010742188, 79.45098114013672, 106.9273681640625, 265.92681884765625, 1308.6912841796875, 893.2111206054688, 849.505615234375, 199.69332885742188, 617.7105712890625, 486.8327331542969, 674.0836791992188, 349.70098876953125, 26.79781150817871, 590.43701171875, 90.13624572753906, 1263.950439453125, -305.3253479003906, 34.98756408691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000500.npy"} +{"epoch": 0.7342143906020558, "step": 501, "batch_size": 64, "mean": 379.6474304199219, "std": 600.6553955078125, "min": -1086.092529296875, "p10": -306.8445602416991, "median": 357.8916931152344, "p90": 1187.9832763671877, "max": 1945.726806640625, "pos_frac": 0.734375, "sample": [-99.51273345947266, 416.55029296875, 351.748046875, -345.5299987792969, -40.068912506103516, 1340.6195068359375, 246.25071716308594, 1026.445556640625, 276.9337158203125, -9.493804931640625, 181.92190551757812, -183.3497772216797, 325.279296875, 218.7650909423828, 1945.726806640625, 721.2390747070312, 1049.2073974609375, 430.3120422363281, 434.6856384277344, 975.8134765625, 1210.370849609375, -69.60429382324219, 314.75616455078125, 139.18038940429688, 875.1604614257812, -526.825439453125, -723.9758911132812, 383.53729248046875, 1130.5504150390625, 571.1578979492188, -793.46484375, 243.602783203125, 495.949462890625, 458.4597473144531, 453.2442626953125, 1398.6400146484375, -210.52903747558594, 572.4803466796875, 352.1810607910156, 919.9841918945312, 1284.0621337890625, 12.748954772949219, -146.6732635498047, -378.8871765136719, 680.1344604492188, 30.17535400390625, 29.41421890258789, 637.30126953125, 100.22085571289062, -1086.092529296875, 363.6023254394531, 628.5426025390625, -216.5785369873047, -119.63119506835938, 83.2274169921875, -559.0242919921875, 1135.74560546875, 800.1490478515625, 1331.8310546875, 438.3727111816406, 1780.6373291015625, 572.6959228515625, 459.2879638671875, -22.22616195678711], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000501.npy"} +{"epoch": 0.73568281938326, "step": 502, "batch_size": 64, "mean": 373.10076904296875, "std": 475.6739807128906, "min": -624.6331176757812, "p10": -65.33530883789062, "median": 329.93121337890625, "p90": 994.2436401367188, "max": 2353.671630859375, "pos_frac": 0.8125, "sample": [658.0737915039062, -327.07891845703125, 905.7958374023438, 427.6104431152344, 58.8555908203125, 671.1065063476562, -174.63424682617188, 7.06982421875, 1256.9276123046875, 364.04180908203125, 395.5667724609375, 216.58029174804688, 363.2724609375, 1094.088134765625, 937.6455078125, 10.4130859375, 23.59429931640625, 234.5985565185547, -624.6331176757812, 95.7996826171875, 655.4929809570312, 1095.068603515625, 121.84178924560547, 406.3633728027344, 35.15495300292969, 201.82749938964844, 36.59535217285156, 345.9316711425781, 467.466796875, 332.8201904296875, -355.88385009765625, 1140.0167236328125, -7.292366027832031, 523.6337890625, 487.3739013671875, 1001.599609375, -163.2270965576172, 2353.671630859375, 59.36125183105469, -7.768354415893555, 758.369384765625, 17.796981811523438, 269.95367431640625, 348.64105224609375, 387.36370849609375, 557.8344116210938, 1214.907958984375, -3.974721908569336, 700.771728515625, 505.85174560546875, 787.0406494140625, -33.63063049316406, -57.09929656982422, 117.14584350585938, 977.0797119140625, -95.44784545898438, 246.03883361816406, 77.72750854492188, 709.142333984375, 308.020263671875, -68.86502838134766, 327.042236328125, 168.82850646972656, 333.1689453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000502.npy"} +{"epoch": 0.737151248164464, "step": 503, "batch_size": 64, "mean": 345.94140625, "std": 473.9827880859375, "min": -353.71484375, "p10": -148.71579437255858, "median": 264.2348175048828, "p90": 854.0924499511721, "max": 2147.0087890625, "pos_frac": 0.765625, "sample": [1.7654228210449219, -72.6202621459961, 1160.030517578125, 55.97441101074219, -153.7719268798828, -168.1676025390625, 517.2630615234375, 284.3988342285156, 87.72260284423828, -81.93099212646484, -353.71484375, 334.17498779296875, -208.17807006835938, 256.2122802734375, 1001.0369262695312, 806.7694091796875, 349.201171875, 702.9339599609375, 91.5758056640625, 800.7261962890625, -41.18122863769531, 13.495922088623047, 44.07975769042969, 63.48371124267578, 383.03802490234375, -61.04380798339844, -136.91815185546875, 588.7252197265625, 192.94131469726562, 143.057373046875, 517.60498046875, -18.03759765625, 113.09178161621094, 708.2406616210938, 18.265708923339844, -295.42047119140625, 2147.0087890625, 1849.8677978515625, 583.5289306640625, 384.98284912109375, 120.56683349609375, 697.2776489257812, 764.1688842773438, 1.286977767944336, -82.94085693359375, 632.5270385742188, 56.517738342285156, 818.2904663085938, 225.87509155273438, 394.4796447753906, 174.08566284179688, 616.95556640625, 494.02685546875, 277.6510009765625, 1000.8320922851562, 549.96240234375, -186.54830932617188, 763.5596923828125, -240.68331909179688, 869.4361572265625, 272.2573547363281, 432.32147216796875, 961.9227905273438, -83.79122924804688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000503.npy"} +{"epoch": 0.7386196769456681, "step": 504, "batch_size": 64, "mean": 423.2089538574219, "std": 577.7997436523438, "min": -705.88818359375, "p10": -331.68663940429684, "median": 363.2411193847656, "p90": 1096.4610839843751, "max": 1770.5911865234375, "pos_frac": 0.78125, "sample": [52.025604248046875, 137.30157470703125, 889.7483520507812, 398.774658203125, 134.04457092285156, 353.6415710449219, 722.3212280273438, -261.8118591308594, 1770.5911865234375, 1417.76513671875, 1749.4488525390625, -650.9148559570312, 600.195068359375, 751.9707641601562, -46.42103576660156, -69.21882629394531, 673.991455078125, 209.11309814453125, 9.167594909667969, -26.678314208984375, 39.17925262451172, -37.139808654785156, 395.12432861328125, -315.17626953125, 233.5792999267578, 255.6664581298828, -525.5301513671875, 568.3243408203125, 220.87374877929688, 496.28155517578125, -454.9219665527344, 1099.338623046875, -457.3909606933594, 1410.50634765625, 850.143310546875, 712.8423461914062, 840.1517333984375, 580.3191528320312, 223.12124633789062, 356.5740966796875, 101.70745849609375, 1291.2388916015625, 906.7727661132812, 511.4492492675781, 981.3488159179688, -159.50003051757812, -504.8214416503906, 846.3082885742188, 55.44044494628906, 1041.6875, 189.78866577148438, 787.3369750976562, 1089.746826171875, 732.524658203125, 342.2662353515625, 369.90814208984375, -338.76251220703125, 980.4727172851562, 1680.5389404296875, 324.2940673828125, -705.88818359375, 543.9487915039062, 694.7543334960938, 15.8909912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000504.npy"} +{"epoch": 0.7400881057268722, "step": 505, "batch_size": 64, "mean": 226.26625061035156, "std": 454.06243896484375, "min": -834.6461181640625, "p10": -229.44584045410156, "median": 161.66693878173828, "p90": 881.632489013672, "max": 1388.9892578125, "pos_frac": 0.6875, "sample": [10.809379577636719, 517.3289184570312, -221.3739013671875, 625.2424926757812, -51.49674987792969, 878.6336059570312, 196.22743225097656, -31.982650756835938, 36.776790618896484, 13.875289916992188, 812.9345703125, 882.917724609375, 1388.9892578125, -16.301437377929688, 604.251708984375, 494.22039794921875, -161.76890563964844, 352.50933837890625, 134.71783447265625, -218.5295867919922, 280.25738525390625, 687.416259765625, 174.1536865234375, 81.58786010742188, -434.78509521484375, -664.6210327148438, 149.18019104003906, 293.71270751953125, 959.5315551757812, 245.22576904296875, -154.4510040283203, 408.5242919921875, 369.890380859375, -232.90524291992188, 217.41549682617188, 675.438720703125, 129.03836059570312, 703.6083984375, -71.14766693115234, 507.06683349609375, -678.4641723632812, -834.6461181640625, 143.8813934326172, 260.30401611328125, 13.443267822265625, 126.87999725341797, 1010.4337768554688, 578.0438842773438, -218.3300323486328, 916.094482421875, 148.9939422607422, -180.8720703125, -398.59893798828125, 933.9468994140625, 207.66709899902344, -99.18370056152344, 860.5416259765625, -393.7850341796875, -207.27294921875, 79.91529846191406, 928.9445190429688, 187.64987182617188, 546.638916015625, -23.304180145263672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000505.npy"} +{"epoch": 0.7415565345080763, "step": 506, "batch_size": 64, "mean": 368.8091125488281, "std": 524.0787353515625, "min": -786.2079467773438, "p10": -189.96503753662105, "median": 335.1687927246094, "p90": 1068.4536865234375, "max": 1987.586181640625, "pos_frac": 0.75, "sample": [-452.72650146484375, 31.955322265625, 731.9141235351562, 5.1784515380859375, 268.0343933105469, 289.5367431640625, -205.8428955078125, 541.2155151367188, 850.3590087890625, 66.6761474609375, 499.8001403808594, 300.10723876953125, 288.29473876953125, 219.8424835205078, -703.184814453125, 1069.0076904296875, 446.0708923339844, -29.236770629882812, 317.7701721191406, 645.7093505859375, 448.60736083984375, 1356.4163818359375, 394.23248291015625, 89.73046112060547, 1987.586181640625, -152.9167022705078, -285.09588623046875, 519.3606567382812, 682.8806762695312, 265.4422607421875, 395.72076416015625, 1216.353271484375, 400.6626281738281, 104.63032531738281, 540.5072631835938, -768.8469848632812, 352.5674133300781, 167.260986328125, 1209.65234375, 943.6727294921875, 558.8285522460938, 676.4631958007812, -119.30009460449219, 488.92919921875, 64.2028579711914, -786.2079467773438, 302.0356140136719, -20.808876037597656, 1153.7923583984375, 979.7244873046875, 1067.1610107421875, -3.374235153198242, 359.39697265625, 767.90380859375, 766.3334350585938, 1108.60107421875, -10.892837524414062, 1045.7772216796875, 185.59861755371094, -22.97510528564453, -330.87994384765625, -143.81301879882812, 519.66552734375, -51.28443145751953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000506.npy"} +{"epoch": 0.7430249632892805, "step": 507, "batch_size": 64, "mean": 325.80413818359375, "std": 627.0028686523438, "min": -1124.8377685546875, "p10": -337.7032073974609, "median": 287.6995849609375, "p90": 931.2552978515628, "max": 2491.114013671875, "pos_frac": 0.671875, "sample": [-5.039438247680664, 478.052978515625, -72.06251525878906, -1124.8377685546875, 1200.95849609375, -106.68487548828125, 27.217941284179688, 412.19378662109375, -6.985801696777344, 133.74835205078125, -73.50495910644531, 398.6456298828125, 44.98670959472656, 423.31195068359375, 682.465087890625, 800.8906860351562, -449.5360107421875, 300.99029541015625, 138.5888214111328, 2132.096923828125, 584.5521850585938, 725.1469116210938, 32.77244186401367, -184.76315307617188, 424.947998046875, 327.1910400390625, 693.1810302734375, 874.4924926757812, -357.8561706542969, 335.85858154296875, 292.28057861328125, -63.278289794921875, -290.67962646484375, -30.52311897277832, -5.171655654907227, 607.549560546875, 533.9776611328125, 1488.5506591796875, 635.5322875976562, 61.14354705810547, 283.11859130859375, 18.174362182617188, -62.4373779296875, 421.1661682128906, 2491.114013671875, 955.5822143554688, 165.20654296875, 297.09613037109375, -474.6080627441406, 767.8577270507812, 823.1982421875, -479.46881103515625, -44.75395202636719, 728.69580078125, 2035.2216796875, 640.6556396484375, 984.6807861328125, 446.67364501953125, 214.07110595703125, -42.42351531982422, 30.369529724121094, -134.88021850585938, -425.8010559082031, -807.44677734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000507.npy"} +{"epoch": 0.7444933920704846, "step": 508, "batch_size": 64, "mean": 281.86297607421875, "std": 639.6566772460938, "min": -1552.25146484375, "p10": -380.4529266357422, "median": 253.72866821289062, "p90": 953.8649414062504, "max": 2220.959228515625, "pos_frac": 0.6875, "sample": [210.08596801757812, 161.43927001953125, -712.0643920898438, 708.0481567382812, 592.4080810546875, -89.2998046875, 94.38143920898438, 2013.344482421875, 427.7407531738281, 713.7962036132812, 376.1997985839844, 1098.6156005859375, 274.0516357421875, 2220.959228515625, 576.783447265625, 289.39031982421875, 172.00796508789062, -690.2109375, 1289.5386962890625, 845.248046875, -577.1375732421875, -281.168701171875, 460.4381103515625, -1026.98876953125, 1000.4150390625, -259.9439392089844, 67.0869140625, -166.58267211914062, -110.12034606933594, 394.45147705078125, -113.52436828613281, 337.51873779296875, 1.4085845947265625, -30.049152374267578, 1819.0731201171875, -1552.25146484375, 133.22808837890625, 136.89944458007812, 189.98883056640625, 813.2882080078125, 371.8183288574219, 233.40570068359375, 110.5667724609375, 497.6527099609375, 647.544189453125, 293.1419677734375, 590.072265625, 673.8638916015625, 766.3435668945312, 583.1435546875, 631.5062866210938, -209.09292602539062, -244.7992706298828, 1046.373046875, 629.057373046875, -167.6778564453125, -113.53717041015625, -44.0262451171875, -490.39520263671875, 700.8184814453125, -384.486328125, 379.7720947265625, 100.7139663696289, -371.0416564941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000508.npy"} +{"epoch": 0.7459618208516887, "step": 509, "batch_size": 64, "mean": 301.72552490234375, "std": 495.1992492675781, "min": -744.2820434570312, "p10": -320.6261688232422, "median": 258.55950927734375, "p90": 994.9968505859375, "max": 1869.89990234375, "pos_frac": 0.78125, "sample": [-38.100440979003906, -75.65234375, -635.169189453125, 903.8072509765625, -144.13275146484375, 521.5113525390625, 89.75074005126953, 56.51667022705078, 516.7925415039062, 290.81707763671875, 722.2650146484375, 49.80921173095703, 292.183837890625, 141.66128540039062, 223.1425323486328, 186.84361267089844, 995.1016235351562, -562.2169189453125, -471.50079345703125, 435.8829345703125, 1182.18994140625, 492.4694519042969, 149.28457641601562, 268.16339111328125, 410.234375, 780.5996704101562, 125.19497680664062, 141.63827514648438, 332.55328369140625, 420.7992248535156, 564.4307250976562, -348.21893310546875, 96.83238220214844, -271.3174743652344, 275.0775451660156, 994.7523803710938, 763.9358520507812, 60.51509094238281, 248.95562744140625, -322.7525634765625, -5.207794189453125, 525.7617797851562, 292.8984375, 191.229248046875, 47.02845764160156, -744.2820434570312, 1256.5919189453125, 1317.0689697265625, -135.83155822753906, 1869.89990234375, -315.6645812988281, 175.3350830078125, -409.4713439941406, 94.33377075195312, 289.8435363769531, 363.82696533203125, 149.28778076171875, 231.05673217773438, 286.6505126953125, 516.5036010742188, 799.6333618164062, 1168.661376953125, 428.09307861328125, 1052.5362548828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000509.npy"} +{"epoch": 0.7474302496328928, "step": 510, "batch_size": 64, "mean": 396.84149169921875, "std": 447.57135009765625, "min": -611.979736328125, "p10": -207.35707473754874, "median": 375.81996154785156, "p90": 924.1010070800783, "max": 1380.1953125, "pos_frac": 0.796875, "sample": [220.79202270507812, 867.552490234375, 1249.6097412109375, 444.2912292480469, -245.26852416992188, 943.1702880859375, 769.5164184570312, 342.55841064453125, 125.43571472167969, 735.8069458007812, 400.3366394042969, 534.8623657226562, -287.9905090332031, 541.5450439453125, 1199.13720703125, 834.0509643554688, 717.9346923828125, -611.979736328125, -37.29625701904297, 879.6060180664062, 275.4454650878906, 351.30328369140625, 538.4091796875, 155.60653686523438, 314.6387634277344, 716.3570556640625, 581.8145141601562, 971.3738403320312, 1160.034912109375, -330.49249267578125, 857.1112060546875, 1380.1953125, -100.04776000976562, 146.42718505859375, 821.7289428710938, 105.30960845947266, 315.0111389160156, 1184.8492431640625, -347.11944580078125, 264.1129150390625, 277.4000244140625, 44.011573791503906, 700.8143310546875, 274.62530517578125, 638.9886474609375, -91.30167388916016, 494.6479187011719, 483.5649108886719, 50.97465515136719, 418.24310302734375, 759.19384765625, -118.89702606201172, 134.47076416015625, 752.1670532226562, -80.78675842285156, 509.7745361328125, -331.6501770019531, 851.4344482421875, -37.17961883544922, -279.77154541015625, 4.601358413696289, 675.4942016601562, 238.05380249023438, 43.242610931396484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000510.npy"} +{"epoch": 0.748898678414097, "step": 511, "batch_size": 64, "mean": 311.1535339355469, "std": 587.685791015625, "min": -1495.482666015625, "p10": -166.70490417480468, "median": 251.25189208984375, "p90": 1051.4988952636725, "max": 1816.775390625, "pos_frac": 0.75, "sample": [-165.95404052734375, 1305.0916748046875, 219.00546264648438, 151.8013458251953, 359.766845703125, 119.11265563964844, 1116.04443359375, 1437.050048828125, 542.6661987304688, 127.8447036743164, 35.92847442626953, 693.2382202148438, 698.95703125, 662.49560546875, 162.7318878173828, 1210.498291015625, -1495.482666015625, 365.54168701171875, -487.1387939453125, 676.5322875976562, 1542.8978271484375, 771.0647583007812, 101.81788635253906, 505.953369140625, 258.8212890625, 3.6411819458007812, -723.0719604492188, 390.78466796875, -36.111175537109375, 640.5905151367188, 449.4501953125, -4.8118133544921875, -22.50230598449707, 299.6217041015625, 313.35546875, 65.0627670288086, 900.8926391601562, -167.02670288085938, -9.651123046875, 258.64422607421875, 866.5152587890625, 181.94784545898438, 22.657184600830078, -303.7673034667969, -90.7291488647461, 703.5202026367188, 706.3017578125, 43.10603332519531, -28.283689498901367, 20.671646118164062, -229.83767700195312, 477.5188903808594, 534.4369506835938, -9.964212417602539, 243.85955810546875, 482.43817138671875, 260.7454528808594, -15.094383239746094, 1816.775390625, 86.45466613769531, 475.5499572753906, -1411.0283203125, 158.36538696289062, 1646.51318359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000511.npy"} +{"epoch": 0.750367107195301, "step": 512, "batch_size": 64, "mean": 434.24078369140625, "std": 456.37841796875, "min": -451.13360595703125, "p10": -97.4338516235351, "median": 379.37391662597656, "p90": 1061.9115234375004, "max": 2019.456787109375, "pos_frac": 0.859375, "sample": [1117.09619140625, 962.124267578125, 796.9238891601562, -307.0640869140625, -141.2942352294922, 24.075836181640625, 2019.456787109375, 260.31341552734375, 58.308074951171875, -42.561012268066406, 163.51524353027344, 742.0032958984375, 3.782135009765625, 457.0078125, -258.4102478027344, 44.65395736694336, 559.906494140625, 535.4546508789062, 403.667236328125, 10.229803085327148, -172.01937866210938, 785.746337890625, 101.01736450195312, 562.182861328125, 1214.2359619140625, 798.5327758789062, 243.59141540527344, 420.1184387207031, 54.84838104248047, 765.9462890625, 351.43682861328125, 297.4378662109375, 537.7706298828125, 489.26177978515625, 219.6985626220703, 164.4779510498047, 1104.677490234375, 301.4036560058594, -120.9507827758789, 925.0361328125, 128.71762084960938, 1186.36669921875, 416.6243591308594, 1105.6883544921875, 32.607513427734375, -451.13360595703125, 344.01080322265625, 61.701908111572266, 275.26312255859375, 399.30133056640625, 1171.5693359375, -1.2023086547851562, 297.0650939941406, 777.1807861328125, 360.2543029785156, 664.548095703125, 738.537841796875, 259.7765808105469, 398.4935302734375, 877.792236328125, -251.8625030517578, 935.126220703125, 820.6115112304688, 790.73193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000512.npy"} +{"epoch": 0.7518355359765051, "step": 513, "batch_size": 64, "mean": 306.30194091796875, "std": 497.4742736816406, "min": -717.2280883789062, "p10": -180.91434326171876, "median": 228.79302215576172, "p90": 1109.7954223632817, "max": 1435.70263671875, "pos_frac": 0.765625, "sample": [723.8206787109375, 353.5076904296875, 308.888916015625, 153.18637084960938, 286.703369140625, 198.0361785888672, 335.38519287109375, 181.2303009033203, -155.26597595214844, 1009.8289794921875, 681.4979858398438, -174.7244873046875, 738.908447265625, 57.400596618652344, 82.0948715209961, 762.1414184570312, 31.821073532104492, 58.7269287109375, 192.1135711669922, 363.812744140625, 961.7178955078125, 1171.357421875, 1312.5379638671875, 318.7252197265625, 575.4410400390625, 7.855136871337891, 259.54986572265625, 182.73822021484375, 342.77386474609375, -183.567138671875, 127.49649047851562, 909.5437622070312, 515.3077392578125, 459.94921875, 322.17669677734375, -57.29620361328125, -91.55094909667969, -650.2671508789062, 276.0416259765625, 1435.70263671875, -115.49809265136719, 141.40740966796875, 51.8946533203125, -717.2280883789062, 584.3521118164062, 6.19268798828125, -535.4222412109375, -53.00330352783203, 1152.63818359375, 869.718505859375, -588.957275390625, 4.265205383300781, 1158.4244384765625, 1307.6678466796875, 440.1044921875, 179.24313354492188, -393.8203125, 661.4276123046875, -120.9603042602539, -147.72532653808594, -369.667236328125, 1211.44140625, 163.931884765625, 327.5494689941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000513.npy"} +{"epoch": 0.7533039647577092, "step": 514, "batch_size": 64, "mean": 301.52301025390625, "std": 524.6412353515625, "min": -857.6136474609375, "p10": -223.9020263671875, "median": 204.31349182128906, "p90": 974.5205993652347, "max": 1984.45849609375, "pos_frac": 0.703125, "sample": [-522.1104125976562, -151.243408203125, 125.37037658691406, -92.50798034667969, 1119.808349609375, 144.75192260742188, 608.99169921875, 660.7630004882812, 1906.489990234375, 847.8512573242188, 166.4123992919922, 597.5360717773438, -150.900390625, 213.34991455078125, 306.87518310546875, -112.57188415527344, -281.1817932128906, 198.427734375, 1172.35693359375, 1084.838623046875, -224.1636199951172, -102.22657012939453, 288.7284240722656, -107.55086517333984, 36.140907287597656, 310.8824157714844, 401.407958984375, 137.52841186523438, 507.8663330078125, -156.2801513671875, 835.2622680664062, 556.4091186523438, -138.29473876953125, -64.75390625, 153.83639526367188, 379.718505859375, 1232.8114013671875, -174.52154541015625, 102.87084197998047, -260.8903503417969, 383.44573974609375, -422.6546630859375, -203.17425537109375, 530.477783203125, 219.20828247070312, 122.26194763183594, -857.6136474609375, -223.29164123535156, -342.2235107421875, 880.85693359375, 210.19924926757812, 498.87176513671875, 462.48095703125, 642.1630859375, 1014.6621704101562, 153.13748168945312, 302.1121826171875, 690.0405883789062, 712.771728515625, 114.30314636230469, 706.0552368164062, 26.695484161376953, 134.13992309570312, 1984.45849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000514.npy"} +{"epoch": 0.7547723935389133, "step": 515, "batch_size": 64, "mean": 231.98915100097656, "std": 441.6803894042969, "min": -1571.38818359375, "p10": -235.5767852783203, "median": 223.2297821044922, "p90": 731.3203308105473, "max": 1356.677001953125, "pos_frac": 0.796875, "sample": [-63.49677658081055, -120.23316955566406, 483.9380187988281, 768.725830078125, 550.5039672851562, -1571.38818359375, 481.2971496582031, 444.07501220703125, 153.5802001953125, -490.3074951171875, -306.60784912109375, 9.861373901367188, 12.334869384765625, 184.5662841796875, 278.0335388183594, 323.24969482421875, 188.8906707763672, 614.6982421875, -353.45458984375, 43.57745361328125, 366.8691711425781, -123.90702819824219, 138.5663299560547, 345.94281005859375, 218.73411560058594, 227.72544860839844, 628.7711791992188, -180.242919921875, 566.3096313476562, 67.9645767211914, 817.826416015625, 29.79001808166504, -232.96197509765625, 597.2900390625, 275.60107421875, 356.5865478515625, 644.0408325195312, 11.769989013671875, 1237.4879150390625, -236.69741821289062, 158.13827514648438, 965.3496704101562, -366.01953125, 11.450965881347656, -376.3232727050781, 207.84072875976562, 106.63687896728516, 812.4204711914062, 0.12318801879882812, 110.76483154296875, 15.789695739746094, 1073.3974609375, 256.8753967285156, 136.556396484375, 265.3816833496094, 566.396240234375, 441.2460632324219, -129.43417358398438, 355.09759521484375, 392.9769287109375, 362.9834289550781, 263.7255554199219, 1356.677001953125, 469.94268798828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000515.npy"} +{"epoch": 0.7562408223201175, "step": 516, "batch_size": 64, "mean": 335.4332275390625, "std": 418.75091552734375, "min": -721.6815185546875, "p10": -212.33054809570308, "median": 357.7476501464844, "p90": 880.9788208007815, "max": 1146.54052734375, "pos_frac": 0.78125, "sample": [233.85232543945312, 238.38296508789062, 718.700927734375, -119.48767852783203, 578.2188720703125, 648.5484008789062, 459.4759521484375, -125.58985900878906, 221.5943145751953, 429.0904541015625, 1146.54052734375, 485.03912353515625, 509.2816162109375, 525.3693237304688, 200.5188446044922, -158.61563110351562, 663.3034057617188, -68.91505432128906, 88.87567138671875, 1102.9813232421875, -721.6815185546875, 99.95901489257812, -235.35122680664062, -494.44049072265625, 1004.5218505859375, 438.13037109375, -535.3544311523438, 813.5180053710938, 1104.090087890625, 166.09837341308594, 367.3932800292969, 815.6887817382812, 1001.4387817382812, 282.65679931640625, 559.1362915039062, 609.2845458984375, 262.833740234375, 53.84743881225586, 477.72833251953125, 323.106689453125, 156.0686492919922, -31.830080032348633, 908.9602661132812, 321.2622375488281, 18.912601470947266, 1091.1083984375, 186.76783752441406, 140.77734375, -22.432228088378906, 443.5874328613281, 348.1020202636719, 523.6135864257812, 88.2637939453125, 589.305419921875, 675.6096801757812, -252.87826538085938, -260.112548828125, -374.1029052734375, 527.9335327148438, 632.951416015625, 497.60235595703125, 778.375, 368.53582763671875, -58.42683792114258], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000516.npy"} +{"epoch": 0.7577092511013216, "step": 517, "batch_size": 64, "mean": 412.10162353515625, "std": 555.355712890625, "min": -595.7506713867188, "p10": -184.89706115722655, "median": 371.75999450683594, "p90": 922.3923461914062, "max": 2517.56884765625, "pos_frac": 0.765625, "sample": [636.0557250976562, 749.4773559570312, 45.85028076171875, 385.8820495605469, 833.6376342773438, 186.31704711914062, -325.65264892578125, 888.4232177734375, 532.2913208007812, -281.6479797363281, 474.4938049316406, 1993.5194091796875, 1358.758056640625, 402.81597900390625, 603.450439453125, -138.94613647460938, 246.96969604492188, 531.7872314453125, 101.82946014404297, 680.6410522460938, 1511.59326171875, 619.180908203125, -24.55281639099121, 301.3807067871094, -595.7506713867188, -275.759765625, 118.82782745361328, 444.2981262207031, -354.1282653808594, 2517.56884765625, 347.4473876953125, -57.86277770996094, -170.90577697753906, 919.8670654296875, 131.1295166015625, 575.9586791992188, -498.589599609375, 462.2666931152344, -35.6995849609375, 369.71734619140625, 756.201416015625, -102.38008880615234, 308.7360534667969, 247.37399291992188, 598.3108520507812, 558.9525756835938, 764.4127197265625, 386.7106018066406, 33.03015899658203, 1420.6434326171875, 127.5191421508789, 1188.6290283203125, 373.8026428222656, 923.474609375, 179.42822265625, 709.9483642578125, -190.89332580566406, 213.01583862304688, 279.6993713378906, 487.3873291015625, -118.17828369140625, 823.3192138671875, 257.1324768066406, -63.713783264160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000517.npy"} +{"epoch": 0.7591776798825257, "step": 518, "batch_size": 64, "mean": 339.1503601074219, "std": 431.4859924316406, "min": -826.397216796875, "p10": -186.44239807128903, "median": 312.0154724121094, "p90": 916.993621826172, "max": 1262.85986328125, "pos_frac": 0.796875, "sample": [117.299072265625, 1136.068115234375, 529.4142456054688, 369.73443603515625, 200.11082458496094, 400.321533203125, 435.32293701171875, 546.865234375, 72.02632141113281, 238.93185424804688, 1002.7764282226562, 649.375732421875, 80.2562255859375, -154.55039978027344, 478.89483642578125, 218.11280822753906, 425.1227722167969, 176.4381103515625, 127.60272216796875, 568.7266235351562, 930.2774658203125, -29.750221252441406, -285.0699768066406, 153.78408813476562, -31.960636138916016, 327.4465637207031, 826.8916625976562, 67.11480712890625, 722.7823486328125, 863.4212036132812, 848.3618774414062, 49.90187072753906, 1262.85986328125, 1193.939453125, 827.8616943359375, 715.6619873046875, 153.27239990234375, -106.66946411132812, 411.5249328613281, 624.2939453125, 309.02569580078125, -100.61978149414062, 552.6481323242188, -259.8567810058594, 426.85577392578125, -395.107177734375, -402.17315673828125, -826.397216796875, 381.4478454589844, 499.4953308105469, 17.666976928710938, 253.99093627929688, -200.1103973388672, 198.03189086914062, -393.7001953125, 1089.3521728515625, 1022.478271484375, 298.1576232910156, -48.06235122680664, 885.9979858398438, 315.0052490234375, 139.64735412597656, 554.0623779296875, 242.99000549316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000518.npy"} +{"epoch": 0.7606461086637298, "step": 519, "batch_size": 64, "mean": 235.43357849121094, "std": 457.8655700683594, "min": -988.13427734375, "p10": -309.4398651123047, "median": 245.02144622802734, "p90": 857.5864074707032, "max": 1264.4114990234375, "pos_frac": 0.6875, "sample": [-5.490447998046875, 558.1163940429688, 367.4563293457031, -508.0257873535156, 944.3120727539062, -301.288330078125, 59.07951354980469, 527.5269775390625, 1010.6661987304688, 599.6802368164062, 127.93232727050781, 586.6347045898438, -222.47076416015625, 925.0355834960938, 383.4254455566406, -918.4188232421875, 1202.98828125, 534.375244140625, 23.18645477294922, -133.66993713378906, 287.9588623046875, -98.65742492675781, 47.45008850097656, 373.8731994628906, 533.3695678710938, 162.91000366210938, 280.59429931640625, 641.1262817382812, 532.1417846679688, 64.6539535522461, -30.61529541015625, 681.7816162109375, -206.78073120117188, -84.5933837890625, 372.5337219238281, 79.75116729736328, 509.08935546875, -33.367530822753906, -375.3443603515625, -31.573989868164062, 439.64971923828125, -312.9333801269531, 981.4496459960938, 411.88287353515625, -988.13427734375, 557.1973266601562, 1264.4114990234375, 32.86798858642578, -51.66143798828125, 18.335617065429688, 166.2549285888672, 867.2186889648438, -377.5218200683594, 612.1115112304688, -272.72650146484375, 258.52484130859375, 835.111083984375, 18.008953094482422, 359.9089050292969, 516.1257934570312, 231.51805114746094, -50.9716911315918, 439.3941345214844, -355.6260986328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000519.npy"} +{"epoch": 0.762114537444934, "step": 520, "batch_size": 64, "mean": 316.26123046875, "std": 486.62811279296875, "min": -1019.6341552734375, "p10": -191.46908416748045, "median": 230.03292083740234, "p90": 936.8826049804688, "max": 2142.03857421875, "pos_frac": 0.75, "sample": [-118.70401763916016, -267.2269287109375, 1035.2027587890625, 154.20677185058594, 540.2667846679688, 182.82659912109375, 356.3585510253906, -73.53219604492188, -237.7022705078125, 150.80670166015625, 39.84767150878906, -57.623809814453125, 646.2186279296875, -83.79023742675781, -85.0959243774414, 605.692626953125, 1314.0821533203125, 2142.03857421875, -202.22836303710938, 365.9839782714844, 1087.71142578125, 1294.439697265625, -386.2972106933594, 217.6881103515625, -44.593902587890625, 765.3853759765625, 122.76570892333984, 574.7789306640625, 528.371826171875, 256.91778564453125, 249.02899169921875, 242.3777313232422, 1214.9031982421875, 150.7689971923828, 63.837188720703125, 497.91644287109375, 565.1156616210938, 435.5307312011719, 771.1571044921875, -1019.6341552734375, 534.7296142578125, 151.03018188476562, 184.2771759033203, 364.48553466796875, 62.36541748046875, 924.2904663085938, 123.46996307373047, 773.1827392578125, 453.549560546875, 142.7655029296875, 184.52781677246094, -202.04019165039062, -179.56967163085938, 262.29498291015625, 402.2956237792969, 578.316162109375, -108.717529296875, 942.2792358398438, -196.56883239746094, 528.9802856445312, 304.32391357421875, -152.1530303955078, 38.931976318359375, 127.87396240234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000520.npy"} +{"epoch": 0.7635829662261381, "step": 521, "batch_size": 64, "mean": 342.0847473144531, "std": 460.04351806640625, "min": -789.15478515625, "p10": -123.61929397583008, "median": 255.62297821044922, "p90": 1011.131658935547, "max": 1622.242431640625, "pos_frac": 0.75, "sample": [404.58868408203125, 799.79296875, 266.74761962890625, 26.07103729248047, -27.52518653869629, -789.15478515625, -175.5614013671875, -19.39215087890625, -125.99850463867188, 539.265625, 50.02099609375, -178.35955810546875, 23.99508285522461, 481.88421630859375, 510.2540588378906, 1208.3995361328125, 95.28955078125, 57.92654800415039, -280.5467529296875, 991.074462890625, 263.97308349609375, -23.274276733398438, -118.06780242919922, 247.2728729248047, 488.8154296875, 676.716552734375, 603.7448120117188, 141.756591796875, 72.91641998291016, -21.581298828125, 409.2919921875, 580.12939453125, -4.108314514160156, 1047.3192138671875, 163.99697875976562, 632.6514892578125, 606.845947265625, 229.62759399414062, 522.41162109375, -91.9860610961914, 239.947998046875, 321.92755126953125, 298.10284423828125, 70.96818542480469, 238.8763427734375, 1019.7276000976562, -136.67481994628906, 1110.186279296875, -7.152923583984375, -560.2310791015625, 571.31005859375, 431.1224365234375, 690.7200317382812, -2.4438018798828125, 794.2913818359375, 1077.0335693359375, 127.33891296386719, 1510.64404296875, 10.330314636230469, 1622.242431640625, 476.3919982910156, 81.41459655761719, 821.2618408203125, 798.8636474609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000521.npy"} +{"epoch": 0.7650513950073421, "step": 522, "batch_size": 64, "mean": 309.32373046875, "std": 491.3858337402344, "min": -771.6734619140625, "p10": -158.60409240722655, "median": 220.54083251953125, "p90": 1076.2465698242188, "max": 1369.217041015625, "pos_frac": 0.796875, "sample": [251.30906677246094, 743.9556884765625, 879.8294067382812, -14.374626159667969, 1093.36181640625, 100.99261474609375, 1369.217041015625, 232.77053833007812, 69.33601379394531, 5.283855438232422, -135.0046844482422, 406.1280822753906, 733.64306640625, 40.29460144042969, 480.8914794921875, -66.17935180664062, 318.8146057128906, -103.60398864746094, 462.7606201171875, 213.917236328125, 615.7263793945312, 640.5106811523438, 125.04895782470703, 308.0587158203125, -617.507080078125, 1119.73095703125, 343.87738037109375, 162.69155883789062, 540.9006958007812, 581.7362670898438, 342.9686279296875, 191.1348114013672, 138.5188751220703, 49.647705078125, 154.07708740234375, -771.6734619140625, 600.2042846679688, 111.95800018310547, 212.546142578125, -363.556884765625, -168.71812438964844, 417.609375, 1061.0924072265625, 96.1332778930664, -426.99462890625, 504.1226806640625, 859.391845703125, 237.54791259765625, -768.670166015625, 1266.3614501953125, 1082.7412109375, -19.818756103515625, 1331.4410400390625, 81.53070831298828, 160.81646728515625, -613.3622436523438, 12.871437072753906, -121.96163177490234, 591.227783203125, 227.1644287109375, 1011.3783569335938, 91.55789184570312, 1218.715576171875, 94.59684753417969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000522.npy"} +{"epoch": 0.7665198237885462, "step": 523, "batch_size": 64, "mean": 330.5179443359375, "std": 560.617431640625, "min": -985.0519409179688, "p10": -336.9925231933593, "median": 314.3672637939453, "p90": 1074.7602294921878, "max": 1756.2271728515625, "pos_frac": 0.6875, "sample": [-190.3776092529297, 1210.4444580078125, 130.7215118408203, 342.12957763671875, 391.63311767578125, -350.24945068359375, 425.53216552734375, 598.499755859375, 994.0146484375, 382.3955078125, -754.6654663085938, 761.6099853515625, -4.423826217651367, -43.566619873046875, -434.1619567871094, 116.70274353027344, 233.98681640625, 798.5748291015625, -526.56884765625, 202.1478271484375, 485.0311279296875, 921.9666748046875, -138.04498291015625, 29.318864822387695, 528.1423950195312, 112.95341491699219, 262.0371398925781, 97.13839721679688, -37.73078155517578, 1563.8389892578125, 540.7808227539062, 127.14558410644531, -116.00173950195312, -985.0519409179688, 1153.5150146484375, 822.7196655273438, -490.3374938964844, -57.09223937988281, 500.73260498046875, -102.12944030761719, 750.818115234375, -92.73681640625, 1756.2271728515625, 348.9589538574219, 726.5955810546875, -57.562835693359375, 341.2574157714844, 287.47711181640625, 1671.7506103515625, 427.3834228515625, 640.08837890625, 138.04690551757812, 550.8245239257812, 115.04332733154297, 762.8152465820312, 1184.4189453125, 844.4937744140625, -81.79620361328125, 640.2294921875, 1109.365478515625, 578.7569580078125, -650.6600341796875, -306.0596923828125, -35.898983001708984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000523.npy"} +{"epoch": 0.7679882525697503, "step": 524, "batch_size": 64, "mean": 345.46832275390625, "std": 514.7507934570312, "min": -1609.3436279296875, "p10": -87.75897903442383, "median": 424.17149353027344, "p90": 905.89140625, "max": 1656.380126953125, "pos_frac": 0.734375, "sample": [32.16096496582031, 956.909912109375, 405.73516845703125, 565.5655517578125, 484.9715270996094, 300.4378662109375, 336.39300537109375, 22.466773986816406, 586.3765258789062, 320.97369384765625, 457.4363098144531, 44.097381591796875, 480.6003723144531, 534.8238525390625, 724.770263671875, 393.9808044433594, -10.7764892578125, 915.2941284179688, 435.8193359375, 606.2221069335938, -1609.3436279296875, 489.502197265625, 535.9906616210938, 1656.380126953125, -4.9061431884765625, -51.49658203125, 788.1688842773438, 432.8133239746094, 478.5745849609375, -29.170902252197266, -7.310214996337891, 412.1790771484375, 552.840087890625, -236.22862243652344, 737.2115478515625, 246.42623901367188, 1252.048828125, 1488.3790283203125, 637.3856201171875, 493.2470703125, -60.71746063232422, -869.51806640625, 0.932159423828125, -69.98601531982422, 726.4758911132812, -87.60385131835938, 379.840087890625, 6.506675720214844, 456.903076171875, 472.4687194824219, 1106.63525390625, 1012.4357299804688, 578.8001098632812, -839.058349609375, -87.8254623413086, -11.632675170898438, 883.9517211914062, 597.953857421875, -47.121055603027344, 375.2866516113281, 600.9302978515625, 415.5296630859375, -195.10382080078125, -93.06060791015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000524.npy"} +{"epoch": 0.7694566813509545, "step": 525, "batch_size": 64, "mean": 441.43646240234375, "std": 546.4650268554688, "min": -812.2141723632812, "p10": -98.51968994140624, "median": 333.86631774902344, "p90": 1304.1084472656253, "max": 1634.548095703125, "pos_frac": 0.828125, "sample": [403.5028076171875, 1244.21435546875, 387.4947509765625, 156.60940551757812, 66.14714813232422, 470.75482177734375, 1551.7322998046875, 799.2320556640625, 398.4622497558594, -262.4564514160156, 1329.77734375, -439.35125732421875, 970.4617309570312, 247.34584045410156, 241.04190063476562, -247.90122985839844, 149.0398406982422, 281.4945983886719, 998.953125, 719.6549682617188, 575.3112182617188, 66.7669677734375, -78.99151611328125, 104.92861938476562, -158.90875244140625, 329.36846923828125, 573.1556396484375, 587.60205078125, 464.4515075683594, 1153.2091064453125, 1099.5928955078125, 11.344688415527344, 1491.0582275390625, 2.0375900268554688, 1634.548095703125, 342.69915771484375, 144.1065673828125, 214.02450561523438, 376.5653076171875, -89.42868041992188, 1627.045166015625, -812.2141723632812, -101.28985595703125, -9.138246536254883, 338.3641662597656, 288.64154052734375, 559.7923583984375, 133.26449584960938, 594.8170166015625, 168.67642211914062, 691.7243041992188, 1395.33544921875, -92.05596923828125, 21.31201171875, 779.1480102539062, 288.41058349609375, 313.5992736816406, 61.19579315185547, -478.8175048828125, 1026.0020751953125, 204.529052734375, 1572.608154296875, 402.6863708496094, 968.6463012695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000525.npy"} +{"epoch": 0.7709251101321586, "step": 526, "batch_size": 64, "mean": 435.17706298828125, "std": 566.6396484375, "min": -1001.7420654296875, "p10": -207.5873580932617, "median": 496.70684814453125, "p90": 1238.09208984375, "max": 1602.8929443359375, "pos_frac": 0.765625, "sample": [1254.618408203125, 894.3623657226562, 73.18122863769531, 175.9891357421875, 339.27740478515625, 314.7445068359375, -174.55433654785156, 4.437433242797852, 546.0518798828125, -758.4757080078125, 571.9600219726562, 756.6817016601562, 744.51220703125, 253.28500366210938, 1358.06201171875, 1081.2120361328125, -131.13983154296875, 751.6234130859375, 311.6122131347656, 616.9177856445312, 344.7394714355469, 599.9947509765625, 271.9361877441406, 1057.1865234375, 534.538330078125, 512.3097534179688, 1602.8929443359375, 34.95210266113281, 371.3934326171875, -228.7050018310547, -214.5402069091797, 607.580078125, -452.6441345214844, 1321.572021484375, -174.86468505859375, 335.2105407714844, 1002.7852783203125, 883.6043090820312, 216.3837890625, -1001.7420654296875, 848.4780883789062, 607.006103515625, 522.96728515625, 1263.044189453125, -487.8603515625, 739.4434814453125, 769.8825073242188, 1119.654052734375, -71.01399993896484, 509.94580078125, -39.93878173828125, 221.9341583251953, 334.2382507324219, 1224.04833984375, 1036.02197265625, -191.36404418945312, 1244.11083984375, 671.4806518554688, 483.4678955078125, -27.689193725585938, -126.76842498779297, 49.12940216064453, -778.025634765625, 1320.197265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000526.npy"} +{"epoch": 0.7723935389133627, "step": 527, "batch_size": 64, "mean": 295.2120361328125, "std": 439.24688720703125, "min": -645.70947265625, "p10": -250.59631347656247, "median": 265.51319885253906, "p90": 804.2431762695313, "max": 1265.3848876953125, "pos_frac": 0.75, "sample": [-510.78424072265625, 555.091796875, 369.0017395019531, 389.4927062988281, 695.8265380859375, 153.20156860351562, 1265.3848876953125, -63.55870056152344, -262.5423889160156, 759.94482421875, -157.2846221923828, -222.72213745117188, 676.1456298828125, 294.5322570800781, -645.70947265625, -601.4216918945312, 525.4517211914062, -142.12423706054688, 706.8681030273438, 432.31402587890625, -286.04156494140625, 1041.237548828125, 106.49456787109375, 97.03729248046875, 163.32272338867188, 793.37548828125, -13.668960571289062, 123.27021026611328, 1038.0943603515625, 697.452880859375, 587.883544921875, 1069.546630859375, 445.55975341796875, 692.82177734375, 1172.4637451171875, 1066.8443603515625, 808.9007568359375, -21.86774253845215, 52.98075866699219, 274.2480163574219, 610.3076782226562, 165.80999755859375, 309.98052978515625, 247.2788848876953, -131.00106811523438, 64.46328735351562, 256.77838134765625, 336.0519714355469, 154.15087890625, -547.4170532226562, 290.91204833984375, 171.53579711914062, 610.339599609375, 230.0828857421875, -9.157196044921875, 75.38249206542969, 697.4440307617188, 648.2349853515625, 509.1297302246094, 357.68670654296875, 173.21719360351562, -206.79568481445312, 63.41942596435547, -311.32891845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000527.npy"} +{"epoch": 0.7738619676945668, "step": 528, "batch_size": 64, "mean": 363.74908447265625, "std": 583.2496948242188, "min": -692.963134765625, "p10": -358.2716186523437, "median": 302.126953125, "p90": 1184.62763671875, "max": 1909.074462890625, "pos_frac": 0.71875, "sample": [-48.463356018066406, 1188.419677734375, 339.2878723144531, 331.4342956542969, 204.73382568359375, -428.63580322265625, -7.296882629394531, 709.6046142578125, 602.985107421875, 1277.67822265625, 439.66259765625, -126.39732360839844, 384.235595703125, -372.73248291015625, 145.6415252685547, 544.9058227539062, 316.3570556640625, -130.24392700195312, 1175.779541015625, 1909.074462890625, -6.419288635253906, 913.0875244140625, 469.02520751953125, 721.4736938476562, 456.1871643066406, 920.5894165039062, -624.8665161132812, 1383.6346435546875, -275.97119140625, 1080.4620361328125, -584.1618041992188, 287.8968505859375, 421.7347412109375, 244.75961303710938, 361.23388671875, 182.25221252441406, 528.8449096679688, 188.75648498535156, 491.89288330078125, 36.265541076660156, -109.3074722290039, 901.6594848632812, -163.00436401367188, -692.963134765625, 1505.008544921875, 221.01321411132812, 596.564208984375, 1722.6240234375, 187.7316436767578, 2.9127197265625, -585.0314331054688, -631.6929931640625, 771.3342895507812, 1225.3983154296875, 135.20974731445312, 886.6732788085938, 638.5987548828125, 212.47593688964844, -324.52960205078125, -15.64080810546875, 85.61468505859375, -101.02864074707031, 1059.166259765625, 98.44490814208984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000528.npy"} +{"epoch": 0.775330396475771, "step": 529, "batch_size": 64, "mean": 240.1451873779297, "std": 581.5812377929688, "min": -1487.697509765625, "p10": -428.16297912597656, "median": 208.70394897460938, "p90": 984.9620971679689, "max": 1564.1142578125, "pos_frac": 0.671875, "sample": [155.84278869628906, -172.4997100830078, 1564.1142578125, -80.88397216796875, 166.50888061523438, 658.3615112304688, 417.2757873535156, 643.1240844726562, 423.4666442871094, 1517.341064453125, -353.0295104980469, 541.9236450195312, 97.2258529663086, -435.5675354003906, 553.64208984375, -138.74131774902344, 136.53460693359375, 21.863792419433594, 775.796142578125, -109.1259536743164, -357.0201721191406, 321.22320556640625, 1119.077392578125, 968.9318237304688, 991.8322143554688, -679.31298828125, -103.88822174072266, 334.3382263183594, -557.6708374023438, 703.5693359375, 171.20718383789062, 467.1395568847656, -124.2309341430664, 368.9197998046875, -57.128204345703125, 33.90751647949219, 716.6256103515625, 38.2267951965332, 632.7234497070312, 684.8185424804688, -1487.697509765625, 104.98546600341797, 1035.3106689453125, -548.2931518554688, 590.0117797851562, -1078.331298828125, 106.98592376708984, 621.80810546875, 1018.0313110351562, 98.24271392822266, -770.4498901367188, 428.9959716796875, -50.928260803222656, 564.3158569335938, -410.88568115234375, 486.1827087402344, 246.20071411132812, 890.0554809570312, 581.1947021484375, 1274.545166015625, -305.4620666503906, -112.41423034667969, -270.9192810058594, 301.3453063964844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000529.npy"} +{"epoch": 0.7767988252569751, "step": 530, "batch_size": 64, "mean": 395.99542236328125, "std": 694.19873046875, "min": -2165.776611328125, "p10": -183.7158218383789, "median": 268.0205383300781, "p90": 1427.7277709960938, "max": 2190.6923828125, "pos_frac": 0.71875, "sample": [402.4126281738281, -356.7978210449219, -12.457176208496094, 144.02175903320312, 279.2657470703125, -172.35574340820312, 1792.5850830078125, 569.8798217773438, 340.6494445800781, -585.0272216796875, 123.80860137939453, -88.59770965576172, -39.139404296875, -117.43081665039062, 759.1122436523438, 764.7054443359375, -36.19648742675781, 39.1942138671875, -365.8916015625, 1398.06005859375, 1812.1123046875, 604.2200317382812, 989.6546020507812, 731.5516967773438, -109.57398986816406, 784.6295776367188, -129.8170166015625, 175.42929077148438, -188.5844268798828, 646.4635620117188, -6.497663497924805, 1641.5341796875, 1179.1170654296875, 496.0372314453125, 371.0441589355469, -8.407184600830078, 872.2454833984375, 152.79794311523438, -357.38336181640625, -432.0257568359375, 99.89714050292969, 1722.9930419921875, 701.1446533203125, 2190.6923828125, 40.6507568359375, 387.8943786621094, 392.7510681152344, 256.77532958984375, 1539.919677734375, 186.56448364257812, 373.179443359375, 21.88385581970215, 858.7728881835938, -2165.776611328125, 1332.250244140625, 76.89735412597656, -25.173492431640625, 1440.4425048828125, 535.229736328125, 290.6329345703125, 79.64891052246094, 118.91033935546875, 647.7361450195312, 175.44210815429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000530.npy"} +{"epoch": 0.7782672540381792, "step": 531, "batch_size": 64, "mean": 339.31494140625, "std": 558.1561279296875, "min": -1148.2510986328125, "p10": -243.23834991455078, "median": 275.8150634765625, "p90": 1010.8175292968754, "max": 1853.74951171875, "pos_frac": 0.765625, "sample": [572.1248779296875, 122.9298324584961, 700.8858642578125, 1053.1773681640625, 771.4121704101562, -483.8731689453125, 199.60693359375, 190.54434204101562, 1766.05078125, -165.62545776367188, -238.37452697753906, 491.5682067871094, 9.672430038452148, 449.4646301269531, 278.73779296875, 191.6575469970703, 406.43927001953125, 284.85809326171875, -1148.2510986328125, 357.67852783203125, 898.8604125976562, 749.7011108398438, 352.95037841796875, 81.85018920898438, 787.0548095703125, 158.68148803710938, 464.7010803222656, 283.17913818359375, 478.4344177246094, -60.330833435058594, 1174.5074462890625, -89.46624755859375, 622.4114990234375, 632.5333251953125, 73.86485290527344, 261.2303466796875, -30.05658721923828, -589.4346923828125, -16.321502685546875, 137.02532958984375, 1853.331787109375, 391.6318359375, 55.548179626464844, -93.19108581542969, 1853.74951171875, -367.49761962890625, 548.067138671875, 1381.38916015625, 272.892333984375, 911.9779052734375, 1173.875732421875, -48.002845764160156, 599.8096923828125, -245.32284545898438, 262.5488586425781, 145.79388427734375, 477.41259765625, 172.5323028564453, 405.7799987792969, 82.60823059082031, 233.23196411132812, 554.1023559570312, -822.2844848632812, -265.88916015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000531.npy"} +{"epoch": 0.7797356828193832, "step": 532, "batch_size": 64, "mean": 362.3275146484375, "std": 672.24951171875, "min": -1388.0382080078125, "p10": -498.3867187499999, "median": 283.77268981933594, "p90": 1258.3185180664068, "max": 1910.8194580078125, "pos_frac": 0.75, "sample": [1107.4822998046875, 445.8349609375, 1599.7427978515625, -280.1801452636719, 1407.5301513671875, 230.30511474609375, 861.87353515625, 865.386962890625, -1388.0382080078125, 299.3670349121094, -315.9056396484375, 1394.041748046875, 512.2537841796875, 745.2840576171875, -548.873291015625, 790.67333984375, 564.3277587890625, -1045.9527587890625, 256.5142822265625, 326.7358093261719, 408.4416198730469, 1712.1044921875, 1910.8194580078125, 879.1585693359375, 585.2832641601562, 713.100830078125, -140.48866271972656, 1146.0904541015625, 1306.416259765625, 180.95989990234375, -87.48529815673828, 343.6285705566406, -152.48324584960938, 1027.95703125, 750.113525390625, 175.41317749023438, 250.02261352539062, 1026.0975341796875, -101.04312896728516, 260.7225036621094, 18.96514129638672, 802.1430053710938, 181.76873779296875, 597.1978759765625, 167.13990783691406, -74.0339126586914, 12.484321594238281, 772.8711547851562, 218.59725952148438, 29.31554412841797, -569.4894409179688, -596.3075561523438, 268.1783447265625, 1722.2930908203125, 266.8360595703125, -92.46002197265625, 310.4883117675781, -1059.2840576171875, 115.50018310546875, 204.28573608398438, -380.584716796875, 617.4561767578125, -698.7664184570312, 331.1343688964844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000532.npy"} +{"epoch": 0.7812041116005873, "step": 533, "batch_size": 64, "mean": 265.591552734375, "std": 579.4927978515625, "min": -1519.04052734375, "p10": -303.9934539794922, "median": 224.12490844726562, "p90": 1002.5224914550782, "max": 2233.74755859375, "pos_frac": 0.6875, "sample": [535.66455078125, -412.07080078125, 745.656005859375, -495.9911804199219, 446.49456787109375, 197.826171875, 1248.322998046875, 613.4439697265625, 278.0306396484375, 322.0848388671875, 706.60205078125, 619.97607421875, 596.561279296875, 760.285888671875, -195.3671875, 418.1038513183594, 1012.8751831054688, 167.06625366210938, 250.3900146484375, -803.84033203125, 1318.161865234375, 224.8280029296875, 12.25677490234375, 573.6800537109375, 44.53520584106445, 595.5488891601562, -44.628211975097656, -59.29957580566406, 223.42181396484375, 2233.74755859375, 458.75933837890625, 417.1475524902344, -93.06600952148438, -14.260704040527344, -8.04831314086914, -1519.04052734375, -342.4850769042969, 357.6632995605469, -304.4922790527344, 398.5833435058594, 124.63047790527344, 166.22633361816406, 1257.580078125, 1293.380126953125, -198.93612670898438, 5.8476409912109375, -284.22503662109375, -4.3527679443359375, 353.0409240722656, 278.7900085449219, 321.6878356933594, -105.16331481933594, 978.3662109375, 84.38121795654297, -302.82952880859375, -293.325927734375, -809.5487060546875, 158.91217041015625, 170.902099609375, 1163.9808349609375, -177.26275634765625, 4.063446044921875, 787.0365600585938, 539.5496215820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000533.npy"} +{"epoch": 0.7826725403817915, "step": 534, "batch_size": 64, "mean": 354.456787109375, "std": 543.5641479492188, "min": -857.911865234375, "p10": -252.00228881835938, "median": 278.47438049316406, "p90": 1007.9321655273437, "max": 1898.53173828125, "pos_frac": 0.78125, "sample": [386.64337158203125, 1007.998046875, -41.416748046875, 1898.53173828125, 58.31851577758789, 830.8383178710938, -557.5572509765625, 269.7289733886719, 328.64776611328125, -551.0974731445312, 123.27362823486328, 1181.22900390625, -208.43063354492188, 110.78530883789062, 1215.2950439453125, 933.1981811523438, 792.811767578125, -254.41195678710938, 362.4057312011719, 849.9826049804688, 270.59283447265625, -170.71359252929688, 580.8291015625, 471.4840087890625, 286.3559265136719, 369.2567138671875, 1053.9654541015625, -92.52676391601562, 368.4805908203125, 166.76693725585938, 200.11874389648438, 899.8743896484375, 256.68206787109375, 127.89730072021484, 650.9400634765625, -582.6083984375, -113.23324584960938, 1574.9891357421875, 803.8501586914062, 1007.7784423828125, 156.31582641601562, 178.18882751464844, -111.89495849609375, 149.31875610351562, 815.6685180664062, 189.57713317871094, 85.19258117675781, 926.6822509765625, 1476.9031982421875, 893.5477294921875, 653.5062866210938, 330.9313659667969, 644.89697265625, 47.31523513793945, 403.7720642089844, 119.8408203125, 289.7669982910156, 235.58453369140625, 293.4277038574219, 84.42169189453125, -246.37973022460938, -857.911865234375, -279.82916259765625, -661.162841796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000534.npy"} +{"epoch": 0.7841409691629956, "step": 535, "batch_size": 64, "mean": 412.41912841796875, "std": 558.0743408203125, "min": -888.400390625, "p10": -218.32346496582028, "median": 388.7114562988281, "p90": 1057.8584228515626, "max": 2353.594482421875, "pos_frac": 0.796875, "sample": [-381.83001708984375, 2353.594482421875, -355.75689697265625, 496.0716552734375, -228.93812561035156, 224.1131591796875, 98.31805419921875, 944.2677001953125, 122.02099609375, 660.6766967773438, 244.06129455566406, 98.33206939697266, 248.54957580566406, 971.2576904296875, 451.0444030761719, 425.651611328125, 126.2780990600586, 1411.845458984375, 311.8752136230469, 350.5689697265625, 985.01025390625, 560.75390625, 571.4996337890625, 470.46295166015625, 9.991958618164062, 1477.064208984375, 1145.56689453125, 509.3882751464844, -65.94277954101562, 333.6531677246094, 1527.8011474609375, 775.4349975585938, 1060.3433837890625, -377.7213439941406, -362.4046936035156, -888.400390625, 497.0347595214844, 730.9007568359375, 141.44195556640625, 451.3133544921875, 379.79150390625, 1052.0601806640625, 841.0843505859375, 208.71359252929688, 504.1744384765625, -62.29899597167969, -17.08679962158203, 771.2144775390625, -193.55592346191406, -579.96435546875, 604.2424926757812, 50.50646209716797, 501.88409423828125, 78.8399429321289, -139.56163024902344, -90.9290771484375, 948.1348876953125, 174.19070434570312, 1484.4622802734375, 317.2412109375, 29.53411293029785, 539.085205078125, 397.63140869140625, 470.2344055175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000535.npy"} +{"epoch": 0.7856093979441997, "step": 536, "batch_size": 64, "mean": 493.9730529785156, "std": 694.5618896484375, "min": -1156.2452392578125, "p10": -81.2103809356689, "median": 294.50360107421875, "p90": 1226.6305786132812, "max": 4038.584228515625, "pos_frac": 0.8125, "sample": [135.64271545410156, 736.1466674804688, 291.72467041015625, 996.34326171875, 1228.964111328125, 11.487716674804688, 172.7620849609375, 164.58953857421875, 69.43722534179688, -27.61236572265625, 899.8978271484375, 340.3760681152344, 1221.1856689453125, 894.9749755859375, -207.97755432128906, 534.7900390625, 1278.72216796875, 105.84945678710938, 819.255126953125, 815.7184448242188, 2100.97412109375, 545.603759765625, -141.41311645507812, 844.0142822265625, -100.05606842041016, 1273.231689453125, 26.562286376953125, 653.269287109375, 679.0934448242188, -4.001708984375, 4038.584228515625, 764.264404296875, 297.28253173828125, 199.64004516601562, 487.9483642578125, 723.3970336914062, 105.65263366699219, 1151.397216796875, -315.6992492675781, 105.6749267578125, 155.5172882080078, -1156.2452392578125, 1592.6400146484375, 944.498779296875, 644.5338745117188, -127.62156677246094, 90.18707275390625, 253.7376708984375, 303.82958984375, 145.4813232421875, 288.004638671875, -19.89759063720703, 220.66180419921875, 244.36770629882812, 246.2137451171875, 792.7206420898438, 132.14390563964844, -37.23711013793945, -302.22210693359375, -12.703033447265625, 880.2265625, 755.4982299804688, 368.03912353515625, 1294.203857421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000536.npy"} +{"epoch": 0.7870778267254038, "step": 537, "batch_size": 64, "mean": 336.547119140625, "std": 570.430419921875, "min": -1287.856689453125, "p10": -308.5430969238281, "median": 294.53167724609375, "p90": 1101.289990234375, "max": 1405.43408203125, "pos_frac": 0.78125, "sample": [-1287.856689453125, -701.3470458984375, 25.24774932861328, 116.0058364868164, -433.10491943359375, 351.5430908203125, 25.20521354675293, 934.5762939453125, 11.751190185546875, -60.938175201416016, 493.8050231933594, -8.348373413085938, 526.2413330078125, 355.032958984375, -325.07720947265625, 926.881103515625, 1198.0164794921875, -17.487642288208008, 219.10205078125, 0.33716583251953125, 7.694490432739258, 836.1318969726562, 43.837623596191406, -796.6882934570312, 961.009521484375, -269.9635009765625, 1381.5096435546875, 1210.0960693359375, 336.7694091796875, 544.0089111328125, 1105.349365234375, 409.645263671875, 1018.1609497070312, 318.27911376953125, 785.2227172851562, -16.572410583496094, 219.31748962402344, 572.0779418945312, 270.78424072265625, -513.6068115234375, 233.08102416992188, 440.9522705078125, -91.59056091308594, -923.1226806640625, 54.69559860229492, 1091.818115234375, -83.11325073242188, 160.5536346435547, 321.5455017089844, 876.5037231445312, 151.96676635742188, 1405.43408203125, 150.80276489257812, 1268.8074951171875, 518.6207885742188, 99.1599349975586, 1281.02587890625, 104.19784545898438, 989.9808959960938, 395.6846008300781, 563.8091430664062, 682.942626953125, 96.337158203125, 976.2733154296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000537.npy"} +{"epoch": 0.788546255506608, "step": 538, "batch_size": 64, "mean": 382.1604919433594, "std": 521.6039428710938, "min": -598.4373168945312, "p10": -233.8818130493164, "median": 378.05963134765625, "p90": 1036.4849243164062, "max": 2054.41357421875, "pos_frac": 0.75, "sample": [130.3001251220703, 514.5109252929688, 800.4747314453125, 1201.9716796875, 556.2587890625, -154.45770263671875, 985.0003662109375, 580.02392578125, 611.4021606445312, 451.8576965332031, -97.31591033935547, 1618.9189453125, 571.411376953125, 405.996337890625, 238.09056091308594, 1033.6937255859375, -523.422119140625, -13.172309875488281, 330.37799072265625, 1072.1749267578125, 278.51104736328125, 508.3822326660156, 102.16415405273438, 758.6702880859375, 553.6114501953125, 76.81452941894531, -69.863525390625, 1435.8648681640625, -24.657577514648438, -385.0526428222656, -598.4373168945312, 804.3512573242188, 152.64173889160156, -39.17472839355469, 2054.41357421875, 642.8846435546875, 751.0023803710938, 526.7821044921875, 350.1229248046875, 743.217041015625, 216.39907836914062, -102.82794952392578, 10.90053939819336, 649.6593627929688, 191.51162719726562, 667.9871826171875, -571.311279296875, 636.02294921875, -306.7992248535156, 102.70796203613281, 582.568603515625, 429.36077880859375, 318.16778564453125, 642.1598510742188, -236.62681579589844, -29.487327575683594, 139.7486572265625, 23.415599822998047, 1116.799560546875, 1037.68115234375, -371.214111328125, -227.476806640625, 581.427978515625, 21.152395248413086], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000538.npy"} +{"epoch": 0.7900146842878121, "step": 539, "batch_size": 64, "mean": 361.0832214355469, "std": 486.40185546875, "min": -788.9386596679688, "p10": -155.9830352783203, "median": 302.7950134277344, "p90": 921.3892456054691, "max": 1713.4300537109375, "pos_frac": 0.765625, "sample": [279.0475158691406, 313.39813232421875, 56.611167907714844, 6.863828659057617, 294.38623046875, 426.57464599609375, -188.36680603027344, 45.76813507080078, -32.925262451171875, 1330.110107421875, -298.90936279296875, -23.787744522094727, 1713.4300537109375, 156.0600128173828, 962.0079345703125, 632.130859375, 782.424560546875, 704.6963500976562, 29.73388671875, 826.6123046875, 1566.512939453125, 86.69354248046875, 311.20379638671875, 767.0167236328125, 135.04380798339844, 205.89968872070312, -79.77296447753906, 467.725830078125, -115.94898986816406, 704.0105590820312, -372.6196594238281, 685.5436401367188, -788.9386596679688, 571.2697143554688, 658.210693359375, 108.99200439453125, 79.3429946899414, 535.024658203125, 184.292724609375, -161.86203002929688, -84.93898010253906, 1483.12744140625, -188.00711059570312, 697.3697509765625, -38.3035774230957, -142.265380859375, 999.3551635742188, -226.58445739746094, 97.14424896240234, 13.881385803222656, 610.3405151367188, 250.70095825195312, 826.0667724609375, 391.4115905761719, 450.4615478515625, 334.03680419921875, 816.8549194335938, 560.0382080078125, 462.9404296875, 451.2880859375, -65.45185852050781, 1311.9849853515625, 493.5511474609375, 40.81584167480469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000539.npy"} +{"epoch": 0.7914831130690162, "step": 540, "batch_size": 64, "mean": 485.5020751953125, "std": 671.1478271484375, "min": -844.3829345703125, "p10": -302.49952392578115, "median": 364.8134307861328, "p90": 1497.8418701171877, "max": 2607.18505859375, "pos_frac": 0.765625, "sample": [526.9420776367188, 597.2116088867188, 746.6533203125, -452.12164306640625, 2033.9893798828125, -217.58154296875, 303.8939514160156, 124.7410888671875, -844.3829345703125, -503.455322265625, -28.02494239807129, 631.240966796875, -171.15370178222656, 256.20477294921875, 2607.18505859375, -99.84314727783203, 249.3370361328125, 71.27542114257812, 607.920654296875, 617.121337890625, 34.434173583984375, 178.13226318359375, 419.01507568359375, 830.448486328125, 816.7943115234375, 1417.2828369140625, 569.4229736328125, 1459.7769775390625, 1514.1553955078125, 385.65802001953125, 623.9822387695312, 454.21270751953125, 738.234375, 91.57715606689453, 334.30694580078125, 474.6159362792969, -115.72769165039062, -14.96331787109375, 1582.0635986328125, 280.3430480957031, 1183.365966796875, 1883.4671630859375, 1606.300537109375, 1280.5196533203125, -440.7981872558594, 711.505126953125, 226.67410278320312, 317.9764404296875, 811.0480346679688, 337.17724609375, -338.8929443359375, 343.9688415527344, 715.7555541992188, 849.747314453125, 176.31582641601562, 303.77935791015625, 712.454345703125, 172.2064208984375, 1589.907470703125, -67.92530059814453, -5.918968200683594, 684.08642578125, -712.4448852539062, -399.0629577636719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000540.npy"} +{"epoch": 0.7929515418502202, "step": 541, "batch_size": 64, "mean": 249.83155822753906, "std": 587.30322265625, "min": -1212.1220703125, "p10": -451.40711059570305, "median": 198.1755828857422, "p90": 964.6280578613286, "max": 1973.171875, "pos_frac": 0.734375, "sample": [-847.7862548828125, 188.8802490234375, 700.888916015625, 91.10586547851562, 176.6373291015625, 64.74935913085938, 327.1742858886719, 80.01509094238281, 203.0479736328125, -85.4093017578125, -122.18746948242188, 1365.2862548828125, -872.9452514648438, -52.72227478027344, 1005.191650390625, 51.797401428222656, 201.54022216796875, 1264.969970703125, 102.70211029052734, 793.2236328125, 1556.117431640625, 869.9796752929688, 39.438934326171875, -531.582763671875, 274.0296630859375, -291.62158203125, 431.29205322265625, 324.7894592285156, -473.9791259765625, 672.7252197265625, 618.1307373046875, 63.314292907714844, 771.8740234375, 176.47056579589844, 194.81094360351562, 187.01422119140625, 178.65570068359375, 221.4410858154297, 656.1570434570312, 465.51531982421875, -204.6018524169922, 151.64479064941406, 771.317626953125, 1040.426025390625, 698.845947265625, 313.71673583984375, -1212.1220703125, -616.2047119140625, 429.4014587402344, 675.949951171875, -348.7038879394531, 427.0123596191406, 267.1916809082031, 1973.171875, 1194.5068359375, -398.73907470703125, -913.5093383789062, 513.737060546875, -269.8940734863281, -250.53512573242188, -221.81781005859375, 177.43637084960938, 427.886474609375, 322.369384765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000541.npy"} +{"epoch": 0.7944199706314243, "step": 542, "batch_size": 64, "mean": 472.7724609375, "std": 662.3981323242188, "min": -1275.292236328125, "p10": -279.5805282592773, "median": 393.8968048095703, "p90": 1260.0657226562503, "max": 2246.6455078125, "pos_frac": 0.796875, "sample": [954.1895751953125, 359.65667724609375, 272.91387939453125, 221.20614624023438, 226.62969970703125, 1079.968994140625, -378.08087158203125, 2246.6455078125, 1283.2275390625, 410.64349365234375, 26.898544311523438, 511.341796875, 904.3232421875, -95.02487182617188, -71.67574310302734, 476.716064453125, 2024.072509765625, 319.98687744140625, -66.57915496826172, 1921.6578369140625, 352.3209533691406, 146.78578186035156, 482.43853759765625, 500.839111328125, -301.2774963378906, 218.84410095214844, -140.11517333984375, -430.44805908203125, -228.9542694091797, 1027.166748046875, -352.866455078125, 917.969970703125, 816.3051147460938, 194.70956420898438, 1816.106689453125, -70.01632690429688, 699.859375, 529.88916015625, -579.0667114257812, 690.1240234375, 359.22998046875, 448.96356201171875, 817.4916381835938, 107.02571868896484, 1536.0556640625, 209.18411254882812, 229.83856201171875, -1275.292236328125, 735.8479614257812, 121.8714828491211, 707.5665893554688, 352.56207275390625, 102.30781555175781, 398.4413146972656, 1701.482177734375, 1206.021484375, 389.352294921875, -933.1012573242188, 1114.5345458984375, 246.47927856445312, 448.23065185546875, 657.8438720703125, 724.3447265625, 931.8226318359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000542.npy"} +{"epoch": 0.7958883994126285, "step": 543, "batch_size": 64, "mean": 336.0360107421875, "std": 782.145751953125, "min": -1004.4286499023438, "p10": -371.455517578125, "median": 160.56012725830078, "p90": 1434.3758911132823, "max": 3373.8740234375, "pos_frac": 0.65625, "sample": [21.99127197265625, -14.43206787109375, 717.2849731445312, -39.0784912109375, 13.549673080444336, 2470.1650390625, 3.4383316040039062, -24.265518188476562, -1004.4286499023438, 865.240966796875, -62.100746154785156, -879.4212646484375, 79.52435302734375, 219.3695068359375, 2373.4716796875, 59.12025451660156, 38.896148681640625, 3373.8740234375, -708.1924438476562, 34.98229217529297, 569.6385498046875, -483.09112548828125, -306.53948974609375, 187.6295623779297, -380.66058349609375, -153.34408569335938, -73.25408935546875, 558.9146728515625, -437.51580810546875, 238.6975860595703, 645.998779296875, 217.30564880371094, -362.36260986328125, 1527.3150634765625, -207.58859252929688, -225.3459014892578, 1971.115478515625, 628.98291015625, 1217.517822265625, 90.5053939819336, 1527.345947265625, 180.1327667236328, -212.68724060058594, 389.04974365234375, 850.4513549804688, -17.07361602783203, 289.76483154296875, 1836.6436767578125, -375.35247802734375, 627.9064331054688, 213.02810668945312, -182.772705078125, 418.36846923828125, -319.5780944824219, 453.67364501953125, 586.647216796875, 318.4722595214844, -102.19181060791016, 48.41252136230469, 355.8285827636719, 140.98748779296875, 600.0245361328125, 599.7904663085938, 516.524169921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000543.npy"} +{"epoch": 0.7973568281938326, "step": 544, "batch_size": 64, "mean": 384.9022216796875, "std": 589.7805786132812, "min": -1030.514404296875, "p10": -183.91379089355468, "median": 378.89353942871094, "p90": 1154.4454711914066, "max": 1953.4681396484375, "pos_frac": 0.75, "sample": [1185.1002197265625, 553.7642822265625, 358.28875732421875, 462.2815246582031, 1671.07958984375, -985.1202392578125, 417.2251281738281, -106.87114715576172, 766.9364013671875, 1.4945964813232422, 1306.79296875, -75.14085388183594, 707.8619384765625, 261.6165771484375, -925.961669921875, 376.7192077636719, 632.4596557617188, 201.5740966796875, -49.07928466796875, 417.2354431152344, 381.06787109375, 417.63262939453125, -207.31234741210938, 252.30197143554688, 87.78817749023438, 188.2085418701172, 743.47119140625, 77.18365478515625, -375.4515380859375, -188.97714233398438, 1620.8626708984375, 187.123046875, -79.60710906982422, 663.8754272460938, -1030.514404296875, 1082.917724609375, 535.7132568359375, 441.86376953125, 634.4693603515625, -181.26223754882812, 255.299072265625, 22.534358978271484, 267.24395751953125, 1677.421142578125, 784.8140258789062, -105.12089538574219, 142.35130310058594, 1953.4681396484375, 861.3984985351562, 585.3372802734375, -6.286014556884766, 158.77670288085938, 777.2271728515625, 1486.8712158203125, 662.03125, 441.8464050292969, 391.1131286621094, -151.0341033935547, 511.7468566894531, 119.63401794433594, -74.51292419433594, 821.166748046875, -185.0501708984375, 805.8519287109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000544.npy"} +{"epoch": 0.7988252569750367, "step": 545, "batch_size": 64, "mean": 480.9786376953125, "std": 612.5518798828125, "min": -747.8729248046875, "p10": -99.84180221557617, "median": 338.0866394042969, "p90": 1478.2558715820314, "max": 1898.72216796875, "pos_frac": 0.859375, "sample": [-131.56393432617188, 8.509368896484375, -667.2872314453125, 76.86734008789062, 252.5397186279297, 971.9497680664062, 234.79278564453125, 55.64189147949219, 633.8495483398438, -417.1264343261719, 485.2345886230469, 758.379638671875, 68.22600555419922, 681.1416625976562, 1197.3565673828125, 118.72650909423828, 1692.27001953125, 1431.0330810546875, 1493.0380859375, 350.33636474609375, 951.6661987304688, 64.21316528320312, 279.3165283203125, 252.9197540283203, 55.636680603027344, 359.9371643066406, 66.00653076171875, 461.39068603515625, 902.620361328125, -100.0689926147461, 1565.0731201171875, 533.5339965820312, 132.0595245361328, -99.31169128417969, 1898.72216796875, 1143.2554931640625, 18.297622680664062, 1614.833740234375, -633.1051635742188, 506.6282958984375, -29.718711853027344, 459.1432189941406, 110.746826171875, 585.9744873046875, 442.3326110839844, 681.0018920898438, 1007.210693359375, 1443.7640380859375, 271.0269470214844, 267.41583251953125, 167.6581573486328, 82.50093078613281, 345.8825988769531, 411.13507080078125, 136.85415649414062, 1724.6141357421875, 1436.0220947265625, -274.2776184082031, 330.2906799316406, 218.482421875, 716.7559814453125, -747.8729248046875, 191.3631134033203, 1536.7841796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000545.npy"} +{"epoch": 0.8002936857562408, "step": 546, "batch_size": 64, "mean": 300.89483642578125, "std": 506.4837646484375, "min": -1585.2960205078125, "p10": -255.3810089111328, "median": 268.6678924560547, "p90": 855.9471801757813, "max": 1332.51171875, "pos_frac": 0.75, "sample": [-234.00106811523438, 108.0320816040039, 357.67333984375, 50.645050048828125, 221.8082275390625, 505.6353759765625, 260.8885192871094, 483.92303466796875, 148.1793670654297, 220.00897216796875, -201.12832641601562, -243.87362670898438, -235.80162048339844, -135.04473876953125, 779.7366333007812, 129.23594665527344, 20.16504669189453, 264.0627136230469, 1085.627685546875, 281.45623779296875, -87.01714324951172, 789.8646850585938, -317.6789245605469, 1255.0054931640625, 629.2355346679688, 817.1951904296875, 807.6300048828125, 512.814453125, 838.93408203125, 391.8432312011719, 1332.51171875, -303.08660888671875, 15.348091125488281, 465.0153503417969, 529.556396484375, -751.1568603515625, 677.59033203125, 442.94769287109375, 738.5198974609375, 903.5968627929688, -1585.2960205078125, 783.442138671875, 228.38690185546875, 304.91815185546875, 466.5563659667969, 859.9371948242188, -0.11138916015625, 814.3994140625, -33.484901428222656, 179.14163208007812, 34.40338897705078, 68.89263153076172, 846.6371459960938, 273.2730712890625, -260.312744140625, 556.0927734375, 147.26272583007812, 1172.497802734375, -289.1869812011719, 72.6142807006836, -109.34974670410156, 429.31292724609375, -359.11407470703125, 1100.460205078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000546.npy"} +{"epoch": 0.801762114537445, "step": 547, "batch_size": 64, "mean": 404.60955810546875, "std": 576.495361328125, "min": -1284.840087890625, "p10": -124.51261291503906, "median": 361.9092712402344, "p90": 1048.303271484375, "max": 2129.8388671875, "pos_frac": 0.71875, "sample": [579.01513671875, 68.03668975830078, -1284.840087890625, 1018.738525390625, -8.694408416748047, 41.112884521484375, 295.65814208984375, 755.7949829101562, 183.62835693359375, 598.0170288085938, 164.0086669921875, 1862.134033203125, -28.751338958740234, 1847.269775390625, -115.88066101074219, -87.18506622314453, 527.4783325195312, 643.9176025390625, -148.29429626464844, 958.7216796875, -37.66753387451172, 106.60779571533203, 502.5037841796875, 296.3215637207031, 655.5550537109375, -173.33160400390625, 583.093994140625, 236.3207244873047, 215.50755310058594, 409.6197204589844, 1060.973876953125, -128.21202087402344, 605.170166015625, 680.031982421875, 624.5593872070312, 370.86431884765625, -111.10350036621094, 971.2886352539062, 380.08990478515625, -10.55157470703125, 731.4859619140625, 2129.8388671875, 588.8560791015625, -604.34423828125, -250.93692016601562, -22.012367248535156, -341.1171875, 1142.1151123046875, 117.60899353027344, 613.4613037109375, 1533.699951171875, 239.20274353027344, 582.4268798828125, 435.962890625, 215.78858947753906, 272.3179931640625, 798.8880615234375, -89.644287109375, 416.087890625, -72.08770751953125, 724.5181884765625, 1371.78466796875, 352.9542236328125, -99.37226104736328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000547.npy"} +{"epoch": 0.8032305433186491, "step": 548, "batch_size": 64, "mean": 355.14385986328125, "std": 617.1107788085938, "min": -977.9791259765625, "p10": -223.32734374999998, "median": 287.87596130371094, "p90": 907.8351013183594, "max": 3009.965087890625, "pos_frac": 0.734375, "sample": [-15.620956420898438, 776.2684936523438, 278.2801513671875, 333.1211853027344, -197.1953125, 315.2782897949219, -977.9791259765625, 1396.421142578125, 414.0939636230469, 374.9655456542969, 432.3671875, 499.85467529296875, 1073.6751708984375, 309.22711181640625, 698.63427734375, -298.0814208984375, -73.76368713378906, 360.780029296875, 912.910400390625, 188.2947998046875, 895.9927368164062, 241.78921508789062, 199.92230224609375, -542.4231567382812, 463.6707458496094, 64.1357650756836, 182.49879455566406, 310.4065856933594, 297.4717712402344, 132.51930236816406, -259.3854675292969, 3009.965087890625, 860.7880859375, 215.90805053710938, 137.40928649902344, -462.632568359375, -71.11167907714844, 696.701171875, -199.95480346679688, 2232.1728515625, -54.313358306884766, -120.34323120117188, -244.90870666503906, 524.923828125, -94.36428833007812, 57.231781005859375, 424.70281982421875, 136.23309326171875, 399.3581237792969, -233.34414672851562, 467.85516357421875, 370.5422058105469, -60.26527404785156, 1998.772705078125, 445.55169677734375, 588.8944091796875, 102.27662658691406, -53.955108642578125, 227.9818572998047, 260.9407653808594, 19.799400329589844, 638.1107177734375, 961.7691040039062, 758.3804321289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000548.npy"} +{"epoch": 0.8046989720998532, "step": 549, "batch_size": 64, "mean": 274.687255859375, "std": 579.717041015625, "min": -1188.4671630859375, "p10": -326.2873596191406, "median": 197.86541748046875, "p90": 965.8433776855469, "max": 1791.3455810546875, "pos_frac": 0.65625, "sample": [292.1175842285156, 921.2083740234375, 86.46467590332031, -25.356422424316406, 112.05198669433594, 99.2734375, -289.01898193359375, 533.011474609375, 756.589111328125, 967.9451904296875, -151.9417266845703, 1467.0526123046875, 66.36380767822266, 156.68283081054688, -128.84127807617188, -354.308837890625, 960.9391479492188, -342.259521484375, -175.50198364257812, -817.637451171875, 597.4254760742188, 1091.8455810546875, -401.5071105957031, 272.01275634765625, 347.98126220703125, 287.58392333984375, 510.64569091796875, -67.45635223388672, 782.5679931640625, 917.431884765625, -47.64520263671875, 243.58583068847656, 276.49481201171875, -191.09255981445312, 368.8189392089844, -1188.4671630859375, 181.81619262695312, 677.29443359375, 67.6400375366211, 61.01210403442383, -125.83255004882812, -79.17879486083984, 337.798095703125, 665.6171264648438, 1456.4400634765625, -102.61006164550781, -164.51206970214844, 255.93202209472656, -371.5164794921875, 1713.2969970703125, 1405.815673828125, -852.3919677734375, -70.39839172363281, 747.2781982421875, 453.2664794921875, -205.81588745117188, 213.91464233398438, 166.98056030273438, 666.9488525390625, 1791.3455810546875, 444.37060546875, 340.64544677734375, 137.93968200683594, -168.17190551757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000549.npy"} +{"epoch": 0.8061674008810573, "step": 550, "batch_size": 64, "mean": 334.016845703125, "std": 529.0935668945312, "min": -1016.1893310546875, "p10": -301.31885986328115, "median": 342.7616729736328, "p90": 894.9503906250002, "max": 2032.751708984375, "pos_frac": 0.8125, "sample": [378.14398193359375, 352.9645690917969, 1280.449951171875, 374.4503479003906, 336.6086730957031, 754.609130859375, 1067.9844970703125, 267.62261962890625, -150.9414520263672, 296.4736328125, 563.2415161132812, 387.1690673828125, 538.4708251953125, 174.3277587890625, 348.9146728515625, 1834.7625732421875, 47.68291473388672, -348.0276184082031, 80.54209899902344, 91.04754638671875, -1016.1893310546875, -9.134490966796875, 483.20751953125, 858.1651000976562, -53.66267395019531, 910.7155151367188, 851.2433471679688, 133.11492919921875, 252.39373779296875, 100.21250915527344, 37.06074523925781, -738.96240234375, 303.83642578125, -116.36385345458984, 422.6084289550781, 793.7840576171875, 75.92311096191406, 437.56884765625, 2032.751708984375, 453.7557678222656, 255.19444274902344, 706.0801391601562, 112.94075012207031, 33.461273193359375, 1336.2972412109375, 412.68585205078125, 1101.4222412109375, 95.01499938964844, 73.69930267333984, 636.7313842773438, 476.1531066894531, 629.5115356445312, -614.94921875, 536.8757934570312, -192.33175659179688, 388.33929443359375, 248.9421844482422, 516.1925048828125, 372.7128601074219, -365.0286865234375, 520.2972412109375, -432.86749267578125, -474.96923828125, 116.14842224121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000550.npy"} +{"epoch": 0.8076358296622613, "step": 551, "batch_size": 64, "mean": 553.640625, "std": 556.6996459960938, "min": -484.0754699707031, "p10": -111.1850448608398, "median": 494.8992156982422, "p90": 1455.9597290039062, "max": 1962.78955078125, "pos_frac": 0.828125, "sample": [39.92074966430664, -30.71490478515625, 451.0223083496094, 931.5975952148438, 673.0030517578125, 573.3369750976562, -484.0754699707031, -20.366954803466797, 1345.8953857421875, -325.004638671875, -131.451171875, 347.87225341796875, -63.89741516113281, 577.5770263671875, 578.6089477539062, 1645.7579345703125, 497.2231140136719, 409.32470703125, 574.2720336914062, 1719.725830078125, 1006.5458984375, 369.52484130859375, 667.5350341796875, 652.3521728515625, 277.58612060546875, 302.72222900390625, 986.6947631835938, 425.546142578125, 613.6433715820312, 375.8777770996094, 400.7711181640625, -24.62851333618164, 1491.512939453125, 170.30224609375, 492.5753173828125, 594.1953735351562, 859.089599609375, 690.678466796875, 536.3914794921875, 99.56565856933594, -245.78231811523438, 1249.0819091796875, 1456.1817626953125, 460.2202453613281, 111.64924621582031, 1623.29052734375, -416.2486267089844, -295.89410400390625, 1505.1558837890625, 827.9912719726562, -312.0002746582031, 137.57083129882812, 1455.441650390625, 951.22705078125, 433.5561828613281, 585.2755126953125, 763.2884521484375, 336.047607421875, 453.3674621582031, 1081.0167236328125, 122.62696838378906, 1962.78955078125, 552.0515747070312, 336.98883056640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000551.npy"} +{"epoch": 0.8091042584434655, "step": 552, "batch_size": 64, "mean": 611.0692749023438, "std": 725.7998046875, "min": -724.1978759765625, "p10": -112.9920143127441, "median": 539.0704650878906, "p90": 1469.8100341796876, "max": 2758.57666015625, "pos_frac": 0.84375, "sample": [821.8900146484375, 93.63077545166016, 1104.8861083984375, 2758.57666015625, 852.5582275390625, 1288.720458984375, 630.7552490234375, 1070.294921875, 36.70489501953125, 541.6686401367188, 667.7239379882812, 551.425048828125, 141.8567352294922, 481.3603515625, 239.19859313964844, 705.379150390625, 207.51486206054688, 656.3970336914062, 1372.6148681640625, -413.5984191894531, 52.371368408203125, 586.8970947265625, 512.9549560546875, 366.34283447265625, -724.1978759765625, 14.036903381347656, 36.68950653076172, 1483.4302978515625, 243.57322692871094, -225.72586059570312, 1417.435791015625, -588.7910766601562, 823.8472290039062, -74.2521743774414, 539.8106079101562, 132.89317321777344, 511.00634765625, 2465.576416015625, 1438.0294189453125, 1988.1683349609375, 538.330322265625, -189.69338989257812, 530.56591796875, 981.5711669921875, 639.1331787109375, 2646.85205078125, 255.6307373046875, 407.96295166015625, 1488.5457763671875, 2381.615234375, 542.0740966796875, 153.69998168945312, 436.60626220703125, -345.8099060058594, -69.7020263671875, 548.7578125, -129.5948028564453, 143.9926300048828, 422.01690673828125, 591.9302978515625, 618.7468872070312, 993.564697265625, 721.217529296875, -9.234321594238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000552.npy"} +{"epoch": 0.8105726872246696, "step": 553, "batch_size": 64, "mean": 314.45751953125, "std": 589.4143676757812, "min": -801.7569580078125, "p10": -216.28503417968747, "median": 192.7423095703125, "p90": 999.8455688476563, "max": 2759.284423828125, "pos_frac": 0.734375, "sample": [2759.284423828125, 951.119873046875, -183.53704833984375, 501.7938537597656, 444.15032958984375, -114.5177993774414, 53.09861755371094, 42.28667449951172, 157.82057189941406, 315.98992919921875, 187.69131469726562, 276.84075927734375, 115.11454010009766, 82.34249877929688, -80.46295928955078, 230.10545349121094, -568.2596435546875, 13.91876220703125, 323.2777099609375, -180.2319793701172, 196.95797729492188, 169.21116638183594, 315.4935607910156, -465.2093505859375, 530.0089111328125, -83.25721740722656, 1187.064453125, 92.20111083984375, -2.5496749877929688, 282.490966796875, -161.5863800048828, 480.6319274902344, -230.31988525390625, -150.47132873535156, 714.3751220703125, 345.2750244140625, 349.023193359375, 133.66943359375, 942.5059814453125, -126.8468246459961, 424.19769287109375, -801.7569580078125, 633.6895141601562, 584.7074584960938, 188.52664184570312, 118.76171875, -712.3673095703125, 28.3780517578125, 504.1368408203125, 22.61492156982422, 1172.907958984375, -8.979713439941406, 1349.5675048828125, 514.6692504882812, 904.33837890625, 1152.957275390625, 267.42547607421875, 1002.1179809570312, -487.34564208984375, 1948.12939453125, -230.8154296875, 994.5432739257812, 35.59754180908203, 672.7849731445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000553.npy"} +{"epoch": 0.8120411160058737, "step": 554, "batch_size": 64, "mean": 430.4720153808594, "std": 719.0418701171875, "min": -957.6266479492188, "p10": -470.5794860839843, "median": 382.66773986816406, "p90": 1476.1581665039068, "max": 2335.486328125, "pos_frac": 0.765625, "sample": [164.41981506347656, -666.4711303710938, 887.9465942382812, 469.3446350097656, 219.68167114257812, 931.3486328125, 358.3284606933594, 433.9839172363281, -409.8787536621094, 351.5589599609375, 2335.486328125, 412.9105224609375, 418.21038818359375, 364.1973571777344, -207.3188018798828, 469.26751708984375, 610.079345703125, 575.6331787109375, 609.84228515625, 3.7271499633789062, -821.65869140625, -689.13232421875, -496.5940856933594, 5.912849426269531, 2025.9425048828125, -220.3211669921875, 249.33462524414062, 401.8624267578125, 451.8143310546875, 854.2120361328125, 568.0933837890625, 1531.803955078125, 1677.2808837890625, 176.21011352539062, 993.4906005859375, 347.1918640136719, 916.055908203125, -16.19803237915039, 1712.031005859375, -189.57359313964844, -84.18472290039062, 77.26091766357422, 1139.742919921875, 612.156494140625, 994.5809326171875, 122.14006042480469, 322.48443603515625, 2.615386962890625, 928.624267578125, 854.5491943359375, -957.6266479492188, 782.885498046875, 305.4807434082031, -664.0053100585938, 1100.8404541015625, -933.877197265625, -340.1078186035156, 33.92322540283203, 401.13812255859375, 1346.3179931640625, 1873.7335205078125, 1837.5130615234375, 105.5241928100586, -121.55792236328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000554.npy"} +{"epoch": 0.8135095447870778, "step": 555, "batch_size": 64, "mean": 504.0443420410156, "std": 777.2073364257812, "min": -1214.7677001953125, "p10": -233.6717803955078, "median": 419.50035095214844, "p90": 1623.0852416992188, "max": 2530.083984375, "pos_frac": 0.796875, "sample": [500.82147216796875, 407.24945068359375, 1458.481201171875, 295.9839172363281, 598.6405029296875, 1320.7567138671875, 486.86761474609375, 168.11312866210938, -171.31585693359375, 205.64840698242188, -223.15806579589844, 900.5160522460938, 771.8079833984375, 496.26593017578125, 12.31365966796875, 71.84994506835938, 189.68576049804688, -285.1597595214844, 2530.083984375, 1311.9766845703125, 117.66851806640625, 2137.29052734375, 44.04120635986328, 454.548828125, 511.9046630859375, 128.31240844726562, 912.5953369140625, 35.49739074707031, 498.0588684082031, 85.75291442871094, 763.369873046875, 1260.50048828125, 638.7825927734375, 519.0714111328125, 19.368865966796875, 246.1211395263672, 749.2457275390625, -618.77880859375, 172.64096069335938, -191.6689453125, -1041.2889404296875, 1098.0382080078125, 1809.0396728515625, 2268.15771484375, -587.6224365234375, 490.0961608886719, 2213.9443359375, 111.53501892089844, 431.7512512207031, 82.1392822265625, 1601.9964599609375, -494.319091796875, 1632.123291015625, -1214.7677001953125, -135.06707763671875, -117.54915618896484, 1933.068603515625, 564.333251953125, -156.66696166992188, 142.641357421875, 1295.8355712890625, 339.7868957519531, -238.1776580810547, 698.05810546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000555.npy"} +{"epoch": 0.8149779735682819, "step": 556, "batch_size": 64, "mean": 298.2765808105469, "std": 454.6336975097656, "min": -488.6456298828125, "p10": -244.83353881835933, "median": 251.74628448486328, "p90": 904.4056396484375, "max": 1468.285888671875, "pos_frac": 0.703125, "sample": [1375.5775146484375, 206.6085205078125, 87.77395629882812, -124.598876953125, 276.3497619628906, 140.45675659179688, 604.482421875, 416.8486633300781, -212.98464965820312, 337.19683837890625, 92.55928802490234, 732.3974609375, 275.85919189453125, -304.8255920410156, 218.25628662109375, 562.3358764648438, 1202.3929443359375, -3.19049072265625, 696.4498901367188, -188.85992431640625, -488.6456298828125, -82.59102630615234, -326.1022644042969, 149.5463409423828, -382.307373046875, -154.68389892578125, 423.0707702636719, -66.60444641113281, 907.5036010742188, -287.276611328125, 431.98480224609375, 380.39776611328125, 254.70359802246094, 96.37329864501953, 853.7781982421875, -51.23601531982422, 431.3431396484375, 121.90849304199219, 219.92941284179688, -57.908729553222656, -113.7564697265625, 740.537841796875, 176.5027618408203, 120.01697540283203, -258.4830627441406, 574.0432739257812, 992.6845703125, 456.5574951171875, 1005.4976196289062, 45.996620178222656, 248.78897094726562, -149.0029296875, 897.1770629882812, 284.05926513671875, 1468.285888671875, -84.03478240966797, 890.115478515625, 1422.932861328125, 467.2787780761719, 508.431884765625, 418.1942138671875, -355.7344970703125, 313.38470458984375, 255.957275390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000556.npy"} +{"epoch": 0.8164464023494861, "step": 557, "batch_size": 64, "mean": 453.0914306640625, "std": 739.7955932617188, "min": -1553.614501953125, "p10": -343.3324737548828, "median": 394.72509765625, "p90": 1322.1692260742193, "max": 2820.03955078125, "pos_frac": 0.75, "sample": [-1553.614501953125, 120.21824645996094, -1036.33154296875, 862.9737548828125, 386.925537109375, 229.4793701171875, 160.6613006591797, 532.5223388671875, 694.71337890625, 96.0230712890625, 1026.482177734375, 1370.4991455078125, 1013.7598266601562, -7.049072265625, 145.79718017578125, 341.14288330078125, 461.96392822265625, 974.6533203125, -10.817012786865234, 111.37744903564453, 80.36448669433594, 1209.3994140625, 1832.3446044921875, 698.5501708984375, -25.01007080078125, 530.3843994140625, 12.37603759765625, -442.95794677734375, -339.3760681152344, 817.2784423828125, 171.4415283203125, 618.5635986328125, 1193.9866943359375, 582.4010009765625, 237.4526824951172, 203.26234436035156, 450.2917175292969, -570.0850219726562, 931.1505737304688, 568.1021728515625, 869.0078735351562, -277.4707946777344, 543.5962524414062, 136.23110961914062, 178.2930450439453, -532.3497314453125, 755.10986328125, 1767.3475341796875, -423.1295471191406, 2820.03955078125, -38.45305252075195, 2368.55810546875, 1520.74853515625, 170.54122924804688, 1068.909912109375, -128.1240997314453, -345.028076171875, 1562.8409423828125, -276.007080078125, 1156.94140625, -19.12541961669922, 509.2838439941406, 402.524658203125, 526.2645263671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000557.npy"} +{"epoch": 0.8179148311306902, "step": 558, "batch_size": 64, "mean": 610.5439453125, "std": 648.1548461914062, "min": -1697.0460205078125, "p10": -98.62938537597654, "median": 496.77410888671875, "p90": 1414.8775390625, "max": 2034.571044921875, "pos_frac": 0.8125, "sample": [437.4400634765625, 1051.28466796875, 225.1404266357422, 1386.26025390625, 174.91815185546875, 309.5317687988281, 564.579833984375, 91.10670471191406, 951.7332763671875, -360.4360046386719, -160.04043579101562, 1273.470458984375, 1338.8587646484375, 1157.4749755859375, 449.2630310058594, 1159.035400390625, 655.3182983398438, 355.2321472167969, 1715.131591796875, 1917.70751953125, -273.7369079589844, 376.44219970703125, 275.4136962890625, 1202.0667724609375, -20.04779815673828, 780.7271728515625, 1427.14208984375, 443.17803955078125, 1451.134033203125, 542.74658203125, 415.3393859863281, 604.0494384765625, 899.5602416992188, 1252.7918701171875, 1442.2608642578125, 1056.1197509765625, -1697.0460205078125, -73.035400390625, 499.5582580566406, 190.3260040283203, 373.11676025390625, -5.412199020385742, 125.78465270996094, 396.04559326171875, 469.72100830078125, -44.13108825683594, -123.11872863769531, 2034.571044921875, 1113.84375, -0.5271186828613281, 788.8231201171875, 1153.850341796875, 614.2150268554688, 493.9899597167969, 1142.2156982421875, -563.060546875, 839.2767333984375, 390.0314636230469, 431.078369140625, -109.59823608398438, 224.75515747070312, 1481.6810302734375, 1255.765869140625, 1103.896728515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000558.npy"} +{"epoch": 0.8193832599118943, "step": 559, "batch_size": 64, "mean": 505.05621337890625, "std": 664.85205078125, "min": -905.0733642578125, "p10": -121.57596588134764, "median": 308.7906494140625, "p90": 1270.2769042968753, "max": 2409.185302734375, "pos_frac": 0.796875, "sample": [1025.23779296875, 1393.1832275390625, 714.5654296875, 715.4814453125, 119.27906036376953, 46.68098449707031, 103.49263000488281, 149.61073303222656, 1291.018798828125, 2409.185302734375, 1172.09423828125, 393.0223388671875, 1178.761474609375, -1.6250324249267578, 91.55911254882812, 754.373046875, 1112.715576171875, 1408.2476806640625, -905.0733642578125, -496.15673828125, 1183.6126708984375, 293.50445556640625, 214.70755004882812, 446.18896484375, 248.6428985595703, 1157.26611328125, -163.76687622070312, -106.76959228515625, 1221.879150390625, 194.34381103515625, -0.26982879638671875, 267.372802734375, 118.01020050048828, 59.65662384033203, 98.39482116699219, -127.92155456542969, 31.31387710571289, -74.48986053466797, 213.0779571533203, 208.073974609375, 2128.83984375, 540.613037109375, 961.566162109375, 122.9715347290039, -807.4738159179688, 813.4453735351562, 657.6910400390625, 655.6077270507812, 1145.21435546875, -70.97164154052734, 533.0274658203125, 224.47589111328125, 324.07684326171875, 1156.79296875, 1606.4490966796875, 1978.1474609375, -388.9490966796875, 785.3983764648438, 1034.8841552734375, 505.1214599609375, -15.934011459350586, 70.90623474121094, -505.59429931640625, 708.8079833984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000559.npy"} +{"epoch": 0.8208516886930984, "step": 560, "batch_size": 64, "mean": 373.83026123046875, "std": 598.6519775390625, "min": -982.2417602539062, "p10": -266.4108215332031, "median": 323.6508331298828, "p90": 1269.5274047851565, "max": 2129.708251953125, "pos_frac": 0.71875, "sample": [2129.708251953125, -367.14617919921875, -101.3096694946289, 1160.088623046875, -451.8928527832031, 659.8009033203125, 100.2563705444336, -129.93780517578125, 102.5452880859375, 484.7088317871094, 239.8980712890625, 153.84024047851562, 242.94326782226562, 539.8818359375, 1565.08203125, -247.96759033203125, 716.0604858398438, 1309.6312255859375, 519.84716796875, 1025.98095703125, 711.36572265625, -120.44073486328125, 193.02902221679688, 1388.5938720703125, 1019.7023315429688, -649.5044555664062, -8.103572845458984, 1228.0789794921875, 422.9608154296875, -165.31939697265625, 921.7905883789062, -180.47242736816406, 26.034215927124023, 550.1781005859375, 476.73602294921875, -513.4573364257812, 123.97010040283203, 1452.9912109375, 251.17626953125, 523.9534301757812, 532.3650512695312, -144.15374755859375, 407.18341064453125, 190.97555541992188, 769.6255493164062, 1371.3211669921875, -109.93052673339844, -50.39086151123047, -205.11036682128906, 1287.291015625, 556.1717529296875, 265.0051574707031, 580.8082275390625, 215.50950622558594, 382.2965087890625, 386.4845275878906, -274.3150634765625, 494.56201171875, 824.0554809570312, 639.7408447265625, -708.8717041015625, 97.11502838134766, -982.2417602539062, 94.35748291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000560.npy"} +{"epoch": 0.8223201174743024, "step": 561, "batch_size": 64, "mean": 297.10052490234375, "std": 682.1762084960938, "min": -2537.79248046875, "p10": -302.2575805664062, "median": 241.31307983398438, "p90": 1123.187377929688, "max": 1976.7691650390625, "pos_frac": 0.71875, "sample": [614.8836059570312, 447.51373291015625, 124.67855072021484, 69.5562973022461, -1152.125, 1421.05859375, 743.7572021484375, 83.53996276855469, 707.2421875, 1160.951416015625, 154.12744140625, 538.027099609375, 1976.7691650390625, -148.89454650878906, 193.86624145507812, 190.38455200195312, 176.038818359375, -243.97952270507812, 383.24981689453125, 461.1363525390625, 1326.0687255859375, 160.94363403320312, 579.0324096679688, 312.0821838378906, -259.4786682128906, 88.43748474121094, 878.271728515625, 319.8695373535156, -53.82556915283203, 1630.1861572265625, 723.8350830078125, 1267.574462890625, -0.080780029296875, 276.1146545410156, 975.254638671875, -619.3108520507812, -320.5914001464844, 527.529296875, 20.80814552307129, 653.8576049804688, 120.75647735595703, -421.5010681152344, -68.89264678955078, 850.6455688476562, 514.7523193359375, -122.98162078857422, 97.55084228515625, 772.0585327148438, 1035.0712890625, 225.63113403320312, -2537.79248046875, 1719.51171875, 75.62605285644531, -196.04058837890625, 376.5426025390625, -29.53973388671875, 477.0926513671875, -85.91569519042969, 668.5960693359375, 314.90655517578125, -668.0772705078125, 256.9950256347656, -530.3172607421875, -218.60391235351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000561.npy"} +{"epoch": 0.8237885462555066, "step": 562, "batch_size": 64, "mean": 433.372802734375, "std": 657.5842895507812, "min": -1256.5343017578125, "p10": -271.4836791992187, "median": 380.0679626464844, "p90": 1050.35205078125, "max": 2948.965576171875, "pos_frac": 0.765625, "sample": [615.156982421875, 2948.965576171875, 1425.374267578125, 545.206298828125, 155.62850952148438, 416.1089172363281, 746.7314453125, 300.57208251953125, 1106.4197998046875, 783.36474609375, 882.2236328125, 931.6927490234375, 270.15020751953125, 528.5037231445312, 1007.7985229492188, -241.56846618652344, 245.66802978515625, 923.0977783203125, 138.69764709472656, 410.6380615234375, 349.49786376953125, 627.9041137695312, 517.2271728515625, -300.5362243652344, 35.24571990966797, 814.6203002929688, 1052.30908203125, -48.189697265625, 1006.143798828125, 233.8214111328125, -284.32965087890625, -244.6048583984375, -14.295305252075195, 755.754638671875, -14.631477355957031, 2421.3525390625, -147.18316650390625, 453.85528564453125, 600.7864990234375, 999.891357421875, 729.1099853515625, 274.32757568359375, 662.7353515625, 280.7889404296875, -1256.5343017578125, 263.76861572265625, -283.003173828125, 913.4450073242188, 456.90185546875, 1045.78564453125, -20.138484954833984, -126.95854187011719, 84.8454360961914, 45.05741500854492, 720.8292236328125, 268.60382080078125, -747.5447387695312, 64.95355987548828, 162.95359802246094, -622.9660034179688, 98.60984802246094, -541.8917236328125, 1204.5496826171875, 1102.5589599609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000562.npy"} +{"epoch": 0.8252569750367107, "step": 563, "batch_size": 64, "mean": 316.32666015625, "std": 795.9945068359375, "min": -3204.349609375, "p10": -564.0203552246094, "median": 389.9095764160156, "p90": 1040.1227416992188, "max": 1859.3193359375, "pos_frac": 0.765625, "sample": [207.80322265625, -3204.349609375, -1058.66650390625, 409.49072265625, 812.0031127929688, 133.3038330078125, 1452.681884765625, -36.820343017578125, 324.44097900390625, 654.84228515625, 903.148681640625, 207.14727783203125, -633.10888671875, 1025.1165771484375, 799.7896118164062, 795.2847900390625, 105.96788024902344, 1859.3193359375, 717.8923950195312, -218.81394958496094, 614.8914794921875, 152.48196411132812, -179.39889526367188, 353.324951171875, 977.0519409179688, -27.000988006591797, 766.8839721679688, 321.141845703125, 1143.3148193359375, 989.1341552734375, 139.391357421875, 770.1881103515625, -1654.327392578125, 567.9535522460938, -170.5060577392578, 140.59271240234375, 4.749546051025391, 840.814208984375, 587.9815673828125, 374.21124267578125, 892.39501953125, 631.9253540039062, 1309.7266845703125, 266.11395263671875, 1046.553955078125, -186.4530029296875, -1389.8062744140625, 1794.4696044921875, -569.2149658203125, 1402.7708740234375, 33.21396255493164, 758.7520141601562, 74.93701934814453, -697.42529296875, 810.856689453125, -551.8995971679688, 446.29107666015625, -249.95880126953125, 99.40589904785156, 581.0908813476562, 405.60791015625, 416.8927917480469, 352.076904296875, 597.236572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000563.npy"} +{"epoch": 0.8267254038179148, "step": 564, "batch_size": 64, "mean": 413.9423828125, "std": 545.6494750976562, "min": -565.8392333984375, "p10": -254.36305541992184, "median": 342.28297424316406, "p90": 1167.9204223632821, "max": 2016.4022216796875, "pos_frac": 0.796875, "sample": [356.0384521484375, 450.7850036621094, 751.8013916015625, 139.9794158935547, 716.2030639648438, 565.1835327148438, 705.8507080078125, -324.4419250488281, 385.8814392089844, -565.8392333984375, 652.351806640625, 758.8104248046875, -379.29803466796875, 516.535400390625, 980.1104736328125, 32.226593017578125, -194.5458526611328, 1452.3880615234375, 1248.410400390625, 480.71026611328125, 328.5274963378906, 283.0655517578125, 196.88584899902344, -267.0928039550781, -376.53253173828125, 319.9413146972656, -224.66030883789062, 230.41983032226562, 18.754058837890625, -399.861083984375, 357.640625, 1817.658447265625, 712.9768676757812, -103.33440399169922, 40.029579162597656, 419.25347900390625, 161.8772735595703, 86.7904281616211, 446.1201171875, -1.4001579284667969, 619.1512451171875, 2016.4022216796875, -43.3857421875, 624.8338623046875, 768.2474975585938, 832.2034912109375, 1498.55615234375, 801.4663696289062, 1437.8876953125, 381.9957275390625, 36.29161834716797, 671.0221557617188, 7.096717834472656, 77.47393798828125, 115.50611877441406, 288.6767578125, 197.53419494628906, 908.81689453125, 309.0909729003906, -123.94203186035156, 1387.671142578125, -273.69940185546875, 959.0762939453125, 218.13778686523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000564.npy"} +{"epoch": 0.8281938325991189, "step": 565, "batch_size": 64, "mean": 443.851806640625, "std": 782.0408325195312, "min": -1350.3958740234375, "p10": -380.18912048339837, "median": 277.72511291503906, "p90": 1302.3808837890626, "max": 2725.807373046875, "pos_frac": 0.65625, "sample": [-12.507099151611328, 1146.3343505859375, 1537.511474609375, -1350.3958740234375, 236.37548828125, 948.3760986328125, 2193.13916015625, -145.01564025878906, -163.4427032470703, -318.06640625, 566.8494873046875, 1145.76806640625, -252.0709228515625, -1112.1444091796875, 269.33294677734375, -487.96844482421875, -406.8131408691406, -35.36407470703125, -100.57901000976562, 515.4405517578125, 1232.6116943359375, -27.472068786621094, 236.95843505859375, 1691.5010986328125, -522.6870727539062, -17.44725799560547, 183.68426513671875, 346.8381652832031, -660.3975830078125, 1115.142333984375, 659.4734497070312, 235.29331970214844, 661.3097534179688, 864.92578125, 918.2789916992188, -2.9950408935546875, 903.0046997070312, 656.0504760742188, -288.22357177734375, -120.15349578857422, -71.7593994140625, 960.9109497070312, 777.3993530273438, 1759.9573974609375, 1017.3781127929688, 2725.807373046875, 567.7443237304688, 2375.5830078125, 1249.8128662109375, -583.9713134765625, -170.9884033203125, 15.009231567382812, 1015.6890869140625, 503.06817626953125, 771.432373046875, 263.2593688964844, 1119.156494140625, 286.1172790527344, 17.860286712646484, 26.66956329345703, -275.27294921875, 315.8607177734375, 174.4274139404297, 1324.9100341796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000565.npy"} +{"epoch": 0.8296622613803231, "step": 566, "batch_size": 64, "mean": 488.22674560546875, "std": 785.5648193359375, "min": -1459.34326171875, "p10": -424.5784271240234, "median": 550.1907348632812, "p90": 1488.8078125000002, "max": 2356.8896484375, "pos_frac": 0.71875, "sample": [-171.95782470703125, -752.8847045898438, 698.1480712890625, 2356.8896484375, 507.8945617675781, -178.32046508789062, 662.5811767578125, 142.80667114257812, -954.860107421875, 541.6007080078125, 618.3170166015625, -270.5606689453125, -25.56591796875, 690.8594360351562, 825.8959350585938, 413.805908203125, 760.4400634765625, 135.49790954589844, 28.04159927368164, -1459.34326171875, 1068.395263671875, 652.8931884765625, 558.78076171875, 47.99176025390625, 1449.990966796875, -519.9365234375, 588.8248901367188, 1117.0399169921875, -122.80650329589844, 1182.1639404296875, 918.134521484375, 963.7220458984375, 758.3123168945312, 102.89643859863281, 1012.810546875, -383.4032897949219, -96.2328872680664, 1971.8609619140625, 1088.12890625, -260.5928955078125, -442.22491455078125, 844.3876953125, 527.4124755859375, 1505.443603515625, 1430.5623779296875, 1882.9212646484375, 2206.97607421875, 589.3837890625, -898.5662231445312, 290.776611328125, -652.8295288085938, 1769.087646484375, 777.431884765625, 1650.4840087890625, -28.153493881225586, -381.87835693359375, 1139.4769287109375, 373.25494384765625, 1135.273193359375, -357.2320556640625, 50.63074493408203, 42.8001708984375, 677.4224243164062, 445.4090270996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000566.npy"} +{"epoch": 0.8311306901615272, "step": 567, "batch_size": 64, "mean": 369.55865478515625, "std": 499.2571716308594, "min": -1154.848388671875, "p10": -118.44011688232418, "median": 286.6072998046875, "p90": 990.5265075683595, "max": 1857.0963134765625, "pos_frac": 0.859375, "sample": [142.42230224609375, 8.759359359741211, 1337.8790283203125, 661.06884765625, 380.4715576171875, 671.0885009765625, 164.78887939453125, -168.89016723632812, 1054.754638671875, 180.7745361328125, 1007.8424072265625, 43.70124053955078, 1165.80908203125, 268.62261962890625, 182.44607543945312, 271.82452392578125, 1245.6204833984375, 644.70263671875, 639.487060546875, 47.7202033996582, 572.368408203125, 191.13778686523438, -308.2669982910156, 574.7398071289062, 344.8468017578125, 663.5238647460938, 690.626220703125, 1555.8642578125, -82.26609802246094, 702.2620849609375, 23.845388412475586, 301.39007568359375, 374.88232421875, -1154.848388671875, 243.4427490234375, -32.00593948364258, 118.37820434570312, 739.1722412109375, 1857.0963134765625, 833.5447387695312, 225.20431518554688, -844.5355834960938, 228.8251953125, 100.05677032470703, 207.00233459472656, 950.1227416992188, 335.95404052734375, 210.29644775390625, 494.10040283203125, 408.029052734375, 229.60447692871094, 936.60302734375, 5.790214538574219, 467.77557373046875, -135.37745666503906, 317.49334716796875, 147.10812377929688, 268.88214111328125, -133.94326782226562, 128.7852783203125, 415.957275390625, 541.5792846679688, -474.40655517578125, 460.21923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000567.npy"} +{"epoch": 0.8325991189427313, "step": 568, "batch_size": 64, "mean": 504.41290283203125, "std": 578.3226318359375, "min": -812.0108032226562, "p10": -168.02238922119133, "median": 412.47467041015625, "p90": 1185.023425292969, "max": 2041.5107421875, "pos_frac": 0.859375, "sample": [755.1221313476562, -812.0108032226562, 1290.1727294921875, 984.33349609375, 498.637451171875, 426.1957092285156, 758.6397094726562, 796.1929931640625, 398.7536315917969, -805.4494018554688, -390.87713623046875, -198.99801635742188, 666.4249267578125, 1969.647705078125, 243.30767822265625, 518.980224609375, 229.708251953125, 190.18902587890625, 229.9127960205078, -277.4935302734375, 621.9708862304688, 383.6717834472656, 966.3316650390625, 1088.9674072265625, 616.4227294921875, 273.8141174316406, 121.17391204833984, 2041.5107421875, 242.54129028320312, 448.5699768066406, 929.2298583984375, 811.885986328125, 1127.4189453125, 621.561279296875, 1647.8695068359375, 374.8484802246094, 175.00315856933594, 689.6133422851562, 278.0769348144531, 970.5525512695312, 1031.244873046875, 26.65127944946289, 804.6077880859375, 910.2531127929688, -1.6413726806640625, 352.34246826171875, 153.45236206054688, -95.74592590332031, 336.4160461425781, 1209.7110595703125, 89.44173431396484, 1354.1754150390625, 370.06182861328125, -676.7625122070312, 232.01507568359375, -410.89013671875, 1363.250244140625, 1102.30322265625, 270.0842590332031, 633.737548828125, 194.906982421875, 789.984619140625, 195.8907470703125, 144.5127410888672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000568.npy"} +{"epoch": 0.8340675477239354, "step": 569, "batch_size": 64, "mean": 397.50946044921875, "std": 765.1259155273438, "min": -2427.48046875, "p10": -336.70230407714837, "median": 322.66209411621094, "p90": 1378.3633911132813, "max": 2235.118896484375, "pos_frac": 0.78125, "sample": [-706.6355590820312, 142.53115844726562, 2000.727294921875, 182.26760864257812, 443.071044921875, -219.83934020996094, 697.04931640625, 113.88931274414062, -603.8681030273438, 179.87620544433594, 789.5076293945312, 1396.2452392578125, 512.91064453125, 583.5173950195312, 383.8172607421875, -594.9176025390625, 267.127685546875, 930.2063598632812, -275.31060791015625, -135.99856567382812, -1333.231689453125, 1749.05712890625, 89.85940551757812, -234.52337646484375, 5.508150100708008, 703.26953125, 1047.1656494140625, 24.973434448242188, 36.666351318359375, 107.72247314453125, 1392.6689453125, 962.79638671875, 737.4940185546875, -48.012939453125, 1498.1580810546875, 398.44281005859375, 122.95068359375, 352.6907653808594, 776.0863037109375, -2427.48046875, 584.6375732421875, 217.86013793945312, -78.72052001953125, 1206.0513916015625, 934.637939453125, 129.53282165527344, 857.7034301757812, 187.34788513183594, -363.0130310058594, 836.706787109375, -506.7860107421875, 1981.290771484375, 12.061454772949219, 548.4763793945312, 1074.20703125, -246.174560546875, 131.11737060546875, 750.4053955078125, 292.6334228515625, 76.99652099609375, 582.7365112304688, 602.3583984375, 1344.9837646484375, 2235.118896484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000569.npy"} +{"epoch": 0.8355359765051396, "step": 570, "batch_size": 64, "mean": 382.88421630859375, "std": 648.6878051757812, "min": -1059.5238037109375, "p10": -299.8995819091797, "median": 295.6323699951172, "p90": 1306.945727539063, "max": 2897.11669921875, "pos_frac": 0.75, "sample": [282.41168212890625, 1106.266357421875, 440.63134765625, 1698.9498291015625, 2897.11669921875, 171.3791046142578, 760.3261108398438, 1429.985595703125, 1624.24658203125, 305.84375, 377.00494384765625, 964.0786743164062, 285.4209899902344, 533.2833251953125, 125.3399887084961, 399.78070068359375, 921.883056640625, 489.69390869140625, 709.832275390625, -287.576416015625, 804.5797729492188, -305.1809387207031, -599.8438720703125, -773.7577514648438, 129.48643493652344, 850.0039672851562, -663.1452026367188, -318.2352600097656, 250.90093994140625, 384.05010986328125, 501.9766540527344, 279.7181396484375, -200.33892822265625, -532.0502319335938, 226.38858032226562, 317.47662353515625, 244.7818603515625, 455.87518310546875, 106.66155242919922, 1354.56787109375, 475.805419921875, -201.900390625, 323.8370056152344, 171.16986083984375, 1420.673828125, 306.8878173828125, 169.7719268798828, 746.5575561523438, 1199.93408203125, 714.76171875, -7.291095733642578, 215.38131713867188, 565.7052001953125, 393.6519775390625, -80.45240020751953, -1059.5238037109375, 209.60009765625, -20.627365112304688, 228.2654571533203, -122.88601684570312, -211.54690551757812, 1352.807861328125, 162.66726684570312, -198.4726104736328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000570.npy"} +{"epoch": 0.8370044052863436, "step": 571, "batch_size": 64, "mean": 448.14007568359375, "std": 725.8352661132812, "min": -1490.63427734375, "p10": -460.2388610839844, "median": 424.2345886230469, "p90": 1425.7176879882816, "max": 2242.4765625, "pos_frac": 0.765625, "sample": [569.5404663085938, 880.2353515625, 79.93794250488281, 880.9571533203125, -253.67141723632812, 610.148681640625, 69.49629211425781, 1049.98486328125, 321.642333984375, 1184.1077880859375, 2014.96484375, -462.5851745605469, -299.1931457519531, 1824.595458984375, 218.2018280029297, 554.1615600585938, 820.592041015625, 795.3182983398438, 509.8592224121094, 879.5662231445312, 2242.4765625, 1834.55712890625, 725.6552124023438, 1272.9736328125, -454.7641296386719, -31.999513626098633, 753.9453125, 364.90350341796875, 1508.3099365234375, 1183.8116455078125, 162.67396545410156, -99.41265869140625, 310.0647277832031, 1745.168212890625, 857.309814453125, -52.5690803527832, -669.15966796875, 148.97225952148438, 1350.5147705078125, 604.4927978515625, 928.7197875976562, -537.0160522460938, 1457.947509765625, 597.9905395507812, 483.565673828125, 76.42993927001953, 545.681640625, 146.44801330566406, 660.1968383789062, 49.43559265136719, -565.84326171875, -1490.63427734375, 295.95758056640625, 59.5478515625, -729.8971557617188, 363.673095703125, 249.8347625732422, 527.8758544921875, -901.4287109375, 21.598003387451172, -274.1914367675781, 610.2000122070312, -34.442352294921875, 133.529541015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000571.npy"} +{"epoch": 0.8384728340675477, "step": 572, "batch_size": 64, "mean": 471.08837890625, "std": 817.552490234375, "min": -1480.3902587890625, "p10": -350.2122406005859, "median": 399.5254669189453, "p90": 1503.9408081054694, "max": 3280.940673828125, "pos_frac": 0.6875, "sample": [-368.1257629394531, -90.33741760253906, -397.54156494140625, 542.017333984375, 1975.5714111328125, -140.91119384765625, 25.549224853515625, 30.51184844970703, -15.95071029663086, 154.37142944335938, -85.9798355102539, 1358.699951171875, 973.62255859375, 403.4893798828125, 689.9518432617188, 1080.2672119140625, 1043.58154296875, 1236.1214599609375, 1566.1868896484375, 641.3079833984375, 273.90234375, 727.7666015625, -44.9660530090332, 1141.8310546875, -1112.5194091796875, 64.7640609741211, 427.3343505859375, -329.78192138671875, -274.4638977050781, 847.630126953125, -648.896240234375, 565.5751342773438, 468.2835693359375, -70.52963256835938, 251.94375610351562, 3280.940673828125, 75.06700897216797, 395.5615539550781, -333.54534912109375, -663.6537475585938, 1727.0631103515625, 35.50437545776367, -326.5196533203125, 637.1554565429688, -289.18511962890625, -357.3551940917969, 1131.5374755859375, 883.99560546875, 657.2610473632812, -12.829658508300781, 947.9970703125, 340.52825927734375, 389.6197204589844, 2207.548095703125, 302.80340576171875, 497.6719055175781, 698.0194091796875, 2207.918212890625, -25.47906494140625, -1480.3902587890625, 667.7810668945312, 1273.697998046875, 1728.3150634765625, 642.3505249023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000572.npy"} +{"epoch": 0.8399412628487518, "step": 573, "batch_size": 64, "mean": 415.822021484375, "std": 599.5905151367188, "min": -598.7072143554688, "p10": -189.5795715332031, "median": 298.1593475341797, "p90": 1034.731298828125, "max": 2606.61328125, "pos_frac": 0.828125, "sample": [660.2565307617188, -124.13973999023438, 69.87752532958984, 9.32061767578125, 17.366439819335938, 2606.61328125, 928.2606811523438, -162.392578125, 102.22754669189453, 29.436080932617188, 468.8681335449219, 230.42738342285156, -598.7072143554688, 2185.6943359375, 412.5545654296875, 498.1776123046875, 1914.7237548828125, -529.6261596679688, 17.45899200439453, -201.23114013671875, -589.3811645507812, 1282.948486328125, 781.5880126953125, 599.3650512695312, 69.70270538330078, 308.28851318359375, 892.7360229492188, 1147.97314453125, 135.805419921875, 1207.1820068359375, -278.3477783203125, 976.8920288085938, 288.0301818847656, 585.6357421875, 659.9937744140625, -355.1209411621094, 332.2418212890625, 63.1512451171875, 793.550048828125, 29.45514678955078, 907.2352294921875, 367.3260803222656, 156.25848388671875, -98.65823364257812, 458.9739074707031, 69.0064697265625, 280.77691650390625, 61.99968338012695, 896.9473876953125, -296.07635498046875, 458.8468017578125, 227.77828979492188, 231.26153564453125, 739.8345336914062, 394.54290771484375, 91.05950164794922, -109.4793930053711, 998.5816650390625, 565.856689453125, 591.4617919921875, 167.533203125, 810.0217895507812, 1050.2239990234375, 124.43940734863281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000573.npy"} +{"epoch": 0.8414096916299559, "step": 574, "batch_size": 64, "mean": 454.5906982421875, "std": 620.6673583984375, "min": -1040.6273193359375, "p10": -338.6295257568359, "median": 431.1173553466797, "p90": 1389.715942382813, "max": 2017.048583984375, "pos_frac": 0.796875, "sample": [284.3877258300781, 737.0169067382812, -134.05160522460938, -287.3277587890625, 1286.755615234375, 126.16399383544922, 127.77606201171875, 1433.841796875, 1042.621826171875, 803.6371459960938, 52.34734344482422, 1523.0718994140625, 1039.497314453125, 868.1448974609375, 113.42599487304688, 723.3200073242188, 42.57122039794922, 430.90625, -648.7389526367188, 2017.048583984375, 559.2005004882812, 770.249267578125, 539.9662475585938, 674.2271118164062, 236.0748291015625, 740.9920043945312, 431.3284606933594, -411.71832275390625, 147.44993591308594, -292.221435546875, 164.49148559570312, -544.3040161132812, 209.65203857421875, 1632.794189453125, -60.90216827392578, 794.5595703125, -481.6232604980469, 885.9781494140625, 229.85752868652344, 536.9263916015625, 1006.6292114257812, -381.1444091796875, 726.6768798828125, 814.9371337890625, 743.1978759765625, 476.6266174316406, 511.28533935546875, 14.828964233398438, -1040.6273193359375, -358.5187072753906, 327.2074890136719, 95.61315155029297, 68.17414855957031, 1574.3466796875, -37.129669189453125, 888.4241943359375, 360.5162048339844, 1554.3858642578125, 254.6190643310547, 683.9881591796875, 143.18934631347656, 1781.87646484375, 543.6589965820312, -4.350942611694336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000574.npy"} +{"epoch": 0.8428781204111601, "step": 575, "batch_size": 64, "mean": 417.9283447265625, "std": 741.8092041015625, "min": -1368.513671875, "p10": -374.722476196289, "median": 408.4753723144531, "p90": 1372.3492431640632, "max": 2464.44091796875, "pos_frac": 0.75, "sample": [1447.9796142578125, 229.8468780517578, 928.2656860351562, 378.79229736328125, 582.6873779296875, -1105.9317626953125, 554.326171875, 531.2344970703125, 215.17123413085938, 148.30711364746094, 85.45783233642578, -46.465065002441406, 635.502197265625, 1195.7664794921875, 680.5100708007812, 336.5994567871094, -133.05612182617188, 781.4992065429688, 302.6936950683594, 322.95843505859375, 395.7004699707031, -39.379150390625, 1481.339111328125, 675.2271728515625, 600.0661010742188, 330.5903015136719, -1368.513671875, 315.3862609863281, -767.0869750976562, 613.4768676757812, -1003.6361083984375, 1188.1494140625, 912.9484252929688, 1504.225341796875, 256.25567626953125, -242.55491638183594, -292.7073974609375, -125.93779754638672, 1436.0985107421875, 1049.458251953125, -275.630126953125, 228.29693603515625, -409.8717956542969, 1044.5465087890625, 1223.6009521484375, -876.6025390625, 501.7238464355469, 498.53680419921875, 827.5390625, 2464.44091796875, 522.7715454101562, 952.7694091796875, 421.2502746582031, -11.3623046875, 1608.4781494140625, 151.82833862304688, 580.9606323242188, -1321.3267822265625, 2197.779052734375, 86.39070129394531, -88.43578338623047, 655.67138671875, 141.45555114746094, 631.351318359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000575.npy"} +{"epoch": 0.8443465491923642, "step": 576, "batch_size": 64, "mean": 312.60577392578125, "std": 585.5676879882812, "min": -941.4209594726562, "p10": -381.8668823242187, "median": 229.1067886352539, "p90": 1000.8891540527344, "max": 1718.741943359375, "pos_frac": 0.75, "sample": [186.9087677001953, 974.8387451171875, -152.12281799316406, 35.391788482666016, -348.99371337890625, -150.34461975097656, -183.37339782714844, 1681.2620849609375, 941.3547973632812, 162.86944580078125, 288.2629699707031, 1666.620361328125, 111.35430908203125, 93.49275207519531, 791.5184936523438, 381.56414794921875, 501.5284118652344, -831.1246948242188, 275.3824462890625, 608.3364868164062, 529.1174926757812, 1074.2882080078125, 604.573486328125, 823.5572509765625, 7.343303680419922, 1441.0712890625, -97.92399597167969, -401.4534912109375, -343.0337219238281, 801.4371948242188, -153.3014373779297, 247.1863555908203, 37.356964111328125, 37.454551696777344, 1012.2481689453125, 593.3079833984375, 1718.741943359375, 349.66436767578125, 211.0272216796875, 760.6074829101562, 61.51233673095703, 840.9996337890625, 201.13534545898438, 307.2689208984375, 188.67410278320312, -897.7244262695312, -58.58198547363281, 686.96435546875, -676.5076904296875, 663.119140625, 396.4241943359375, 1005.4310302734375, 990.2914428710938, 897.1602172851562, -941.4209594726562, -404.7921447753906, 160.87078857421875, 252.20510864257812, 308.0601501464844, 194.392333984375, -395.95538330078125, 123.34164428710938, -322.51385498046875, 138.41799926757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000576.npy"} +{"epoch": 0.8458149779735683, "step": 577, "batch_size": 64, "mean": 408.823486328125, "std": 662.3851318359375, "min": -1499.9072265625, "p10": -124.45094223022457, "median": 341.04017639160156, "p90": 1363.1252197265626, "max": 2462.689208984375, "pos_frac": 0.828125, "sample": [-1499.9072265625, 2462.689208984375, 1356.7520751953125, 1559.880615234375, 179.648681640625, 29.347213745117188, 340.6792297363281, 132.5297088623047, 160.446533203125, -65.44027709960938, 698.431396484375, 352.1156921386719, 587.1713256835938, 437.33221435546875, 445.475341796875, 765.396728515625, 1027.617919921875, 1365.8565673828125, -586.9407958984375, 624.9016723632812, 173.26051330566406, 85.99259185791016, -1034.5810546875, 229.9910888671875, 287.30413818359375, 793.3765869140625, 141.2322235107422, 233.82798767089844, 469.2237243652344, 408.5535888671875, 168.04281616210938, 479.2745361328125, 264.10052490234375, -92.78289031982422, 1814.8912353515625, 49.69488525390625, 431.77545166015625, -743.08056640625, 242.4704132080078, -395.5321044921875, 343.8830871582031, 183.57992553710938, 341.401123046875, 538.9924926757812, -16.06720733642578, 592.44921875, 62.62693786621094, 640.529052734375, 834.3929443359375, 1119.589599609375, 78.15556335449219, 327.3849182128906, 1467.4295654296875, -138.02296447753906, 1773.39501953125, 501.24151611328125, 477.42193603515625, -707.2640380859375, 98.20204162597656, 1599.4375, -39.19392395019531, 983.7018432617188, 387.1145935058594, 333.3045654296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000577.npy"} +{"epoch": 0.8472834067547724, "step": 578, "batch_size": 64, "mean": 450.94769287109375, "std": 584.2476196289062, "min": -898.1187744140625, "p10": -156.15890197753905, "median": 373.345703125, "p90": 1169.9358398437505, "max": 2203.907470703125, "pos_frac": 0.78125, "sample": [2203.907470703125, 224.67822265625, 788.593017578125, 101.55029296875, 552.2866821289062, 234.11822509765625, 1346.124267578125, 470.1529235839844, 215.86593627929688, -138.0391082763672, 139.40354919433594, 908.2766723632812, 488.7769775390625, 241.83987426757812, 93.82962036132812, -576.1928100585938, 109.28306579589844, -0.44077301025390625, 1908.08544921875, 834.89013671875, 690.3517456054688, 386.7955322265625, 1017.8804321289062, -141.1174774169922, -9.010719299316406, 880.1886596679688, 818.5576782226562, -110.07453155517578, -343.1359558105469, 126.83881378173828, -24.947982788085938, -380.3780517578125, 359.8958740234375, 416.1894226074219, 105.40516662597656, 540.2588500976562, -183.64404296875, 311.328369140625, -898.1187744140625, -149.0643310546875, 528.782958984375, 728.927978515625, 566.3228149414062, 454.4316711425781, 1341.2762451171875, 1015.5230102539062, 346.93975830078125, 73.72419738769531, 847.6297607421875, 185.64646911621094, 890.748779296875, 784.2390747070312, 59.67250442504883, 1226.5572509765625, 933.2139892578125, 1744.664306640625, -159.19943237304688, 248.90916442871094, 1492.7152099609375, 1037.8192138671875, 578.49267578125, -322.36346435546875, 128.9239044189453, 565.8656616210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000578.npy"} +{"epoch": 0.8487518355359766, "step": 579, "batch_size": 64, "mean": 263.0865173339844, "std": 618.6154174804688, "min": -1730.3099365234375, "p10": -439.40496826171864, "median": 305.1524658203125, "p90": 1003.3020446777346, "max": 2289.741455078125, "pos_frac": 0.71875, "sample": [-61.601356506347656, -200.99229431152344, -1730.3099365234375, 564.20849609375, -71.4853515625, 117.84762573242188, 868.9215698242188, 364.8151550292969, 685.4552612304688, 406.6587219238281, 1.3073654174804688, -148.48240661621094, -751.1778564453125, 327.6142578125, 231.15347290039062, 662.558349609375, 211.6730194091797, 530.39404296875, 1130.892822265625, 212.16575622558594, 1027.2747802734375, 93.63800811767578, 322.37939453125, 507.95941162109375, 7.916738510131836, 467.7269287109375, 345.85888671875, 45.24414825439453, 576.9569091796875, 248.5844268798828, 562.0447998046875, -486.85748291015625, 609.7721557617188, 659.9560546875, 2289.741455078125, 1446.671630859375, 395.6932678222656, -789.060302734375, -50.48060989379883, 537.3689575195312, 1111.0233154296875, -270.3540954589844, 727.6241455078125, -328.68243408203125, 1231.8380126953125, 33.66960144042969, -38.17305374145508, -161.20753479003906, -567.6207275390625, 316.4254150390625, 947.3656616210938, 482.81463623046875, -92.65547180175781, 616.8956909179688, 354.6131896972656, -979.2010498046875, 1255.57568359375, 28.241331100463867, 293.8795166015625, -900.5361938476562, 173.7957000732422, -194.9984130859375, 283.93499755859375, 343.26336669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000579.npy"} +{"epoch": 0.8502202643171806, "step": 580, "batch_size": 64, "mean": 508.994140625, "std": 712.4046020507812, "min": -1241.568359375, "p10": -97.60138778686523, "median": 360.2447052001953, "p90": 1314.2037231445313, "max": 3031.452392578125, "pos_frac": 0.796875, "sample": [-194.0856170654297, 1016.2464599609375, 246.75376892089844, 591.149169921875, -140.21246337890625, 96.85736846923828, 185.3096160888672, 2104.3583984375, 459.1296691894531, 301.8375549316406, 420.8721618652344, -48.41650390625, 3031.452392578125, 243.54226684570312, 65.29202270507812, 693.54833984375, 393.94287109375, 93.59941101074219, 856.256591796875, 501.5244140625, 282.4189453125, 1005.7210693359375, 1112.4453125, 496.5337829589844, 554.9393310546875, 11.128242492675781, 1200.0426025390625, 1297.8729248046875, 551.682373046875, 144.3387451171875, -1.5579605102539062, 150.3757781982422, 102.7138900756836, 134.57630920410156, 807.9853515625, 1321.20263671875, 34.735076904296875, -104.56339263916016, 837.35791015625, 805.8763427734375, 1958.7908935546875, 9.697675704956055, -1241.568359375, 1071.5869140625, -56.59552764892578, 769.5264282226562, -430.9908752441406, -95.3839111328125, 621.835205078125, 2805.66015625, 48.95439147949219, -66.60235595703125, 1487.9071044921875, 132.26382446289062, 422.915771484375, 391.3563232421875, -98.5517349243164, 329.1330871582031, -6.433874130249023, -294.5851135253906, 459.9810791015625, 984.9277954101562, 1396.93603515625, 310.08209228515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000580.npy"} +{"epoch": 0.8516886930983847, "step": 581, "batch_size": 64, "mean": 357.78289794921875, "std": 637.6695556640625, "min": -875.061279296875, "p10": -495.1543975830078, "median": 370.118896484375, "p90": 1024.33544921875, "max": 2957.865234375, "pos_frac": 0.734375, "sample": [144.10861206054688, 112.58351135253906, 165.84768676757812, -113.98545837402344, 389.7462158203125, 430.6314392089844, -527.4578247070312, 142.52471923828125, 1017.5311279296875, -621.5623779296875, 772.1749877929688, 800.8275146484375, 573.5271606445312, 363.26129150390625, 655.4034423828125, -461.9369201660156, 1445.89013671875, 902.6315307617188, 508.12762451171875, 1364.5706787109375, -204.1852264404297, 639.0792846679688, 83.1114501953125, 610.8177490234375, 618.5638427734375, 492.0337829589844, 330.25518798828125, 225.60577392578125, -25.623046875, 1027.2515869140625, -291.29461669921875, 2957.865234375, -55.550540924072266, 1549.6943359375, 261.02947998046875, 545.0087890625, 899.1407470703125, -115.87577819824219, 87.76459503173828, 452.195068359375, 377.884765625, 84.44656372070312, -51.11187744140625, -296.47479248046875, 1204.6810302734375, 269.23846435546875, 614.5096435546875, 890.5457153320312, 1386.0692138671875, 582.288818359375, -637.025634765625, -875.061279296875, 55.8504753112793, -500.6708068847656, 696.978271484375, 394.22943115234375, -557.9682006835938, -547.3848876953125, 45.13201904296875, -482.28277587890625, 307.9767150878906, 896.4732666015625, 376.97650146484375, 511.4715270996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000581.npy"} +{"epoch": 0.8531571218795888, "step": 582, "batch_size": 64, "mean": 402.342041015625, "std": 586.1719970703125, "min": -1099.01513671875, "p10": -236.53863067626952, "median": 305.73118591308594, "p90": 1183.6563720703125, "max": 1782.0697021484375, "pos_frac": 0.75, "sample": [-1099.01513671875, 205.26031494140625, 1282.9835205078125, 305.2232666015625, 562.2835693359375, -225.17633056640625, 285.48583984375, 462.66375732421875, 500.0703125, -71.91061401367188, 360.57781982421875, 85.25090789794922, -32.71086120605469, 768.04345703125, 924.1218872070312, 1507.737548828125, -60.64100646972656, -13.208480834960938, -302.31927490234375, 1250.587646484375, 409.37738037109375, 82.90248107910156, 621.5330200195312, -627.046875, -292.0535888671875, 945.9688720703125, 174.26150512695312, 1156.0068359375, 1557.019775390625, 1782.0697021484375, 882.975341796875, 351.0236511230469, 1156.5484619140625, 2.6747055053710938, -420.25244140625, 133.32553100585938, 22.488197326660156, 181.49488830566406, 669.4573974609375, 649.048828125, 362.3592834472656, -219.2238006591797, 948.7335815429688, 1056.8702392578125, -0.6829681396484375, 1110.462646484375, 2.7192001342773438, 766.8748168945312, -241.40818786621094, 254.4235382080078, -19.537303924560547, 772.654541015625, 1195.2740478515625, 300.83270263671875, -40.527130126953125, -977.2299194335938, 250.13272094726562, 306.2391052246094, 823.0663452148438, 655.8043823242188, 1260.57568359375, 221.49037170410156, 754.5632934570312, 71.29068756103516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000582.npy"} +{"epoch": 0.8546255506607929, "step": 583, "batch_size": 64, "mean": 243.46913146972656, "std": 683.2799072265625, "min": -1408.2015380859375, "p10": -679.816845703125, "median": 244.92453002929688, "p90": 979.7735351562501, "max": 2107.381103515625, "pos_frac": 0.734375, "sample": [77.47109985351562, 270.42901611328125, 265.1294250488281, -711.7911987304688, 949.075927734375, 22.340469360351562, 962.4983520507812, 267.5736389160156, 2107.381103515625, 147.34506225585938, -215.17144775390625, 37.77760314941406, -907.6898803710938, 331.1378173828125, 177.7437744140625, -251.85342407226562, 938.9865112304688, 939.4381103515625, 391.3907165527344, 1078.2335205078125, 510.82568359375, 66.3387222290039, -605.2100219726562, -398.21923828125, -131.01153564453125, -992.4427490234375, 371.8388671875, 204.4229736328125, -564.7503662109375, -819.5359497070312, -127.30577087402344, -183.97799682617188, 581.38671875, 595.8406982421875, 349.7041931152344, 2038.7728271484375, 408.1026611328125, 12.372749328613281, 987.1771850585938, 242.95651245117188, 5.71160888671875, 358.5417175292969, 375.1145935058594, -96.84085083007812, -1066.908447265625, -1408.2015380859375, 1641.59375, 441.95867919921875, -416.8911437988281, 775.46435546875, 943.0341796875, 1345.8773193359375, 143.18426513671875, 518.021728515625, 899.48828125, -713.9583129882812, 47.47846603393555, 483.0345153808594, 55.00946044921875, 246.89254760742188, 1253.7086181640625, 31.25933837890625, 36.61219024658203, 258.1060485839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000583.npy"} +{"epoch": 0.856093979441997, "step": 584, "batch_size": 64, "mean": 499.4644775390625, "std": 611.7685546875, "min": -666.3199462890625, "p10": -150.92644424438473, "median": 343.244873046875, "p90": 1293.231433105469, "max": 2544.496337890625, "pos_frac": 0.8125, "sample": [563.158203125, 235.62718200683594, -172.04197692871094, 256.9716491699219, -5.535888671875, -372.85198974609375, 598.8975219726562, 512.101806640625, 1174.642333984375, 71.75172424316406, 25.39776611328125, -101.65686798095703, 114.27435302734375, 203.5418701171875, 398.58502197265625, 1124.7149658203125, -282.62884521484375, 1244.81103515625, 969.21728515625, -514.92724609375, 289.5560302734375, 592.6493530273438, -29.726272583007812, 265.56787109375, 375.09423828125, 282.1013488769531, 1341.23876953125, 214.23477172851562, 205.23861694335938, 250.334228515625, 1193.1351318359375, 534.8570556640625, 334.0116882324219, -666.3199462890625, 694.7509765625, 309.6708984375, 626.5694580078125, 515.5972900390625, 228.96676635742188, -98.86776733398438, 498.31829833984375, -235.25860595703125, 352.4780578613281, 1719.6922607421875, 884.162841796875, 308.24884033203125, 1119.63671875, 1220.21142578125, -252.32861328125, 2544.496337890625, 1937.7447509765625, 32.37528991699219, 1591.490478515625, 1313.9830322265625, 438.7679748535156, 1637.0509033203125, 251.0028533935547, 676.846435546875, -25.927452087402344, 284.9672546386719, 522.8021240234375, 375.49456787109375, 1128.91845703125, 137.8398895263672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000584.npy"} +{"epoch": 0.8575624082232012, "step": 585, "batch_size": 64, "mean": 450.8072204589844, "std": 753.0850830078125, "min": -1992.7523193359375, "p10": -357.9877807617187, "median": 445.75559997558594, "p90": 1356.4398681640625, "max": 2754.621826171875, "pos_frac": 0.75, "sample": [213.11740112304688, -136.3931121826172, 686.352294921875, 328.8426513671875, 302.143798828125, 2754.621826171875, 980.2073364257812, -620.2405395507812, 418.8590393066406, 287.466796875, 1099.0565185546875, -1.1809539794921875, 622.0863037109375, 666.1651611328125, 3.1285629272460938, 1567.4322509765625, 1504.949951171875, 613.27197265625, 1535.5020751953125, 540.9465942382812, 497.9163513183594, 1350.287353515625, 309.897705078125, 474.0883483886719, 1114.984619140625, -98.29493713378906, 785.5389404296875, 1069.155517578125, 672.9800415039062, 160.8409423828125, 68.23579406738281, 780.0758666992188, 1359.07666015625, 80.68115234375, 1104.5787353515625, 226.18634033203125, 968.0484619140625, -313.7217102050781, -44.08500671386719, 924.5231323242188, 398.26513671875, -1992.7523193359375, 920.1571655273438, -821.896484375, 1021.0156860351562, 491.75164794921875, 478.9012451171875, -500.4137878417969, 874.1608276367188, -229.13412475585938, -78.20574951171875, -1138.1396484375, -91.13499450683594, -376.9589538574219, 99.11122131347656, 1642.942626953125, 472.65216064453125, -551.6158447265625, 865.8162841796875, -32.870513916015625, 158.88487243652344, 152.46409606933594, 149.63307189941406, 2081.69873046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000585.npy"} +{"epoch": 0.8590308370044053, "step": 586, "batch_size": 64, "mean": 304.63677978515625, "std": 602.399658203125, "min": -1237.7825927734375, "p10": -380.4061401367187, "median": 349.5049133300781, "p90": 1015.1013610839847, "max": 1641.10986328125, "pos_frac": 0.703125, "sample": [1181.952392578125, -221.9962158203125, -305.142578125, 533.248046875, 590.166259765625, 423.43267822265625, -184.202880859375, 346.2283935546875, 278.5252380371094, -8.683082580566406, -621.9930419921875, 359.7737121582031, 815.7147827148438, 1472.900146484375, -67.2400131225586, -141.0023956298828, 646.9573364257812, -816.455078125, 271.0582275390625, 226.67063903808594, -150.3094024658203, 150.56192016601562, 1072.0009765625, 756.5150146484375, 938.5375366210938, 16.738174438476562, 661.7586059570312, -257.35675048828125, 77.50408172607422, 381.7283630371094, 731.654296875, -168.38961791992188, 345.32415771484375, -1237.7825927734375, 728.653564453125, 793.160400390625, 1641.10986328125, -399.540771484375, 581.6145629882812, -154.1529541015625, 924.828369140625, -442.170166015625, 479.4656982421875, 422.764404296875, -1108.6553955078125, -831.3367919921875, 386.6792297363281, 409.75543212890625, 142.64488220214844, 579.3419799804688, -335.7586669921875, 383.63787841796875, 1047.9144287109375, 1510.977294921875, 867.0413208007812, 247.1974334716797, 424.1094970703125, 1625.746337890625, 110.81392669677734, -108.80884552001953, 230.162353515625, 740.7211303710938, 352.78143310546875, 147.65963745117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000586.npy"} +{"epoch": 0.8604992657856094, "step": 587, "batch_size": 64, "mean": 406.54833984375, "std": 578.6703491210938, "min": -1460.68017578125, "p10": -183.46811523437498, "median": 372.3928527832031, "p90": 1300.7094238281256, "max": 2010.009765625, "pos_frac": 0.765625, "sample": [567.5638427734375, 950.0826416015625, 425.83062744140625, 81.2901611328125, 328.1465148925781, 459.1860656738281, 593.2048950195312, -77.91825866699219, -0.8299636840820312, 1501.7686767578125, -386.6271057128906, -111.43856811523438, 2010.009765625, 340.6881408691406, 240.87411499023438, 486.789306640625, -30.185546875, 394.60638427734375, 417.8821105957031, 394.449462890625, -155.11988830566406, 588.54638671875, 1461.383056640625, 19.06804084777832, 350.33624267578125, 1169.643310546875, -195.6173553466797, 996.8670043945312, 686.443603515625, -10.329360961914062, 128.7100830078125, 60.07640838623047, -561.4851684570312, 55.119293212890625, 707.9661254882812, 119.65458679199219, 245.97552490234375, -1460.68017578125, 919.851318359375, 135.76539611816406, -329.0933837890625, 827.9232177734375, 149.7838134765625, 756.9219970703125, -47.95127868652344, 1369.42431640625, 652.7261962890625, -246.13414001464844, 214.38613891601562, 328.51690673828125, 462.63531494140625, 1558.060791015625, 442.2733154296875, 655.113525390625, 518.769775390625, 590.7099609375, 948.235595703125, -200.99234008789062, 324.44403076171875, 20.553325653076172, 412.4192810058594, -107.846435546875, 1513.7862548828125, 1356.880615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000587.npy"} +{"epoch": 0.8619676945668135, "step": 588, "batch_size": 64, "mean": 351.92852783203125, "std": 518.3449096679688, "min": -914.514404296875, "p10": -258.3384338378906, "median": 315.6816864013672, "p90": 1065.5239013671876, "max": 1454.796142578125, "pos_frac": 0.765625, "sample": [138.97315979003906, 553.7627563476562, -212.28634643554688, 146.01580810546875, 43.98252868652344, 662.8480224609375, 717.4118041992188, 27.108030319213867, 758.360107421875, 743.812255859375, 128.60003662109375, -409.5875244140625, -33.87554931640625, 554.7478637695312, -278.0750427246094, -419.2943420410156, 827.7308349609375, 534.465087890625, 226.09097290039062, 259.1846618652344, 659.0474853515625, -152.03005981445312, -173.4718475341797, 1218.134765625, 132.32728576660156, 530.5897827148438, 1351.6451416015625, -105.2264404296875, -484.28662109375, 34.941307067871094, 288.4974670410156, 90.85227966308594, 289.51043701171875, 1076.666748046875, 951.8074340820312, 500.15631103515625, 171.06361389160156, -201.70913696289062, 1294.9337158203125, 1039.52392578125, 1279.7083740234375, 680.1704711914062, 40.572662353515625, -570.7753295898438, 671.4804077148438, 427.0176086425781, 178.7655487060547, 986.5147705078125, -914.514404296875, 376.6046142578125, -17.45046615600586, 1454.796142578125, 1190.750244140625, 416.513427734375, 464.578857421875, 599.627685546875, 646.204345703125, 374.209716796875, -176.6820526123047, 182.62945556640625, 9.196006774902344, 341.8529357910156, 906.736328125, -508.03179931640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000588.npy"} +{"epoch": 0.8634361233480177, "step": 589, "batch_size": 64, "mean": 469.1645202636719, "std": 575.0010986328125, "min": -784.8447265625, "p10": -229.50870056152337, "median": 413.39173889160156, "p90": 1344.613232421875, "max": 2031.087646484375, "pos_frac": 0.84375, "sample": [1587.1064453125, 342.0052795410156, 394.5111083984375, 56.10856246948242, 686.4464111328125, 148.59767150878906, -83.53669738769531, 630.5589599609375, 409.49969482421875, -364.36395263671875, -22.198867797851562, 417.2837829589844, 84.2511215209961, 277.05035400390625, 692.458251953125, 1338.8165283203125, 2031.087646484375, 127.70600128173828, 563.5803833007812, 199.97702026367188, 465.8485107421875, 229.51806640625, 167.58534240722656, 1478.5291748046875, 56.65826416015625, 561.2425537109375, 391.1791076660156, 723.9389038085938, 1796.57666015625, -166.51397705078125, 589.7932739257812, 645.163330078125, 849.9027709960938, 522.343505859375, 1161.83984375, -256.5064392089844, -318.94317626953125, 127.43280792236328, 763.8359985351562, 142.98495483398438, 526.4652709960938, -592.1400756835938, 88.49636840820312, 781.8953857421875, 619.3592529296875, 549.1571044921875, 1118.78369140625, 260.03973388671875, 595.863525390625, 1459.7198486328125, 589.838623046875, 237.07601928710938, 372.2135009765625, -784.8447265625, 346.552490234375, -648.7750854492188, 603.7786865234375, 1403.12744140625, 32.58363342285156, 303.4878234863281, 1347.0975341796875, 703.9378051757812, -442.7946472167969, 1106.25341796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000589.npy"} +{"epoch": 0.8649045521292217, "step": 590, "batch_size": 64, "mean": 457.81378173828125, "std": 602.9258422851562, "min": -484.70989990234375, "p10": -263.5567901611328, "median": 347.11749267578125, "p90": 1251.3738769531253, "max": 2191.18359375, "pos_frac": 0.75, "sample": [579.984130859375, 1629.9434814453125, 801.1719970703125, 271.22021484375, 1022.7948608398438, -363.55670166015625, 155.24151611328125, 2191.18359375, 982.7383422851562, -255.37167358398438, 608.1804809570312, -305.82611083984375, 368.9388427734375, 1464.4859619140625, -140.08074951171875, -74.48664855957031, -246.17971801757812, 125.82959747314453, 818.6785888671875, 229.25015258789062, 74.6275634765625, -440.31915283203125, -137.34188842773438, 307.54852294921875, -105.87652587890625, 180.84783935546875, 823.414306640625, -316.1607666015625, 1350.0955810546875, -125.98865509033203, 1086.7169189453125, 483.2808837890625, 37.853355407714844, 1452.11181640625, 206.42977905273438, 394.7126159667969, 337.18414306640625, 678.828857421875, 1190.860107421875, 571.0563354492188, 550.0891723632812, -267.064697265625, -126.57736206054688, -484.70989990234375, 357.05084228515625, 67.20826721191406, 178.0884552001953, 957.2777709960938, 268.90887451171875, -404.40802001953125, 440.6595764160156, 1277.308349609375, 289.32012939453125, -112.83019256591797, 622.7159423828125, 699.1708374023438, 907.859375, 2017.36083984375, 127.8267593383789, 1034.9212646484375, 1088.6885986328125, 924.1041259765625, 243.00465393066406, 730.0874633789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000590.npy"} +{"epoch": 0.8663729809104258, "step": 591, "batch_size": 64, "mean": 415.48028564453125, "std": 583.6268310546875, "min": -1075.7181396484375, "p10": -363.5605529785156, "median": 402.725830078125, "p90": 1173.308435058594, "max": 1701.44580078125, "pos_frac": 0.828125, "sample": [-16.49425506591797, 1188.302734375, 1433.83154296875, 649.0939331054688, 1000.1446533203125, -81.32273864746094, 20.467878341674805, -1075.7181396484375, 499.56634521484375, 1701.44580078125, 990.9373779296875, 147.7728271484375, 618.699462890625, 382.2509765625, 54.01971435546875, 1700.1197509765625, 801.9869995117188, 631.1743774414062, 125.5343017578125, 284.82537841796875, -108.87001037597656, 981.4696655273438, 189.30775451660156, 79.1557846069336, 571.7203979492188, 157.87814331054688, -705.1505737304688, 589.111572265625, 617.73291015625, 395.33380126953125, -316.45269775390625, 147.7841796875, 410.11785888671875, -639.0439453125, 1147.236083984375, 216.7899627685547, 178.79043579101562, 244.5757598876953, 690.4317626953125, -383.7496337890625, -387.9815673828125, 815.1867065429688, 186.1210174560547, 410.2183532714844, -596.975830078125, 1024.5177001953125, 854.092529296875, 95.07806396484375, 657.1354370117188, 1053.0689697265625, 13.256431579589844, 1184.4822998046875, 1013.4503784179688, 114.59112548828125, 1298.674560546875, 474.4648132324219, 539.7994384765625, 1325.061279296875, -803.3141479492188, 655.3806762695312, 142.5496063232422, 443.63262939453125, 318.32147216796875, 239.1168212890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000591.npy"} +{"epoch": 0.8678414096916299, "step": 592, "batch_size": 64, "mean": 380.2485656738281, "std": 752.1707153320312, "min": -1387.4378662109375, "p10": -557.9162475585937, "median": 378.18873596191406, "p90": 1309.4586547851563, "max": 2099.388427734375, "pos_frac": 0.734375, "sample": [382.5086669921875, 1266.1505126953125, -103.38174438476562, 433.04327392578125, -910.9908447265625, 169.2821807861328, 534.9540405273438, 1078.5299072265625, 1721.553466796875, 1656.3973388671875, -928.4586791992188, 2099.388427734375, 258.3094177246094, 822.1322021484375, -1032.359619140625, -500.4353942871094, -171.061767578125, 338.4832458496094, 151.8822021484375, 1339.5516357421875, 107.07062530517578, 1160.201904296875, -26.036949157714844, 1184.53173828125, 373.8688049316406, 114.75247192382812, 163.32666015625, 1035.6337890625, 1581.03955078125, 602.8519897460938, 69.61062622070312, -330.2681884765625, 175.23748779296875, 812.211669921875, 1226.2874755859375, 303.2969055175781, -580.1508178710938, 363.9496154785156, -506.03558349609375, 636.846435546875, 488.58929443359375, -245.86495971679688, 735.3248901367188, 1722.44189453125, 362.18035888671875, -330.1060485839844, 485.1437072753906, -1387.4378662109375, 912.9346923828125, 412.8334655761719, 723.65380859375, 75.08088684082031, 829.8455200195312, 583.4725341796875, -1035.4212646484375, 927.5072631835938, 668.71533203125, 134.86514282226562, 1328.019287109375, -307.6230163574219, -1289.490478515625, 788.5136108398438, 701.3567504882812, -22.33172035217285], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000592.npy"} +{"epoch": 0.869309838472834, "step": 593, "batch_size": 64, "mean": 297.4552917480469, "std": 664.4849243164062, "min": -1103.30029296875, "p10": -497.43423461914057, "median": 196.69361877441406, "p90": 1316.5947387695314, "max": 1865.866943359375, "pos_frac": 0.625, "sample": [-20.138214111328125, 727.4019775390625, -265.80767822265625, 104.26922607421875, -739.5697631835938, -461.13519287109375, 1516.1279296875, -709.7437133789062, 1274.203125, 609.12744140625, -192.2010498046875, 1634.489501953125, 1417.1824951171875, 1093.631591796875, -3.9741287231445312, -826.2723388671875, 1334.7625732421875, 1634.6600341796875, 697.2091674804688, 130.9490203857422, -512.990966796875, 178.9853515625, 376.40771484375, 78.62076568603516, 70.95799255371094, 1865.866943359375, 517.8385009765625, 111.0916519165039, 418.6234130859375, -883.310546875, -2.6647891998291016, 482.7345886230469, -360.16534423828125, 214.40188598632812, 710.1287841796875, 826.3355102539062, -125.27365112304688, 1338.669189453125, -270.3389892578125, 443.0506896972656, -831.3797607421875, 611.3961791992188, -326.2820129394531, -1103.30029296875, -27.161741256713867, -101.9794921875, -34.171180725097656, 9.581993103027344, 468.3648986816406, 859.6824340820312, 504.33685302734375, 293.0105285644531, 933.7244873046875, 481.6983337402344, 334.31658935546875, 948.9910278320312, -21.772430419921875, 43.72895050048828, -236.8037109375, 679.403076171875, -83.07427978515625, 558.03759765625, -179.50259399414062, 822.1521606445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000593.npy"} +{"epoch": 0.8707782672540382, "step": 594, "batch_size": 64, "mean": 371.8956298828125, "std": 623.448486328125, "min": -974.0704345703125, "p10": -385.85698547363273, "median": 313.3977355957031, "p90": 1221.362060546875, "max": 1812.449951171875, "pos_frac": 0.765625, "sample": [1002.8441162109375, 4.014984130859375, 164.70155334472656, 17.198640823364258, -53.909019470214844, -723.9934692382812, 617.9157104492188, 419.98504638671875, 60.983985900878906, 773.695556640625, 545.1692504882812, 370.99517822265625, 1812.449951171875, 721.9539184570312, 135.73397827148438, 938.2003173828125, 767.44580078125, 169.4950714111328, 1187.7459716796875, -455.1393127441406, -557.39599609375, 1438.1549072265625, -974.0704345703125, 1656.0015869140625, -292.9410400390625, 572.1614379882812, -124.6108627319336, 341.9762268066406, 450.1231994628906, -194.80313110351562, 74.39039611816406, 462.094970703125, 1371.0206298828125, 565.2611694335938, 291.78680419921875, 623.9727783203125, 665.0636596679688, 28.187015533447266, 271.7283630371094, -689.9246826171875, 9.64272689819336, 992.7852172851562, 163.7098846435547, -174.69094848632812, 1227.8524169921875, 971.5731201171875, 894.9848022460938, 554.630126953125, -412.6347961425781, 450.7922668457031, -908.5673828125, 39.68914794921875, 290.84307861328125, -196.278564453125, 335.0086669921875, -44.37247848510742, 1206.2178955078125, 951.745361328125, 1609.0576171875, 258.3054504394531, 1310.49169921875, -323.37542724609375, 83.23272705078125, 55.01158905029297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000594.npy"} +{"epoch": 0.8722466960352423, "step": 595, "batch_size": 64, "mean": 430.8812255859375, "std": 606.3203125, "min": -480.0907897949219, "p10": -175.6789123535156, "median": 301.1611022949219, "p90": 1058.609069824219, "max": 2760.617919921875, "pos_frac": 0.78125, "sample": [913.6915283203125, 298.28033447265625, 662.6008911132812, 32.405616760253906, -86.63907623291016, 1073.4254150390625, 364.74200439453125, 1004.5958862304688, -295.99981689453125, 416.44879150390625, 1591.318359375, 89.45323181152344, 1213.175048828125, 990.69189453125, -165.6244659423828, 343.492919921875, 358.1253356933594, 2017.94970703125, 192.23243713378906, -110.81887817382812, 93.68704223632812, -186.11672973632812, 641.5166015625, 378.7160949707031, -260.70086669921875, -283.4208679199219, 266.4686584472656, 106.63495635986328, 280.0841064453125, 304.0418701171875, 163.3475341796875, 249.24420166015625, 296.52484130859375, 293.65277099609375, 980.4697265625, 6.899370193481445, 326.5123596191406, 652.3740844726562, 1606.3985595703125, 327.1707763671875, -42.07594299316406, -480.0907897949219, 484.3351135253906, -363.6684875488281, 421.0334167480469, 407.7482604980469, 285.7259521484375, -179.9879608154297, 760.5025024414062, 688.32958984375, -18.79633331298828, 775.7764282226562, 130.19003295898438, -128.95848083496094, 413.99462890625, 98.13871765136719, 1024.03759765625, 99.26129150390625, -156.47576904296875, 64.14163208007812, 2003.48388671875, 536.4407958984375, 845.6439208984375, 2760.617919921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000595.npy"} +{"epoch": 0.8737151248164464, "step": 596, "batch_size": 64, "mean": 296.531982421875, "std": 639.2484741210938, "min": -1199.8231201171875, "p10": -509.6370330810546, "median": 197.4710235595703, "p90": 1241.371496582032, "max": 2122.455810546875, "pos_frac": 0.75, "sample": [-389.6156005859375, 54.44451904296875, 742.429443359375, 55.449005126953125, 918.7175903320312, 599.2418823242188, 225.28146362304688, 535.8988647460938, 36.308441162109375, 547.4609375, 2122.455810546875, -156.03225708007812, -595.012939453125, 468.5503234863281, 1035.5928955078125, 1486.5318603515625, 191.21913146972656, 154.1024627685547, 498.8258972167969, 647.95849609375, -105.04449462890625, 542.84423828125, 97.88078308105469, 236.87379455566406, -553.8137817382812, 162.0457305908203, 486.2066955566406, 425.4256591796875, 286.4503173828125, 134.525634765625, -366.056884765625, 111.4446029663086, -556.5528564453125, 1575.8951416015625, 184.2645721435547, 1063.444091796875, 1572.4534912109375, 528.3950805664062, 26.600095748901367, -113.8269271850586, 192.92706298828125, 506.81829833984375, 137.8470458984375, 390.6556396484375, -225.82208251953125, 218.00743103027344, 1317.6260986328125, 227.92283630371094, -337.3137512207031, -204.31971740722656, 1808.992431640625, 46.67890167236328, -1199.8231201171875, 202.01498413085938, 330.9480285644531, -723.5433959960938, 145.04811096191406, -611.1227416992188, 710.4508666992188, 151.30831909179688, 1581.246826171875, -406.5579528808594, 409.0942077636719, -610.3011474609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000596.npy"} +{"epoch": 0.8751835535976505, "step": 597, "batch_size": 64, "mean": 525.302001953125, "std": 724.3052978515625, "min": -1204.923095703125, "p10": -172.5231124877929, "median": 414.04791259765625, "p90": 1512.2902587890626, "max": 2501.442626953125, "pos_frac": 0.796875, "sample": [966.4365234375, 573.4845581054688, 1265.0224609375, 373.8397216796875, 508.27056884765625, 1559.1986083984375, -196.1330108642578, 1032.41259765625, 1561.6256103515625, -389.39581298828125, -111.51802825927734, -117.433349609375, 414.9586486816406, 78.41775512695312, 780.2568359375, 80.35184478759766, 1133.048095703125, 125.04414367675781, 378.4199523925781, 1822.4185791015625, 457.2953796386719, 509.7615966796875, 4.390289306640625, 1142.47021484375, 168.8879852294922, 318.2503356933594, 34.30111312866211, 285.90338134765625, 117.74589538574219, -965.0467529296875, -1204.923095703125, -9.364805221557617, 1308.19384765625, 511.566650390625, 575.55322265625, 2412.745849609375, 1516.830322265625, -52.32494354248047, -463.3232421875, 96.56536102294922, 791.739013671875, 1332.74853515625, 57.054847717285156, 272.7529296875, 413.1371765136719, -109.05435943603516, 607.9625854492188, 976.7196044921875, 64.68247985839844, 1496.6680908203125, 155.90997314453125, -329.695556640625, -233.63726806640625, 2501.442626953125, 1983.2513427734375, 122.05961608886719, 608.85009765625, 481.54071044921875, 766.5855712890625, 705.9178466796875, 103.07585144042969, 1501.69677734375, -60.013755798339844, 803.7318725585938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000597.npy"} +{"epoch": 0.8766519823788547, "step": 598, "batch_size": 64, "mean": 409.35821533203125, "std": 617.6261596679688, "min": -1175.1072998046875, "p10": -182.05966033935545, "median": 414.1699981689453, "p90": 1222.2988525390626, "max": 2262.76611328125, "pos_frac": 0.78125, "sample": [434.648193359375, 730.37841796875, 351.42059326171875, 1088.439453125, -1175.1072998046875, 393.6918029785156, 1281.2110595703125, 705.3099365234375, 203.2728271484375, 1233.086181640625, 34.919342041015625, -689.5211181640625, -842.1827392578125, 201.52027893066406, 1406.989501953125, -188.9965057373047, 737.696533203125, 730.09716796875, 1352.497802734375, 1817.219482421875, 260.61474609375, 1197.12841796875, 746.419677734375, -376.88079833984375, 314.056396484375, 469.1202087402344, 596.45947265625, 350.18994140625, -1085.27099609375, 171.0281524658203, 813.39404296875, 600.3900146484375, 486.649658203125, -123.97632598876953, -495.84442138671875, 533.2462158203125, 591.789306640625, 1268.5751953125, 195.8501434326172, 311.64141845703125, 151.37240600585938, 203.53875732421875, -110.88167572021484, 490.0589294433594, -86.70106506347656, 709.79150390625, 445.4171142578125, 480.3994140625, 19.99317741394043, 143.05703735351562, 347.3411560058594, 2262.76611328125, 856.5125732421875, 818.0038452148438, 554.8267822265625, 285.3620910644531, -34.224609375, 1005.1473388671875, 586.847900390625, -165.87368774414062, 16.936080932617188, -158.1639404296875, 772.19873046875, -25.971904754638672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000598.npy"} +{"epoch": 0.8781204111600588, "step": 599, "batch_size": 64, "mean": 336.27593994140625, "std": 532.787109375, "min": -2081.221435546875, "p10": -142.37838287353514, "median": 302.56146240234375, "p90": 965.0511779785157, "max": 1603.183837890625, "pos_frac": 0.828125, "sample": [417.4681701660156, 919.774658203125, 316.16412353515625, 440.19775390625, 773.0431518554688, 85.16119384765625, 1603.183837890625, 273.67730712890625, 204.75347900390625, 384.1563720703125, 454.93328857421875, -676.8206787109375, 6.187583923339844, 66.906982421875, -10.355875015258789, 435.73046875, 947.519775390625, 972.5646362304688, 597.316650390625, 1286.0948486328125, 191.85638427734375, 611.3746337890625, 743.8101806640625, 435.6235656738281, 305.66363525390625, 35.60454559326172, 157.01043701171875, 275.81219482421875, 240.06349182128906, -2081.221435546875, 592.2234497070312, 210.11376953125, 457.5487060546875, 175.24375915527344, 1143.5592041015625, 709.9571533203125, 585.9273071289062, -94.6710205078125, 1182.6639404296875, 277.4142761230469, 476.08306884765625, 170.95468139648438, 902.3406982421875, -313.6812744140625, -149.36793518066406, 393.6284484863281, 599.6883544921875, 251.45693969726562, 349.14154052734375, 145.88052368164062, 1200.29736328125, 255.05995178222656, 73.51624298095703, 13.379402160644531, -279.8493347167969, -17.908695220947266, 299.45928955078125, 395.85986328125, -392.92010498046875, 1224.30615234375, -126.06942749023438, -395.76434326171875, 262.731689453125, 530.2007446289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000599.npy"} +{"epoch": 0.8795888399412628, "step": 600, "batch_size": 64, "mean": 322.5917053222656, "std": 639.6307983398438, "min": -1193.86962890625, "p10": -549.4893432617187, "median": 215.4433135986328, "p90": 1126.8391967773441, "max": 2400.23388671875, "pos_frac": 0.734375, "sample": [135.47434997558594, 687.0761108398438, 541.3002319335938, -1193.86962890625, 833.4791870117188, 595.2230224609375, -66.5247802734375, 572.556396484375, -609.2149047851562, 521.9600830078125, 44.459373474121094, 15.51229476928711, 991.3297119140625, -218.87063598632812, 303.6201477050781, 643.6861572265625, -730.3926391601562, 1451.62939453125, 74.1117172241211, 226.05316162109375, 885.121826171875, 1000.822265625, -125.93379211425781, 244.7935791015625, -128.5579071044922, -371.2462158203125, 740.9886474609375, 1188.47021484375, 1203.314453125, 837.5615844726562, 202.9136199951172, -39.844696044921875, 940.8614501953125, 116.05481719970703, 1029.9576416015625, 662.437744140625, 75.16893005371094, 250.10911560058594, -491.07232666015625, 1510.45068359375, 2400.23388671875, 220.4351043701172, -104.40977478027344, 41.11510467529297, 1499.272216796875, 163.66189575195312, 854.8316650390625, 66.79798889160156, -783.5137329101562, 580.1322021484375, 38.96467590332031, 1168.35986328125, 263.92706298828125, -53.68152618408203, 210.45152282714844, 785.3490600585938, -586.739013671875, 149.99606323242188, 53.40373992919922, -574.5252075195312, -615.1173706054688, 433.7799072265625, -232.23297119140625, 114.40550994873047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000600.npy"} +{"epoch": 0.8810572687224669, "step": 601, "batch_size": 64, "mean": 418.25567626953125, "std": 568.9058837890625, "min": -1191.1351318359375, "p10": -221.99286193847655, "median": 374.5825653076172, "p90": 1139.1808715820312, "max": 1765.8509521484375, "pos_frac": 0.78125, "sample": [671.59375, 1281.527587890625, 1765.8509521484375, 741.2603149414062, -96.02263641357422, 1052.073974609375, 358.1443176269531, 78.30150604248047, -229.464599609375, 277.40826416015625, -232.7823486328125, -164.4888153076172, 1446.2017822265625, 448.49041748046875, 307.8819580078125, 707.6680297851562, 288.0916748046875, 640.750244140625, -204.55880737304688, 1321.76318359375, 819.6793823242188, 624.5112915039062, 903.9767456054688, -73.83409118652344, 560.70361328125, 236.8575897216797, -64.23040771484375, 623.3846435546875, 779.7518310546875, 258.2381896972656, 401.5075988769531, 380.81976318359375, -987.8260498046875, 561.3034057617188, -437.9187316894531, 510.66033935546875, 154.630859375, -104.48533630371094, 164.61135864257812, 276.0287170410156, 692.3489379882812, 788.6314086914062, 161.3798065185547, -1191.1351318359375, 1138.2972412109375, -569.83984375, 45.139923095703125, 1535.1544189453125, 617.4120483398438, -333.0291748046875, 934.7467651367188, 995.0650634765625, 147.43260192871094, 368.3453674316406, 199.9873809814453, 982.8124389648438, 250.9314727783203, 1416.8741455078125, 1139.5595703125, 442.133544921875, -73.61075592041016, 244.54940795898438, 499.3411865234375, 287.77313232421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000601.npy"} +{"epoch": 0.882525697503671, "step": 602, "batch_size": 64, "mean": 520.9512329101562, "std": 591.3538818359375, "min": -488.1795349121094, "p10": -92.27585296630858, "median": 463.5677032470703, "p90": 1179.5487915039064, "max": 2620.80517578125, "pos_frac": 0.84375, "sample": [500.32220458984375, 140.75335693359375, -139.8981475830078, 362.07904052734375, 26.953222274780273, 165.08251953125, 1130.280029296875, 775.9503173828125, 139.6809844970703, 265.3568115234375, 459.68670654296875, 24.232070922851562, 704.32861328125, 1042.2142333984375, 1184.515380859375, 67.46477508544922, 576.8162841796875, 471.7528076171875, 467.4486999511719, 1381.40087890625, -18.45416259765625, 219.21499633789062, -35.67913818359375, -488.1795349121094, 1205.9500732421875, 719.87744140625, 215.0755615234375, 474.1361999511719, 89.75079345703125, -101.17353820800781, 68.96898651123047, 1050.34765625, 830.5051879882812, 135.9702606201172, 909.7427978515625, 1261.6199951171875, 200.43911743164062, 558.8171997070312, -462.2250671386719, -372.5536804199219, 874.8013916015625, 1135.5042724609375, 722.2955932617188, 977.192626953125, 800.2081298828125, 911.166015625, 2347.884521484375, -121.49922180175781, 1167.9600830078125, -446.570068359375, 975.455078125, 826.3601684570312, 218.53604125976562, 148.12057495117188, 1217.0396728515625, -71.51458740234375, 355.9849853515625, 842.103515625, 66.29122924804688, 109.156982421875, 856.2572631835938, 2620.80517578125, 442.3966369628906, 156.36949157714844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000602.npy"} +{"epoch": 0.8839941262848752, "step": 603, "batch_size": 64, "mean": 476.6319580078125, "std": 713.6810302734375, "min": -933.7374267578125, "p10": -358.6540313720703, "median": 347.86158752441406, "p90": 1336.114685058594, "max": 2939.37353515625, "pos_frac": 0.71875, "sample": [-372.59442138671875, -368.4373474121094, 290.03253173828125, 43.57881164550781, 1360.765625, -18.93170166015625, 1244.279541015625, 409.71588134765625, 845.6983642578125, 1223.220947265625, -47.82269287109375, 13.721275329589844, -649.5523071289062, 840.3675537109375, 48.72671127319336, 471.9910583496094, 43.772735595703125, 1526.980224609375, 2097.894287109375, 548.2803344726562, 1065.858642578125, 881.859619140625, 727.8919677734375, 54.8470344543457, 310.12188720703125, 293.6404724121094, 504.63873291015625, 137.30206298828125, -62.98586654663086, 249.8387451171875, 1668.75341796875, -933.7374267578125, 1813.3056640625, -35.211578369140625, -7.3138275146484375, 376.6043701171875, 1042.037353515625, -260.375732421875, 690.9315795898438, 560.8919067382812, 940.9564208984375, -476.13177490234375, -96.93754577636719, -23.33310317993164, -111.75582122802734, 562.8208618164062, 741.6082153320312, 147.3205108642578, -404.64068603515625, 435.5464172363281, 524.0803833007812, 1278.5958251953125, 909.5142822265625, -335.8262939453125, 173.0848388671875, -480.2884521484375, 2939.37353515625, 319.1188049316406, 1832.298095703125, 65.2618408203125, 866.77001953125, 1198.6912841796875, 907.7311401367188, -40.001094818115234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000603.npy"} +{"epoch": 0.8854625550660793, "step": 604, "batch_size": 64, "mean": 520.3671264648438, "std": 720.8078002929688, "min": -930.552490234375, "p10": -384.58124694824215, "median": 485.39451599121094, "p90": 1659.5297119140625, "max": 2363.778076171875, "pos_frac": 0.765625, "sample": [-427.1708068847656, 176.5966796875, -262.69549560546875, -127.27989196777344, 664.6011962890625, 1216.7825927734375, 995.2723999023438, 1166.2142333984375, 1013.9371948242188, 2270.4091796875, -930.552490234375, 972.4999389648438, 20.633499145507812, -365.8623352050781, 919.9478759765625, 1112.69091796875, 1002.40185546875, 72.47340393066406, 382.4281311035156, 1081.344482421875, 127.3431396484375, -570.8677978515625, 89.64051818847656, 1659.61181640625, 578.9154052734375, -583.5293579101562, 1844.6319580078125, -260.220703125, 162.02084350585938, 213.7130126953125, 4.445960998535156, 429.7565002441406, 548.0020751953125, 486.5464782714844, -173.42312622070312, 1693.695556640625, 700.2042846679688, 9.203559875488281, -392.6036376953125, 880.887939453125, 611.5413208007812, 484.2425537109375, 525.5892944335938, 327.2900390625, 453.3553771972656, 2363.778076171875, -558.2999267578125, 1659.338134765625, 496.49395751953125, -8.754257202148438, 633.3161010742188, 1082.1954345703125, 863.9539794921875, -103.38154602050781, 1998.4991455078125, 577.2406616210938, -426.8048095703125, 68.81759643554688, 406.9909973144531, 821.49609375, -25.2694091796875, 560.9685668945312, 1689.152099609375, 399.1003112792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000604.npy"} +{"epoch": 0.8869309838472834, "step": 605, "batch_size": 64, "mean": 367.9199523925781, "std": 634.3158569335938, "min": -1187.9111328125, "p10": -259.9810256958008, "median": 275.68202209472656, "p90": 1064.5187744140628, "max": 2790.83837890625, "pos_frac": 0.734375, "sample": [-55.58750915527344, -330.15826416015625, -23.21026611328125, 237.161376953125, -22.861061096191406, 117.20832824707031, 893.0145874023438, 698.1514282226562, 863.3004760742188, 218.8819580078125, 636.1070556640625, -13.881439208984375, 134.3330078125, -271.7872314453125, 451.5123291015625, 452.7802429199219, 76.23851776123047, 828.7680053710938, 1641.60400390625, -208.9877471923828, 2790.83837890625, 81.06735229492188, 625.178955078125, -42.281524658203125, 247.9619598388672, 913.6767578125, -207.02484130859375, -279.9072570800781, 338.63946533203125, 287.6654357910156, 72.2292709350586, -232.43321228027344, 973.0360717773438, -525.236083984375, -202.61956787109375, 429.6639099121094, 1220.927001953125, 88.92892456054688, 251.17333984375, 750.0302734375, 117.17658996582031, 2254.11181640625, -761.3442993164062, -1187.9111328125, 482.2379150390625, 429.61663818359375, -349.4312744140625, 1169.8153076171875, 97.29004669189453, 26.375749588012695, 567.1300659179688, 432.21063232421875, 1237.7237548828125, 307.1230773925781, 263.6986083984375, 413.0634765625, 426.7955017089844, 1098.111083984375, 402.30670166015625, 207.6807403564453, 986.13671875, 398.3114318847656, -179.6864013671875, 804.231689453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000605.npy"} +{"epoch": 0.8883994126284875, "step": 606, "batch_size": 64, "mean": 404.90985107421875, "std": 586.7932739257812, "min": -600.3906860351562, "p10": -353.4496917724609, "median": 279.02503967285156, "p90": 1099.4193359375004, "max": 2182.249267578125, "pos_frac": 0.765625, "sample": [-484.748779296875, 95.58297729492188, 88.36006927490234, -376.7647399902344, 372.8034362792969, 947.4661865234375, 173.8441162109375, 207.38653564453125, 1199.50048828125, 870.6892700195312, 172.21571350097656, 2182.249267578125, 916.0228271484375, -35.29766845703125, -539.974365234375, 233.74847412109375, 483.2320556640625, 228.1494140625, 229.74148559570312, 1257.449951171875, 801.2794189453125, -452.05364990234375, 1650.3551025390625, 20.10742950439453, 959.6241455078125, 687.7584228515625, 634.972412109375, 148.17091369628906, -43.41413116455078, -299.04791259765625, 602.1585083007812, 699.5557861328125, -471.3493347167969, 273.6064758300781, -583.7025146484375, 334.94989013671875, 212.6385498046875, 700.3101196289062, -90.66397857666016, -111.1376953125, -600.3906860351562, -208.073974609375, 2003.71142578125, 1149.2744140625, 272.5089416503906, 632.7400512695312, 109.74201202392578, 603.6397705078125, 825.8863525390625, 321.887939453125, -44.61613464355469, 910.7755737304688, -43.33625793457031, 247.73780822753906, 983.0908203125, 1506.8621826171875, 284.443603515625, 744.2421264648438, 39.34215545654297, 235.43324279785156, 615.75048828125, 542.7113647460938, 558.76220703125, 326.3302307128906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000606.npy"} +{"epoch": 0.8898678414096917, "step": 607, "batch_size": 64, "mean": 338.9346923828125, "std": 596.3048706054688, "min": -1236.029052734375, "p10": -251.68978576660152, "median": 266.0603942871094, "p90": 1032.819451904297, "max": 2500.816650390625, "pos_frac": 0.796875, "sample": [428.60687255859375, -134.45631408691406, 546.4468994140625, 220.6238250732422, 141.2738037109375, 168.2749481201172, 1139.3720703125, 363.66400146484375, 3.4769287109375, 1294.715576171875, -332.20263671875, 704.3478393554688, 448.5653381347656, -273.4363708496094, 119.42459106445312, 1045.46484375, -679.3264770507812, 220.874755859375, 407.7979736328125, 56.58856964111328, -171.52377319335938, 62.09870910644531, 255.32363891601562, 508.04522705078125, -474.56103515625, 622.3477172851562, -177.17047119140625, 1537.1552734375, 745.78662109375, 114.18177795410156, 635.947265625, 105.74530792236328, 744.2297973632812, 266.4591979980469, 500.28753662109375, 241.3419189453125, 1003.3135375976562, 100.3448486328125, 1568.3248291015625, 375.8935852050781, -1183.3292236328125, 577.7437133789062, -200.94775390625, 519.6885375976562, 666.0999755859375, 89.6719970703125, -21.691307067871094, 132.28314208984375, -1236.029052734375, 226.47064208984375, 559.6724853515625, 330.2271423339844, -125.15756225585938, 607.30615234375, 248.10964965820312, 82.53321838378906, 296.2589111328125, 2500.816650390625, 1470.5286865234375, 750.2803955078125, 265.6615905761719, -388.4674072265625, 591.8068237304688, 478.6148986816406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000607.npy"} +{"epoch": 0.8913362701908958, "step": 608, "batch_size": 64, "mean": 402.94073486328125, "std": 499.6820983886719, "min": -477.8456726074219, "p10": -269.561116027832, "median": 407.9303436279297, "p90": 944.007470703125, "max": 1744.6102294921875, "pos_frac": 0.796875, "sample": [819.776611328125, 890.191650390625, 362.9637756347656, 541.9396362304688, -423.0966491699219, 1535.814697265625, -394.9998474121094, 141.4844512939453, -128.67237854003906, 581.030517578125, 779.8092651367188, 26.857038497924805, 35.648643493652344, 669.6912841796875, -149.55752563476562, 185.37741088867188, -477.8456726074219, 440.8563232421875, -113.38922882080078, 402.9112243652344, 463.259765625, 645.9862670898438, 87.14883422851562, 795.0262451171875, 30.577613830566406, 412.949462890625, 634.8687744140625, 464.3271789550781, 950.3967895507812, -206.9696807861328, 88.01519775390625, -43.38185119628906, 248.5540313720703, 284.3572082519531, 765.4428100585938, 288.46588134765625, 459.26739501953125, 421.7127685546875, 1281.5859375, -319.808837890625, 1744.6102294921875, 212.99676513671875, 642.6412963867188, 977.8585205078125, 1173.7689208984375, 287.60418701171875, 81.68205261230469, 772.8355712890625, 33.685020446777344, -296.3860168457031, 660.9269409179688, -44.34050750732422, 556.57177734375, 837.3677368164062, -302.9256286621094, 929.0990600585938, -407.3540344238281, 188.75735473632812, 1711.618408203125, 815.99755859375, 740.15869140625, 502.02655029296875, 287.2670593261719, 203.16619873046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000608.npy"} +{"epoch": 0.8928046989720999, "step": 609, "batch_size": 64, "mean": 521.3295288085938, "std": 809.88720703125, "min": -1169.81103515625, "p10": -280.30283813476547, "median": 432.9218292236328, "p90": 1239.2265136718754, "max": 3999.588623046875, "pos_frac": 0.78125, "sample": [93.02261352539062, 19.72583770751953, 104.27204132080078, -675.450927734375, 220.23960876464844, 430.49932861328125, 1276.546630859375, 501.53021240234375, 524.9671020507812, -416.76812744140625, 810.1005249023438, 1793.1171875, 101.42762756347656, 435.3443298339844, 846.0361938476562, -1169.81103515625, -90.90322875976562, 585.501708984375, 1014.2498168945312, 395.1195983886719, 653.6143798828125, -129.15167236328125, 802.2449340820312, 737.5819702148438, -345.0819091796875, 596.6757202148438, 1358.213623046875, 221.79067993164062, -453.1690979003906, 732.794189453125, 1680.8662109375, 677.9880981445312, 2487.624267578125, 551.8040161132812, 2966.319580078125, 1071.374755859375, -8.658515930175781, 737.2392578125, -117.03325653076172, 327.0299987792969, 285.3126525878906, 189.1880645751953, 924.8106689453125, 3999.588623046875, 785.5396728515625, 86.94097900390625, 473.8468322753906, -73.10186767578125, -712.7416381835938, 167.62789916992188, 331.6024169921875, 267.2863464355469, 1019.1045532226562, -3.0261077880859375, -5.661460876464844, 1086.7308349609375, 326.7247619628906, 668.006103515625, 646.409912109375, 1152.146240234375, 267.02667236328125, 484.44305419921875, -378.8048095703125, 27.257160186767578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000609.npy"} +{"epoch": 0.8942731277533039, "step": 610, "batch_size": 64, "mean": 333.0104064941406, "std": 600.1758422851562, "min": -1849.3968505859375, "p10": -394.11669006347654, "median": 281.0553741455078, "p90": 983.702728271485, "max": 1722.3153076171875, "pos_frac": 0.78125, "sample": [714.070556640625, 688.9822998046875, 157.93385314941406, 222.7867889404297, -435.1560974121094, 665.67138671875, 78.85118103027344, 592.9786376953125, 651.5932006835938, -537.7868041992188, 47.98188018798828, 592.16650390625, 12.273384094238281, 1436.1549072265625, 697.680908203125, 396.58197021484375, 245.79965209960938, 184.42405700683594, -581.6665649414062, -1849.3968505859375, 455.73834228515625, 1722.3153076171875, 156.73648071289062, 1050.7489013671875, -209.9169921875, 373.06793212890625, 186.17626953125, -407.0516357421875, 780.5504760742188, 775.458984375, 1616.119873046875, -124.26689147949219, 4.7980804443359375, 725.9033813476562, 274.85760498046875, 1593.674560546875, -363.9351501464844, 203.86227416992188, 316.53131103515625, 747.9182739257812, 739.6847534179688, 403.20098876953125, -58.33723068237305, 835.1178588867188, 163.03622436523438, 242.73797607421875, 1047.3819580078125, 182.17745971679688, -113.59048461914062, 347.47265625, 38.43028259277344, 628.2402954101562, 235.83140563964844, -113.35258483886719, 709.5830688476562, 145.99923706054688, 297.5500793457031, 343.12139892578125, -242.6074981689453, 287.2531433105469, -486.5267639160156, -602.178466796875, 793.8942260742188, 1627.3330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000610.npy"} +{"epoch": 0.895741556534508, "step": 611, "batch_size": 64, "mean": 554.140625, "std": 763.8386840820312, "min": -786.7107543945312, "p10": -239.65504150390623, "median": 399.22869873046875, "p90": 1523.6409423828125, "max": 2668.87060546875, "pos_frac": 0.734375, "sample": [1525.4188232421875, 390.7445983886719, 2581.93115234375, 1642.5289306640625, 1158.6343994140625, 594.5086059570312, -141.6663818359375, 1391.4298095703125, 34.601829528808594, 229.68826293945312, 290.0304260253906, 761.3945922851562, 1.6521110534667969, -33.46720504760742, -701.956787109375, -216.09205627441406, -364.19049072265625, -786.7107543945312, 300.8815002441406, 1473.6083984375, -714.2831420898438, 1261.1005859375, 2207.80615234375, -463.9226379394531, 1442.1722412109375, 639.1682739257812, 23.361183166503906, 1002.5064086914062, 626.7261352539062, 175.79649353027344, -98.80377197265625, 1248.7327880859375, 403.98291015625, -67.9283218383789, 449.0753173828125, 1179.6004638671875, 90.23444366455078, 190.3165283203125, -186.19778442382812, 322.5921325683594, 988.0368041992188, 356.7293701171875, -12.376571655273438, 1871.0550537109375, 419.87078857421875, 549.6290893554688, -295.2957458496094, 514.1040649414062, 1088.9730224609375, -167.80361938476562, 638.363525390625, -249.7534637451172, 229.092529296875, 394.4744873046875, 774.131591796875, 508.92791748046875, 365.0600891113281, 1751.953125, 462.90576171875, 1519.4925537109375, 1253.2861328125, -7.432960510253906, 2668.87060546875, -22.30167007446289], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000611.npy"} +{"epoch": 0.8972099853157122, "step": 612, "batch_size": 64, "mean": 431.320556640625, "std": 594.1006469726562, "min": -868.7947387695312, "p10": -318.5953247070312, "median": 328.0908203125, "p90": 1112.9020507812502, "max": 2518.395751953125, "pos_frac": 0.8125, "sample": [911.7256469726562, 267.8548889160156, 327.2645568847656, -353.50799560546875, -110.12548828125, 31.310022354125977, 1130.79833984375, 457.8030700683594, 495.17840576171875, 468.30047607421875, -571.054443359375, 759.784423828125, 163.81491088867188, 1071.14404296875, 904.8618774414062, 1043.764404296875, 580.512451171875, 223.001953125, 545.763671875, 1256.61376953125, 817.3521728515625, 118.66609954833984, -868.7947387695312, 1328.170166015625, 673.2094116210938, -355.331787109375, 814.773193359375, -337.3909912109375, 750.4061889648438, 746.045654296875, 516.5617065429688, -797.8690185546875, 126.05513000488281, 326.0648498535156, 85.22760009765625, -356.4324951171875, 656.941162109375, 328.9170837402344, 13.8677978515625, -182.2503662109375, 141.3498992919922, 98.32820129394531, 129.7471923828125, 213.2491455078125, 2518.395751953125, 24.580265045166016, 786.3544921875, 1063.3408203125, 1248.11572265625, 308.11669921875, 186.7034454345703, 755.27734375, 1558.15625, 136.25469970703125, 218.7201690673828, -250.0572967529297, 536.751953125, -0.5015716552734375, 999.68505859375, -274.73876953125, 1302.0333251953125, 752.7196655273438, 1049.7313232421875, 93.203857421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000612.npy"} +{"epoch": 0.8986784140969163, "step": 613, "batch_size": 64, "mean": 499.3277587890625, "std": 623.697509765625, "min": -1011.3561401367188, "p10": -81.68381195068355, "median": 486.0186462402344, "p90": 1344.8997436523437, "max": 2462.74658203125, "pos_frac": 0.828125, "sample": [595.8714599609375, 1372.7147216796875, 1344.6817626953125, -113.37712097167969, 1064.345703125, 1095.8016357421875, 1.2447090148925781, 1155.51318359375, 925.3953857421875, 25.545236587524414, 1378.9954833984375, 195.51284790039062, 639.5338745117188, 225.48699951171875, 1642.5797119140625, -383.100830078125, 19.693161010742188, 428.73681640625, 544.5381469726562, -102.53425598144531, 386.83880615234375, 77.50096893310547, 718.5016479492188, 675.1475830078125, 1344.9931640625, 296.319091796875, 1118.869873046875, 999.5044555664062, -33.03277587890625, 582.5667724609375, 759.7566528320312, -14.870269775390625, -29.05694007873535, 570.0272827148438, 418.2054443359375, 1846.83544921875, 595.36767578125, 706.3955688476562, 2462.74658203125, 2.6581764221191406, -1011.3561401367188, 1073.348388671875, 679.1871337890625, 797.068115234375, 625.5228271484375, -922.1611328125, 252.6424102783203, 1495.6400146484375, -4.149433135986328, 236.6533203125, 732.8839111328125, -262.0196533203125, -449.1506042480469, 154.3463134765625, 1000.2081298828125, 547.627685546875, 123.91232299804688, 381.4653625488281, 50.10527801513672, 543.3004760742188, 6.6096649169921875, 101.30426788330078, 168.11935424804688, 93.413818359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000613.npy"} +{"epoch": 0.9001468428781204, "step": 614, "batch_size": 64, "mean": 501.2961120605469, "std": 695.879638671875, "min": -796.3143920898438, "p10": -256.1655822753906, "median": 457.98634338378906, "p90": 1403.3476684570314, "max": 2801.981201171875, "pos_frac": 0.765625, "sample": [216.21546936035156, 443.17376708984375, -567.7572021484375, 941.951416015625, 17.380813598632812, 1345.6256103515625, 11.762699127197266, 439.7187805175781, 500.2539978027344, 27.98886489868164, 1286.1949462890625, 376.14825439453125, 472.7989196777344, 670.2913208007812, 284.68792724609375, -168.74398803710938, 1433.611328125, 739.2037353515625, 234.09584045410156, -85.17994689941406, 1097.9444580078125, -19.32647705078125, 735.0130004882812, 995.0682373046875, -612.0902709960938, -109.30736541748047, 489.4674072265625, -48.979034423828125, 598.70703125, -276.7989501953125, 676.6826171875, 1727.843017578125, -208.02105712890625, 754.7717895507812, -615.3294677734375, -198.8167724609375, 576.6434936523438, -716.9462890625, -9.53260612487793, 760.9938354492188, 1554.1031494140625, 390.0250549316406, 1650.0819091796875, 305.3302307128906, 1075.1629638671875, 1119.7366943359375, 1341.8038330078125, 255.09051513671875, 637.2894287109375, 721.0975341796875, 47.376243591308594, 298.05303955078125, 1428.085693359375, 102.62356567382812, 675.698974609375, 2801.981201171875, 1968.8291015625, 135.42572021484375, -620.0708618164062, -796.3143920898438, 1201.4600830078125, 176.18780517578125, 579.16796875, 817.3173828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000614.npy"} +{"epoch": 0.9016152716593245, "step": 615, "batch_size": 64, "mean": 418.7528076171875, "std": 674.4854736328125, "min": -1963.6441650390625, "p10": -213.19897918701167, "median": 277.1340560913086, "p90": 1150.6625732421876, "max": 2699.863037109375, "pos_frac": 0.828125, "sample": [188.10446166992188, 772.4423828125, 142.48483276367188, 1550.8291015625, 214.3922576904297, 1206.440673828125, 3.2144317626953125, 63.82294464111328, 38.8819580078125, 99.22909545898438, 932.135986328125, 230.66786193847656, 249.17994689941406, -387.70599365234375, 746.0855102539062, -462.1055603027344, 976.6962280273438, -1963.6441650390625, 305.0881652832031, 403.9862060546875, 309.61431884765625, 160.26568603515625, 1969.8819580078125, 170.4088134765625, 72.7427978515625, 1069.2220458984375, -160.21737670898438, 404.344482421875, 554.2149047851562, 122.04757690429688, 379.2308044433594, 949.4727172851562, 665.2960205078125, 1017.5142822265625, 1305.8355712890625, 91.6279067993164, 137.3979949951172, 127.5146484375, -402.4844970703125, -142.31854248046875, -781.340087890625, 1153.473388671875, 1020.5928344726562, 60.61479187011719, 1602.116455078125, 637.0205078125, 600.5809936523438, 1144.10400390625, 153.81109619140625, 488.253173828125, 938.2761840820312, -235.90538024902344, 206.3754425048828, -48.357879638671875, 783.692626953125, -413.4339904785156, 241.72557067871094, 413.0291748046875, 527.7166137695312, 801.501953125, -155.4610137939453, 2699.863037109375, 713.821044921875, 136.26986694335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000615.npy"} +{"epoch": 0.9030837004405287, "step": 616, "batch_size": 64, "mean": 337.53411865234375, "std": 579.8196411132812, "min": -770.0923461914062, "p10": -366.8234558105469, "median": 274.6616973876953, "p90": 1159.575134277344, "max": 1769.67724609375, "pos_frac": 0.75, "sample": [925.9771728515625, 97.7803955078125, 142.26055908203125, 715.3290405273438, 429.6864318847656, 74.71878814697266, 194.4796142578125, -241.59725952148438, 371.8368225097656, 1181.5498046875, 1379.8482666015625, 568.56494140625, 137.91513061523438, -24.949050903320312, -739.0947875976562, 1230.8114013671875, 443.0518493652344, 276.9259338378906, 151.4061279296875, -90.25594329833984, 1468.3145751953125, 552.0303955078125, 412.8052673339844, 419.7682800292969, -352.0876159667969, -682.6547241210938, 524.7672729492188, 235.9271240234375, -33.746604919433594, 1185.80810546875, 1769.67724609375, 181.34483337402344, -465.49298095703125, 7.568931579589844, 106.68162536621094, -723.2190551757812, -164.76165771484375, 233.43435668945312, 907.1620483398438, 706.389404296875, 361.39422607421875, 440.196533203125, 496.69085693359375, 683.5886840820312, -770.0923461914062, 1057.6409912109375, 185.3123321533203, -371.32464599609375, 154.04949951171875, 777.646484375, 281.1212158203125, -335.6106262207031, 1687.8543701171875, 387.5260009765625, 733.5042724609375, 153.38116455078125, 272.3974609375, 186.40699768066406, -269.912841796875, 582.7266235351562, 1108.3009033203125, -356.3206787109375, 1073.7581787109375, -434.0137634277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000616.npy"} +{"epoch": 0.9045521292217328, "step": 617, "batch_size": 64, "mean": 239.94285583496094, "std": 593.8347778320312, "min": -1188.8873291015625, "p10": -428.7472015380859, "median": 270.5278625488281, "p90": 811.3543212890626, "max": 2192.191162109375, "pos_frac": 0.75, "sample": [293.32940673828125, 139.21722412109375, -182.9322509765625, 326.70892333984375, 473.8049011230469, -301.91668701171875, 23.48626136779785, -754.785400390625, 751.2687377929688, 135.29164123535156, -377.5600280761719, 342.9927062988281, 388.6640930175781, 396.6442565917969, 167.9300994873047, 233.41500854492188, 438.91510009765625, 277.81256103515625, 62.94081115722656, 1452.4696044921875, 800.927978515625, 613.2978515625, 122.97557830810547, 10.970693588256836, 509.2720031738281, 429.4144592285156, 79.4405746459961, -220.3648223876953, 349.8083190917969, -1186.23974609375, 14.106172561645508, 366.87396240234375, -347.73712158203125, -388.8343200683594, 667.9911499023438, 300.6581115722656, 1090.1978759765625, 815.82275390625, 651.0496826171875, 435.0594787597656, 544.6835327148438, -467.89910888671875, 879.01953125, 627.5187377929688, 1888.8565673828125, 489.76885986328125, 730.9656982421875, -612.470947265625, 193.17538452148438, 537.5767211914062, -1188.8873291015625, 138.00221252441406, 39.66002655029297, 263.2431640625, 229.94577026367188, -222.5355224609375, 909.55908203125, -166.774658203125, -61.00670623779297, -445.85272216796875, 2192.191162109375, 293.43878173828125, -866.829345703125, 28.606109619140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000617.npy"} +{"epoch": 0.9060205580029369, "step": 618, "batch_size": 64, "mean": 522.0931396484375, "std": 859.965576171875, "min": -1865.09423828125, "p10": -467.8816772460937, "median": 438.67327880859375, "p90": 1315.1710571289063, "max": 3081.5859375, "pos_frac": 0.78125, "sample": [-1865.09423828125, 847.1588134765625, 196.14300537109375, 234.25820922851562, 84.9234390258789, 285.325439453125, 119.01495361328125, 63.3082389831543, 2354.255859375, 914.8034057617188, 324.01824951171875, -929.33544921875, 814.7453002929688, 203.9821319580078, 1298.3829345703125, 319.0231018066406, -384.8946533203125, 1288.48681640625, 759.9171752929688, -225.72396850585938, -140.525390625, -485.7867736816406, -543.7452392578125, 94.26139831542969, -851.721435546875, -738.249267578125, 158.41647338867188, 1360.5914306640625, 1041.022216796875, 164.3868408203125, 630.728759765625, 459.15728759765625, 706.7562255859375, 1023.3305053710938, 1049.00341796875, -890.5966186523438, -426.1031188964844, 1292.5936279296875, 344.1512756347656, 443.62188720703125, 791.5653686523438, 1322.365966796875, 1110.1822509765625, 2603.708740234375, 912.5101928710938, -153.6243896484375, 407.17010498046875, 639.312744140625, 360.7909851074219, 3081.5859375, 778.900146484375, -58.58252716064453, 1498.3802490234375, 1282.0123291015625, 896.7545166015625, 516.5953979492188, -197.8241424560547, 1145.679443359375, 2627.537353515625, 20.359954833984375, 433.72467041015625, 1066.6004638671875, 599.49267578125, 334.7686767578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000618.npy"} +{"epoch": 0.9074889867841409, "step": 619, "batch_size": 64, "mean": 398.9811706542969, "std": 834.60595703125, "min": -1095.140625, "p10": -556.2883239746093, "median": 290.05577087402344, "p90": 1469.076916503907, "max": 3282.93603515625, "pos_frac": 0.671875, "sample": [-1095.140625, -663.901123046875, 22.66162109375, 1178.6705322265625, 1527.2100830078125, -199.81576538085938, 321.2942810058594, 180.07827758789062, 361.7569580078125, -490.0054016113281, 625.0774536132812, 746.763427734375, 478.9906005859375, 1893.166748046875, -787.457275390625, 227.0975341796875, 1219.500732421875, 415.960205078125, 1157.687255859375, 1154.5982666015625, -789.8546142578125, -870.0192260742188, 1297.9991455078125, 1333.432861328125, -1.3442535400390625, -504.872802734375, -117.79195404052734, 601.06689453125, 686.6219482421875, -92.90411376953125, 64.63296508789062, 535.643310546875, -444.25201416015625, 81.0118408203125, 1543.576416015625, -370.1181945800781, -45.6982421875, -301.6939697265625, 791.3712768554688, 646.9489135742188, -41.54561996459961, 1884.970458984375, 538.5584106445312, 575.0850219726562, 974.7704467773438, 76.28180694580078, -173.72689819335938, 2553.746337890625, 1602.05712890625, 302.12567138671875, 49.09345245361328, 227.82113647460938, 488.3317565917969, 3282.93603515625, -1077.0941162109375, -578.3235473632812, 632.4259643554688, -55.272212982177734, 1212.667236328125, -26.84079360961914, 325.1689758300781, 148.5106201171875, 17.11311912536621, 277.9858703613281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000619.npy"} +{"epoch": 0.908957415565345, "step": 620, "batch_size": 64, "mean": 384.1690979003906, "std": 627.1543579101562, "min": -1122.696044921875, "p10": -285.59993591308586, "median": 359.23028564453125, "p90": 1386.3005859375, "max": 1967.8665771484375, "pos_frac": 0.734375, "sample": [477.3862609863281, -385.8997802734375, 440.29620361328125, 621.9178466796875, 493.71173095703125, 820.3494873046875, 349.38653564453125, 287.51287841796875, -77.4636001586914, 1455.893798828125, -309.4596862792969, 369.07403564453125, -229.92718505859375, 1373.0933837890625, 443.4527587890625, 138.14149475097656, 1087.7156982421875, 475.9966735839844, -80.56423950195312, 1391.9608154296875, 1967.8665771484375, 248.85214233398438, 427.17889404296875, 1072.7626953125, -506.5599365234375, 223.58181762695312, 644.8219604492188, 297.6244812011719, 169.1242218017578, 78.82807922363281, -44.21843719482422, 1808.453369140625, 428.31109619140625, 1740.4892578125, -50.20166015625, 411.05499267578125, -84.6379165649414, 585.6414184570312, -10.453617095947266, 440.25079345703125, 373.01715087890625, 471.793212890625, -174.78228759765625, -107.25108337402344, 1392.2977294921875, 764.8474731445312, 649.4126586914062, -1122.696044921875, 117.2663345336914, 226.95932006835938, -203.18853759765625, -1038.9647216796875, 1787.5755615234375, 327.1000061035156, 238.42637634277344, 282.7973327636719, 449.9628601074219, 146.82791137695312, -551.0612182617188, 428.40380859375, 249.0794219970703, 954.16796875, -501.7701110839844, 435.25567626953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000620.npy"} +{"epoch": 0.9104258443465492, "step": 621, "batch_size": 64, "mean": 572.7069091796875, "std": 756.7515258789062, "min": -1748.376220703125, "p10": -269.45154571533203, "median": 563.6435852050781, "p90": 1477.1267578125005, "max": 3164.935302734375, "pos_frac": 0.828125, "sample": [623.1110229492188, -426.0539245605469, -801.473876953125, -242.6554718017578, 1792.7908935546875, -2.8609848022460938, 114.52112579345703, 677.7642211914062, 665.8230590820312, 1064.7994384765625, 904.5460815429688, 369.6446533203125, 311.15264892578125, 895.49072265625, 291.53033447265625, -454.28997802734375, -333.6242370605469, 773.9581298828125, -1748.376220703125, 198.86566162109375, 1572.5716552734375, 212.0455322265625, 1059.9310302734375, 542.7478637695312, 1234.7691650390625, 3164.935302734375, 52.823829650878906, 1158.7489013671875, 1168.78125, -124.35153198242188, 1350.6361083984375, 816.8148193359375, 947.3701782226562, 240.915771484375, 922.5971069335938, 1631.2918701171875, 176.26248168945312, 772.067626953125, 446.0304260253906, 666.97607421875, -125.94126892089844, 1322.7373046875, -280.9355773925781, 584.539306640625, 246.01992797851562, 174.59832763671875, 1531.3370361328125, 398.78948974609375, 1287.455810546875, 1182.5814208984375, -1009.870361328125, 364.046630859375, 90.0514144897461, 218.80096435546875, 300.15203857421875, 689.0447998046875, 923.4025268554688, 1579.202880859375, 250.37045288085938, 306.39910888671875, 1988.59326171875, 1016.70654296875, 772.030517578125, 154.5006561279297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000621.npy"} +{"epoch": 0.9118942731277533, "step": 622, "batch_size": 64, "mean": 399.3359375, "std": 663.733642578125, "min": -1246.098876953125, "p10": -267.0106506347655, "median": 362.2281799316406, "p90": 911.7734497070313, "max": 2924.44384765625, "pos_frac": 0.828125, "sample": [675.3327026367188, 346.96051025390625, -302.2549743652344, 409.87933349609375, 546.2750244140625, 416.5594482421875, -707.7006225585938, 363.3353576660156, 2559.319091796875, 571.0640258789062, 49.932289123535156, -407.1533203125, 71.96321105957031, -342.23455810546875, -33.703155517578125, 569.2879638671875, 275.22918701171875, 406.70953369140625, 26.874847412109375, 269.1766357421875, 1212.3487548828125, -83.7718505859375, 487.6043701171875, 2924.44384765625, 836.5841674804688, 25.270172119140625, 619.8302001953125, 737.4373779296875, 194.31068420410156, -1073.88720703125, 203.66525268554688, 9.8953857421875, 915.5039672851562, 94.81300354003906, 284.3341064453125, 903.0689086914062, 493.14404296875, -530.3544311523438, 1107.2642822265625, 574.6143798828125, 738.003173828125, 874.6792602539062, 40.336097717285156, 722.6273193359375, 135.83572387695312, -84.8055419921875, 361.1210021972656, -184.77389526367188, 54.72825622558594, 316.9295959472656, 1594.9239501953125, 525.8355712890625, 863.6937255859375, 664.0606079101562, 596.1787109375, 751.1932983398438, -1246.098876953125, 9.710960388183594, 118.35353088378906, 1511.1446533203125, 654.9046630859375, 529.0932006835938, 12.166858673095703, 296.6885986328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000622.npy"} +{"epoch": 0.9133627019089574, "step": 623, "batch_size": 64, "mean": 311.66925048828125, "std": 640.7202758789062, "min": -1172.4656982421875, "p10": -280.43679809570307, "median": 237.2112274169922, "p90": 1100.873547363282, "max": 2775.971923828125, "pos_frac": 0.703125, "sample": [25.43157958984375, 261.75, -819.1581420898438, 1336.2108154296875, -153.33319091796875, -26.072826385498047, 713.6436157226562, 49.266265869140625, 845.1464233398438, 57.883544921875, 1418.178955078125, 357.61920166015625, 645.8124389648438, 155.2283172607422, 675.7540283203125, -420.98394775390625, 785.0005493164062, 369.03045654296875, -296.19915771484375, 29.22412109375, -185.17100524902344, 451.58575439453125, 1281.3470458984375, -243.657958984375, -1172.4656982421875, 162.4383544921875, 367.49530029296875, -740.9794921875, 476.77520751953125, 659.4801635742188, -214.25973510742188, 460.0461120605469, -151.73150634765625, -114.28763580322266, -200.00326538085938, 121.48786926269531, 961.5341796875, 715.343505859375, 962.3336181640625, 227.77676391601562, 67.74000549316406, -795.44775390625, 377.6593322753906, 1443.4512939453125, -111.10787963867188, -66.77485656738281, 2775.971923828125, 501.73846435546875, -48.26810836791992, -444.62799072265625, 1160.247802734375, 574.4215087890625, 376.51275634765625, 343.658203125, 257.9915466308594, 1787.955810546875, 65.079833984375, 62.472015380859375, 244.10092163085938, 53.5365104675293, 534.0621337890625, 230.321533203125, 747.9805297851562, -26.36358642578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000623.npy"} +{"epoch": 0.9148311306901615, "step": 624, "batch_size": 64, "mean": 483.58160400390625, "std": 579.7620239257812, "min": -1172.9410400390625, "p10": -198.94970092773437, "median": 510.4460906982422, "p90": 1187.2959228515624, "max": 1482.1240234375, "pos_frac": 0.78125, "sample": [672.7286987304688, -387.46990966796875, 728.7674560546875, 459.5064697265625, 515.615478515625, 1093.67529296875, 1230.4114990234375, 1482.1240234375, 1436.434326171875, 235.6290283203125, 187.48898315429688, -25.75653076171875, 115.0950698852539, 1189.6171875, 872.8785400390625, -163.3377685546875, 600.0139770507812, 759.6087036132812, 1181.879638671875, -996.1476440429688, 49.78889465332031, 261.967529296875, 63.94336700439453, 852.7775268554688, 415.2632751464844, 239.97988891601562, 949.7926025390625, 863.8801879882812, 912.63232421875, -197.96823120117188, 196.6807403564453, 1100.623291015625, -199.37033081054688, 302.4217834472656, -388.764404296875, 983.4791870117188, 502.51318359375, -74.55816650390625, 1136.1839599609375, 343.6361083984375, -1172.9410400390625, 78.99236297607422, -135.6595458984375, 725.7664794921875, 951.58837890625, 1087.000244140625, 249.2366485595703, 846.9041137695312, 1051.2274169921875, 505.2767028808594, 223.2837677001953, -390.89697265625, -7.34547233581543, 1211.1976318359375, 570.182373046875, 708.1384887695312, -299.4093933105469, 649.3170776367188, -108.36347961425781, 707.3765869140625, 1322.47119140625, 212.51422119140625, 1388.7724609375, 1070.9288330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000624.npy"} +{"epoch": 0.9162995594713657, "step": 625, "batch_size": 64, "mean": 539.6091918945312, "std": 577.7627563476562, "min": -434.39691162109375, "p10": -16.041033935546857, "median": 465.8817443847656, "p90": 1251.9223388671876, "max": 2341.5224609375, "pos_frac": 0.890625, "sample": [71.11055755615234, 140.59298706054688, -297.9781799316406, 989.3743286132812, 319.00750732421875, 37.750244140625, 806.90625, 1089.44970703125, 1054.592041015625, 994.6070556640625, 54.6202392578125, 651.8072509765625, 393.30877685546875, 32.58256530761719, 2054.17919921875, 589.4769897460938, 265.5694580078125, 87.71363830566406, 2341.5224609375, 496.65521240234375, 2039.4598388671875, 231.7821807861328, 78.15128326416016, 711.9287719726562, 247.43460083007812, 870.2069091796875, 1875.54052734375, -23.225021362304688, 209.8772735595703, 321.2113037109375, 461.5960693359375, 717.5985107421875, 498.8661804199219, -434.39691162109375, 511.57183837890625, -128.55960083007812, 85.42662811279297, 622.7261962890625, 317.7729187011719, 727.5388793945312, 1263.4097900390625, 465.21478271484375, 1371.3480224609375, 473.59234619140625, 119.81733703613281, 466.5487060546875, -65.77546691894531, 472.21734619140625, 688.5493774414062, 820.4675903320312, 45.15558624267578, 0.7216033935546875, 944.6011962890625, 354.6180419921875, 142.75453186035156, 174.47406005859375, 164.1864776611328, 534.0769653320312, 1225.1182861328125, -380.3801574707031, 775.606689453125, -79.85647583007812, 1423.286376953125, 1019.8782348632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000625.npy"} +{"epoch": 0.9177679882525698, "step": 626, "batch_size": 64, "mean": 298.3277282714844, "std": 678.9322509765625, "min": -2664.941650390625, "p10": -397.04429931640607, "median": 242.66095733642578, "p90": 976.7689941406252, "max": 2073.150390625, "pos_frac": 0.71875, "sample": [627.7662353515625, 25.53814697265625, 485.04736328125, 391.73681640625, 110.56057739257812, 250.5478973388672, 704.2348022460938, 234.77401733398438, 994.0769653320312, -2664.941650390625, 1637.600830078125, 358.3998718261719, 1525.8275146484375, 2073.150390625, -646.5287475585938, 127.9399185180664, 229.69561767578125, -467.5592956542969, -9.066558837890625, -177.3040771484375, 45.31951904296875, 214.9246063232422, 630.1045532226562, 119.63738250732422, 23.537981033325195, -0.8350982666015625, 153.35791015625, -613.5183715820312, 529.112548828125, 680.2452392578125, -107.87757873535156, 651.0907592773438, -563.1884765625, -606.0101928710938, 558.3601684570312, 1318.60693359375, 834.9629516601562, 402.8717346191406, 394.4072265625, 746.6980590820312, 575.6775512695312, -232.50930786132812, 936.3837280273438, 496.91748046875, 1729.4339599609375, 775.7609252929688, -125.94493865966797, 60.061302185058594, 136.0506591796875, 429.34014892578125, -46.97344970703125, 835.19482421875, 296.325439453125, 1306.5394287109375, -16.401718139648438, 200.85606384277344, 704.5594482421875, 360.418212890625, -750.93359375, -112.07621765136719, 120.19566345214844, -98.8725814819336, 301.76959228515625, -12.103069305419922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000626.npy"} +{"epoch": 0.9192364170337739, "step": 627, "batch_size": 64, "mean": 590.864013671875, "std": 884.6433715820312, "min": -766.2708129882812, "p10": -192.88841247558594, "median": 410.2700958251953, "p90": 1515.036596679688, "max": 4423.763671875, "pos_frac": 0.75, "sample": [631.8800048828125, 250.62615966796875, 1151.755859375, 426.1650085449219, -184.30899047851562, -113.71784973144531, 122.96558380126953, 636.04345703125, 507.48101806640625, 411.0092468261719, 1313.3798828125, -505.9135437011719, 178.62042236328125, 504.1211242675781, -109.84078216552734, 1059.1962890625, -196.5653076171875, 766.0376586914062, 1006.9061279296875, 1122.8192138671875, -699.8193359375, -37.73434066772461, 676.4445190429688, -20.622587203979492, 1745.135986328125, 199.07452392578125, 409.53094482421875, 2540.12451171875, 1080.707275390625, 428.3176574707031, 1884.77392578125, 73.1041259765625, 510.186767578125, 1205.7203369140625, 557.1222534179688, 1753.0137939453125, 1392.1038818359375, 173.39663696289062, 500.5384521484375, -205.6448516845703, 178.6743927001953, -59.05543518066406, 3536.0390625, 1567.7220458984375, 769.814208984375, -164.97689819335938, 4423.763671875, -223.591064453125, 441.9781188964844, -43.446746826171875, 1016.2669067382812, 318.5437316894531, 906.1384887695312, -766.2708129882812, 296.772216796875, 284.5945739746094, 280.429931640625, 173.90187072753906, -385.6585388183594, 1299.2147216796875, -43.906349182128906, 251.33433532714844, 295.9355163574219, 316.94342041015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000627.npy"} +{"epoch": 0.920704845814978, "step": 628, "batch_size": 64, "mean": 449.6760559082031, "std": 688.9083251953125, "min": -865.91748046875, "p10": -390.0245544433593, "median": 329.44081115722656, "p90": 1404.7720092773438, "max": 2371.91845703125, "pos_frac": 0.6875, "sample": [313.3423156738281, -30.01392364501953, 345.539306640625, 1425.5684814453125, 1635.1962890625, 163.69873046875, 721.9738159179688, 153.58795166015625, -204.88304138183594, -74.82733154296875, 147.25027465820312, 168.66888427734375, -865.91748046875, 1154.61181640625, 1103.5748291015625, 1384.174560546875, -4.272798538208008, -343.9345703125, 1664.198486328125, 514.855712890625, 2371.91845703125, 544.9842529296875, 246.90097045898438, -304.8167724609375, -64.76058197021484, -456.1317443847656, 513.895751953125, 857.7835693359375, -506.59124755859375, -504.6153259277344, 991.8637084960938, 272.0420837402344, 782.4990234375, 1437.579345703125, 1367.886474609375, 889.744140625, 467.4611511230469, 733.050048828125, 138.1956024169922, 184.04751586914062, -189.70755004882812, 614.4221801757812, 511.97833251953125, 87.65180206298828, 1645.0439453125, 1314.7103271484375, 1378.160888671875, 226.50372314453125, -409.77740478515625, -728.4314575195312, -107.97805786132812, -217.2569122314453, 1413.5994873046875, 831.467041015625, -701.2843627929688, -133.5481719970703, 1036.3316650390625, 469.02862548828125, 496.4349670410156, 150.44134521484375, 1162.6151123046875, 634.3330688476562, -18.788589477539062, -22.011486053466797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000628.npy"} +{"epoch": 0.922173274596182, "step": 629, "batch_size": 64, "mean": 419.21453857421875, "std": 560.6600341796875, "min": -1158.7171630859375, "p10": -242.93312988281247, "median": 449.0620422363281, "p90": 1192.8162353515627, "max": 1816.3236083984375, "pos_frac": 0.796875, "sample": [184.9371337890625, 1285.75244140625, -329.86767578125, -455.9183044433594, 446.48486328125, 451.63922119140625, 396.07403564453125, 797.0789794921875, 1353.9610595703125, 452.06597900390625, 557.3816528320312, 893.1707763671875, -104.4738540649414, 542.52392578125, 1288.4212646484375, -203.22735595703125, 149.73452758789062, 24.436344146728516, -15.423208236694336, 590.8345947265625, 25.936141967773438, 1123.3017578125, 392.4743347167969, -165.37086486816406, 115.99263000488281, 491.3037414550781, -806.0809936523438, -122.56543731689453, 1074.3499755859375, 753.4868774414062, 72.8335952758789, 827.8295288085938, 1491.176513671875, 1816.3236083984375, 460.8622741699219, 164.91160583496094, -1158.7171630859375, 1380.13330078125, 1017.996826171875, -259.94989013671875, -366.62957763671875, -548.5492553710938, 346.06646728515625, 473.25933837890625, 552.3414306640625, 1222.608154296875, 292.33489990234375, 445.85906982421875, 767.8583374023438, 35.193992614746094, 898.1255493164062, 376.45733642578125, 139.68548583984375, 690.5042724609375, 231.3104248046875, 506.28643798828125, 76.84516143798828, 657.9078979492188, 996.490966796875, 559.5988159179688, 439.47589111328125, 564.5105590820312, 531.2093505859375, -60.835235595703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000629.npy"} +{"epoch": 0.9236417033773862, "step": 630, "batch_size": 64, "mean": 236.3033905029297, "std": 723.6176147460938, "min": -1335.3486328125, "p10": -486.45968017578116, "median": 157.420166015625, "p90": 1034.4691650390625, "max": 3548.30517578125, "pos_frac": 0.671875, "sample": [1047.0760498046875, -1088.6385498046875, -529.4056396484375, 370.76910400390625, -271.6186828613281, 704.5155639648438, -1.9420852661132812, -308.56329345703125, 2366.3095703125, -231.49932861328125, 705.1318359375, -274.1561279296875, 181.44357299804688, 1092.813720703125, 759.4468994140625, -1335.3486328125, 108.44351196289062, -630.9558715820312, 324.7244873046875, 236.37005615234375, 450.1482238769531, -174.6241912841797, 142.74940490722656, 1494.8245849609375, -136.9645538330078, 3548.30517578125, 44.13810729980469, 1005.0531005859375, 110.9480209350586, -261.93621826171875, 36.445335388183594, -344.09478759765625, 283.5916748046875, -580.6227416992188, 336.8108215332031, -566.5487060546875, -189.8881378173828, 362.4892272949219, 158.4872283935547, 411.9532470703125, 253.962646484375, -111.68923950195312, 337.9631042480469, -638.8692626953125, 166.89605712890625, 365.66790771484375, 103.64356231689453, 251.95965576171875, 711.6146850585938, -84.3699722290039, 58.96394348144531, 1176.2603759765625, 658.0104370117188, 156.3531036376953, -349.43017578125, 79.86036682128906, 415.1921081542969, 95.47300720214844, 644.099365234375, -386.25244140625, 1260.530517578125, 249.16647338867188, 304.35736083984375, 47.871917724609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000630.npy"} +{"epoch": 0.9251101321585903, "step": 631, "batch_size": 64, "mean": 382.4601135253906, "std": 676.6861572265625, "min": -1860.8319091796875, "p10": -349.8773712158203, "median": 316.0376281738281, "p90": 1192.6641601562503, "max": 2035.7139892578125, "pos_frac": 0.71875, "sample": [1231.39794921875, 957.6788330078125, -325.2162780761719, 283.2193603515625, -360.4464111328125, 508.04736328125, -201.02536010742188, -165.28436279296875, -256.0486145019531, -506.80194091796875, 1790.8363037109375, 1296.8607177734375, -513.5359497070312, 471.5285339355469, -597.5009765625, 191.2716827392578, 656.9266967773438, 829.5472412109375, 49.805885314941406, -309.5737609863281, 148.8065185546875, -25.486051559448242, 315.9929504394531, -619.5733032226562, 1051.099365234375, 46.58176040649414, 102.6855239868164, -254.57005310058594, 821.8680419921875, 819.2864990234375, -1860.8319091796875, 517.7138671875, 2035.7139892578125, 760.134033203125, 356.35162353515625, 624.6786499023438, 211.32077026367188, -164.0211639404297, -94.65767669677734, 1216.0908203125, 237.96865844726562, 642.7384643554688, 1545.3038330078125, 1039.314697265625, 713.6873779296875, -900.8993530273438, 1138.001953125, 286.4163818359375, 1499.865966796875, 623.0714721679688, 67.04357147216797, 708.5377807617188, -105.50762939453125, 439.9471740722656, 181.46966552734375, 243.12982177734375, 1137.4442138671875, 706.2199096679688, 1014.094482421875, -126.86531829833984, 210.34735107421875, 962.58349609375, 316.0823059082031, 856.5774536132812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000631.npy"} +{"epoch": 0.9265785609397944, "step": 632, "batch_size": 64, "mean": 389.77215576171875, "std": 658.3034057617188, "min": -1355.92236328125, "p10": -379.0459869384766, "median": 367.8470458984375, "p90": 1142.2611572265625, "max": 2452.812744140625, "pos_frac": 0.75, "sample": [556.0197143554688, -737.9063110351562, 265.74481201171875, 305.489013671875, 627.2240600585938, -1355.92236328125, 1178.97265625, 1162.339599609375, 1411.3428955078125, 980.5171508789062, 784.4300537109375, 686.8901977539062, 180.39886474609375, 909.4957275390625, 297.6865539550781, 227.83731079101562, 545.1123046875, 404.5088195800781, 64.914794921875, 81.26676177978516, 1585.02099609375, 356.7604675292969, 695.5667724609375, 505.49163818359375, -503.4302978515625, 1142.414306640625, 618.2982177734375, -367.0729064941406, 487.849853515625, -216.36700439453125, -1161.9881591796875, 345.0403747558594, 1057.37744140625, 1130.1444091796875, 607.4359130859375, 842.3424682617188, 1064.8983154296875, 728.3003540039062, 378.9336242675781, -214.67868041992188, 1308.5567626953125, -95.94269561767578, -245.86651611328125, -129.507080078125, 566.6184692382812, 354.89715576171875, 658.9202270507812, 179.19476318359375, 1141.90380859375, 2452.812744140625, 1008.30078125, -167.00277709960938, -492.22589111328125, 53.88727569580078, 143.30039978027344, -384.17730712890625, 379.6062316894531, 265.4468078613281, -600.44677734375, 3.47979736328125, 100.72520446777344, -236.6249542236328, -108.6343002319336, 1129.4940185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000632.npy"} +{"epoch": 0.9280469897209985, "step": 633, "batch_size": 64, "mean": 275.2097473144531, "std": 595.0234985351562, "min": -1187.8402099609375, "p10": -382.66123962402344, "median": 251.14923095703125, "p90": 1168.038134765625, "max": 1514.766845703125, "pos_frac": 0.703125, "sample": [-387.3971252441406, 1367.346435546875, 740.6728515625, -738.42236328125, 290.465087890625, 368.78424072265625, -421.00543212890625, -83.36709594726562, 1196.43310546875, 83.0813217163086, 244.9541015625, 175.21685791015625, 381.82476806640625, -1069.89306640625, 529.185546875, 145.9110870361328, 197.86412048339844, 1184.5330810546875, 351.3578186035156, -304.9747009277344, -185.52105712890625, 361.3667297363281, 303.795654296875, -40.083839416503906, 763.2455444335938, 118.82009887695312, -3.7559642791748047, -160.0697021484375, -882.0252685546875, 1046.9124755859375, 331.57073974609375, 226.25808715820312, 1196.5921630859375, 398.5331115722656, 781.6167602539062, -61.56319046020508, -292.8324279785156, -1187.8402099609375, -371.61083984375, 84.59693145751953, 1514.766845703125, 496.0404357910156, 262.20855712890625, -869.0391845703125, 601.0006103515625, 678.00927734375, 546.3231811523438, 1493.9022216796875, 981.9989013671875, 1129.5499267578125, 135.48300170898438, 280.06463623046875, -319.8354187011719, 937.929443359375, 241.5253143310547, -60.47245788574219, 176.00221252441406, 400.828369140625, 1304.2138671875, 87.51876831054688, 609.283447265625, -46.027069091796875, 94.22620391845703, 257.3443603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000633.npy"} +{"epoch": 0.9295154185022027, "step": 634, "batch_size": 64, "mean": 255.07102966308594, "std": 607.6716918945312, "min": -998.73974609375, "p10": -362.7284454345703, "median": 160.91856384277344, "p90": 1144.204211425782, "max": 2191.70947265625, "pos_frac": 0.640625, "sample": [210.39198303222656, 859.3348388671875, -354.2565612792969, 1676.8831787109375, -638.2847290039062, 4.731077194213867, 28.65355110168457, 225.67697143554688, 288.10888671875, 1385.0389404296875, -1.2354145050048828, 377.364990234375, -94.30609893798828, 372.060791015625, 660.6322631835938, 135.33175659179688, 347.58721923828125, -274.23944091796875, -676.6455078125, 988.263916015625, 1232.3995361328125, -113.78837585449219, -88.14833068847656, 101.65733337402344, -24.856109619140625, 95.96461486816406, 157.51931762695312, 684.1334838867188, 475.8208312988281, 58.21065139770508, 2191.70947265625, 356.82147216796875, 564.98583984375, -998.73974609375, 1211.0357666015625, 1242.5318603515625, 602.50244140625, -366.3592529296875, -349.81048583984375, 164.31781005859375, 825.9280395507812, -97.32920837402344, -120.52777862548828, -934.1979370117188, 247.5139617919922, -583.9569702148438, 38.65806198120117, -523.5364990234375, -254.9090576171875, 872.8025512695312, -312.89398193359375, 759.431884765625, 388.75469970703125, 1335.1285400390625, 406.4142761230469, 72.54954528808594, 232.88125610351562, 729.66748046875, -90.6447982788086, 249.27633666992188, -50.946807861328125, -30.994918823242188, -241.33213806152344, 687.8092041015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000634.npy"} +{"epoch": 0.9309838472834068, "step": 635, "batch_size": 64, "mean": 359.61346435546875, "std": 665.2763671875, "min": -1187.1319580078125, "p10": -348.3009628295897, "median": 351.98841857910156, "p90": 1035.1943603515629, "max": 2453.15087890625, "pos_frac": 0.6875, "sample": [930.2689208984375, 136.71925354003906, 2453.15087890625, -108.853271484375, 431.1805725097656, -80.32186126708984, 147.71160888671875, -7.787647247314453, -11.864173889160156, 527.2192993164062, 92.65943145751953, 1432.0977783203125, -30.150131225585938, 819.0650634765625, -652.9346313476562, 379.01904296875, 511.9988098144531, -13.223663330078125, 775.7535400390625, 27.674606323242188, 373.48565673828125, 203.03636169433594, 741.7159423828125, 885.626953125, 348.56280517578125, 41.030128479003906, 10.712631225585938, 446.06640625, -243.79627990722656, 1971.981689453125, -72.90482330322266, 397.73876953125, 209.3682861328125, 1344.9359130859375, 748.4439697265625, -131.899169921875, 148.00433349609375, -14.875934600830078, 355.4140319824219, 1077.8182373046875, -162.94447326660156, 935.7386474609375, 930.3187866210938, 427.1147766113281, -705.9149169921875, -188.46713256835938, 637.1026611328125, -47.83977508544922, -405.238037109375, 721.8482666015625, -434.62945556640625, -393.08868408203125, -1187.1319580078125, 800.2507934570312, 1138.897705078125, 361.68536376953125, 416.2584533691406, 820.88232421875, -974.58544921875, 2370.81982421875, 162.64462280273438, 523.0438232421875, 356.2886962890625, 312.35906982421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000635.npy"} +{"epoch": 0.9324522760646109, "step": 636, "batch_size": 64, "mean": 428.3197021484375, "std": 674.3988037109375, "min": -1217.201416015625, "p10": -244.07832336425778, "median": 357.48826599121094, "p90": 1000.2477905273438, "max": 3568.331787109375, "pos_frac": 0.765625, "sample": [45.475975036621094, -414.81756591796875, 89.44784545898438, -160.72567749023438, 358.2875061035156, -509.9126281738281, 356.68902587890625, 333.18402099609375, 418.3046569824219, 65.89900970458984, 814.3914794921875, -385.61676025390625, 410.3768005371094, 1190.9959716796875, 29.298912048339844, 1007.8298950195312, -99.5650634765625, 2330.749755859375, 1182.13037109375, 481.57147216796875, 256.1563720703125, 982.5562133789062, -73.73294067382812, -278.9696044921875, 756.0489501953125, 437.7037658691406, 1017.726318359375, 481.3656005859375, 213.15098571777344, 3568.331787109375, 33.41188049316406, -179.44093322753906, 944.7374877929688, 1313.0712890625, -350.0162048339844, 928.94873046875, 923.4903564453125, 388.2472839355469, 543.484130859375, -215.39505004882812, 606.7125244140625, 214.3839569091797, 952.2539672851562, 840.7620239257812, 627.4639282226562, 303.23944091796875, 912.13818359375, -43.802703857421875, 299.24652099609375, -256.37115478515625, 179.94190979003906, 30.395843505859375, -1217.201416015625, 363.44659423828125, 255.73477172851562, -197.7244415283203, 972.6231079101562, 754.8583984375, 265.0138854980469, 481.4525146484375, -8.110000610351562, 268.2458801269531, 608.4993286132812, 964.3847045898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000636.npy"} +{"epoch": 0.933920704845815, "step": 637, "batch_size": 64, "mean": 471.3244934082031, "std": 654.3062133789062, "min": -1049.3040771484375, "p10": -98.9008819580078, "median": 295.6039733886719, "p90": 1410.5452758789065, "max": 2439.488037109375, "pos_frac": 0.828125, "sample": [114.46514129638672, 113.34420013427734, 623.9150390625, 36.01714324951172, 61.97262954711914, 944.159912109375, 294.00726318359375, 31.565004348754883, 663.8699340820312, 1318.9805908203125, 736.234375, -1049.3040771484375, 107.24765014648438, 216.65719604492188, -254.00621032714844, 623.1841430664062, 834.3789672851562, 1433.7684326171875, 1095.6634521484375, 622.05419921875, 1525.5172119140625, 219.81866455078125, 145.79739379882812, 2316.033447265625, 175.07998657226562, 504.1494140625, -294.5716247558594, -57.75754165649414, 237.48178100585938, -104.95108032226562, 330.8364562988281, -150.78030395507812, -214.8096923828125, 607.55322265625, 555.4520263671875, -84.78375244140625, 316.55169677734375, 797.0275268554688, 818.9576416015625, 213.974853515625, 601.9868774414062, 385.82940673828125, 239.46670532226562, 1042.60302734375, 817.589111328125, 645.3164672851562, 1356.35791015625, 49.82410430908203, 1797.8804931640625, -917.8536987304688, 1183.6142578125, 2.6168041229248047, 297.20068359375, 1594.767822265625, 437.7622375488281, 101.42515563964844, 34.62333679199219, 1465.0537109375, -31.520734786987305, 2439.488037109375, 14.795944213867188, 105.85652160644531, 135.91009521484375, -60.57868194580078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000637.npy"} +{"epoch": 0.9353891336270191, "step": 638, "batch_size": 64, "mean": 380.04638671875, "std": 758.9913330078125, "min": -3609.371337890625, "p10": -167.46829452514643, "median": 311.6347198486328, "p90": 1233.2465942382814, "max": 1903.514404296875, "pos_frac": 0.796875, "sample": [1366.9324951171875, -880.1738891601562, -3609.371337890625, 970.735107421875, 315.630859375, 140.38656616210938, 6.079618453979492, 70.59271240234375, 1157.1898193359375, 340.3968811035156, 780.3114013671875, 1384.1986083984375, 140.45164489746094, 733.9931640625, 320.7979736328125, 551.7218017578125, 665.01806640625, -530.5072631835938, 484.86541748046875, 16.286640167236328, 502.0096130371094, 45.503379821777344, -322.76995849609375, 98.92615509033203, 297.9256286621094, 962.8394775390625, 1454.66259765625, -59.42775344848633, 582.2445068359375, 235.72091674804688, 865.0213012695312, 280.56231689453125, 802.2903442382812, -211.99984741210938, -192.5113983154297, 829.6270751953125, 874.189208984375, -54.26168441772461, 328.3342590332031, -367.3066711425781, 7.401994705200195, 502.1149597167969, 307.6385803222656, 1903.514404296875, 73.38379669189453, -18.76304817199707, 1027.9656982421875, 95.35652160644531, 685.3887329101562, 1208.88818359375, -77.87576293945312, -60.993499755859375, 1120.074951171875, 504.604248046875, 138.41842651367188, 30.563262939453125, 100.11539459228516, 58.316444396972656, 870.5648193359375, 1243.6859130859375, -109.03438568115234, 51.46544647216797, 1850.6439208984375, 1432.4150390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000638.npy"} +{"epoch": 0.9368575624082232, "step": 639, "batch_size": 64, "mean": 157.6584014892578, "std": 713.8563232421875, "min": -1322.6422119140625, "p10": -684.9025573730469, "median": 175.00524139404297, "p90": 1069.9893676757818, "max": 1890.928955078125, "pos_frac": 0.609375, "sample": [1393.53369140625, -992.5590209960938, -497.19256591796875, 1266.25390625, 938.4046630859375, -495.46234130859375, 241.05252075195312, 398.6788024902344, -590.9647216796875, 804.2548217773438, 1587.4146728515625, -24.669315338134766, 459.8143005371094, 639.8766479492188, -1117.12109375, 214.62203979492188, -479.932373046875, -393.0672607421875, -43.528228759765625, 454.3384704589844, 745.0325317382812, -1254.0306396484375, 876.0989379882812, 613.0850219726562, -628.310791015625, -1322.6422119140625, -1053.58447265625, 292.3955383300781, 164.71231079101562, 2.7876739501953125, 610.2786254882812, 480.7082824707031, 17.39104461669922, -571.3826293945312, 1126.3828125, 1253.785400390625, -386.3079833984375, 871.9306030273438, -397.2808837890625, -32.733375549316406, 95.93831634521484, -67.27212524414062, 622.7939453125, 421.761962890625, 1890.928955078125, -879.3141479492188, -166.93704223632812, 19.082725524902344, -315.3193359375, -450.56671142578125, 181.94996643066406, 747.44384765625, 252.6972198486328, 901.2054443359375, -678.4470825195312, -420.5317687988281, -687.669189453125, 187.91810607910156, 123.00013732910156, 722.3656616210938, 377.32305908203125, 1289.809326171875, 581.8521118164062, 168.06051635742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000639.npy"} +{"epoch": 0.9383259911894273, "step": 640, "batch_size": 64, "mean": 574.392578125, "std": 803.23193359375, "min": -911.8138427734375, "p10": -190.05155487060546, "median": 417.2586975097656, "p90": 1467.5801025390626, "max": 3141.627685546875, "pos_frac": 0.765625, "sample": [763.0390014648438, 576.0474853515625, -522.4783325195312, -146.4922332763672, 128.52777099609375, 600.810546875, 219.797119140625, 1379.947265625, 1378.220947265625, 207.16024780273438, 1458.786865234375, 1685.2052001953125, 927.1572265625, 60.985870361328125, -269.6239929199219, 487.9815368652344, 906.5166625976562, -170.68438720703125, 24.861679077148438, 1471.3486328125, -856.0379638671875, 961.712158203125, 429.17523193359375, 788.786376953125, -192.89688110351562, -911.8138427734375, 122.93942260742188, 399.50872802734375, 3141.627685546875, 985.3695068359375, 604.7128295898438, 1013.2688598632812, -644.4102172851562, 642.9472045898438, 2584.8505859375, 342.27349853515625, -179.7586669921875, 266.53790283203125, 66.5552978515625, 258.5599365234375, -80.48274230957031, 222.43055725097656, -5.7815093994140625, 1195.4166259765625, 1075.1190185546875, 138.43557739257812, 942.5028686523438, 651.0369262695312, -505.58367919921875, 1298.1795654296875, 2349.04931640625, 347.4377136230469, -80.57192993164062, 1179.2899169921875, -40.40589904785156, 754.7142333984375, 405.3421630859375, 911.93310546875, 72.02656555175781, 778.1048583984375, 1528.6610107421875, 2583.255126953125, -183.41246032714844, 233.40493774414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000640.npy"} +{"epoch": 0.9397944199706314, "step": 641, "batch_size": 64, "mean": 378.48095703125, "std": 760.3213500976562, "min": -1221.1627197265625, "p10": -540.9864532470702, "median": 379.2915344238281, "p90": 1083.7160888671876, "max": 3335.364501953125, "pos_frac": 0.75, "sample": [161.18661499023438, 324.47076416015625, 499.5301208496094, 1098.264404296875, -565.6256103515625, -158.21768188476562, 475.76220703125, 1645.9864501953125, 515.98681640625, 169.8698272705078, -1023.552490234375, 555.13671875, 3.6866607666015625, -435.7410888671875, 3335.364501953125, -558.8842163085938, -499.2250061035156, -461.775634765625, 70.91966247558594, 1049.77001953125, -45.685211181640625, 116.6879653930664, 90.79510498046875, 690.4786987304688, 235.78390502929688, 591.5660400390625, -1213.2000732421875, 867.4520874023438, 606.3041381835938, 107.21678161621094, 940.3585205078125, 506.86932373046875, 663.03076171875, 79.67071533203125, 116.28723907470703, 38.99093246459961, 1395.218017578125, 317.236328125, 818.629150390625, 666.9730224609375, 97.23645782470703, -9.768918991088867, -57.27942657470703, -647.8531494140625, 260.67730712890625, 834.4266967773438, -1221.1627197265625, 1034.57421875, 1030.9019775390625, 1015.8927001953125, 2240.347412109375, 849.0140991210938, -1029.4149169921875, 758.8292236328125, -50.458229064941406, 403.2468566894531, 640.4248046875, -377.48126220703125, 551.1572875976562, 355.3362121582031, 787.4942626953125, 1189.4422607421875, 576.7904052734375, 1196.830810546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000641.npy"} +{"epoch": 0.9412628487518355, "step": 642, "batch_size": 64, "mean": 454.66107177734375, "std": 936.3311157226562, "min": -1683.145263671875, "p10": -359.41856689453124, "median": 372.04315185546875, "p90": 1379.0308105468757, "max": 4975.771484375, "pos_frac": 0.71875, "sample": [113.51313781738281, 552.9683837890625, 762.7894287109375, 614.7354736328125, 291.1839599609375, -1683.145263671875, 820.5031127929688, 274.3555603027344, -69.02899169921875, -596.3790283203125, -45.57312774658203, 99.12826538085938, 1553.9114990234375, -367.061279296875, 832.8043823242188, 1035.16552734375, 489.69854736328125, 764.6354370117188, -935.9902954101562, 65.79965209960938, 260.5963439941406, -143.34027099609375, 395.788330078125, -10.479082107543945, 368.122314453125, 673.064697265625, -170.63949584960938, 214.5250244140625, 95.38453674316406, 474.2791442871094, 851.258544921875, 656.552490234375, 824.0958251953125, 2424.32080078125, -321.2690734863281, 47.415557861328125, 382.0921630859375, 787.9447021484375, -653.16552734375, 254.41665649414062, 818.8255004882812, 168.26336669921875, 520.4286499023438, 1465.28271484375, -341.5855712890625, 1177.7763671875, 805.451904296875, 1798.329345703125, -195.35267639160156, -165.5853271484375, 4975.771484375, 961.1714477539062, -617.9926147460938, 467.7255859375, -166.11936950683594, -259.8465881347656, 375.9639892578125, 2645.25390625, 42.629974365234375, -974.4285888671875, 504.29400634765625, 1876.97412109375, 288.1044921875, 941.9969482421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000642.npy"} +{"epoch": 0.9427312775330396, "step": 643, "batch_size": 64, "mean": 349.2655029296875, "std": 571.4017333984375, "min": -803.97509765625, "p10": -383.6912628173828, "median": 320.9884033203125, "p90": 1053.9843872070312, "max": 1924.3406982421875, "pos_frac": 0.765625, "sample": [362.1546325683594, 320.7588195800781, 159.88856506347656, 962.7294921875, 821.4489135742188, 689.073486328125, 577.7972412109375, -37.62718963623047, 429.0462646484375, 911.7470703125, 15.858331680297852, -666.0440673828125, 288.0330505371094, 199.77908325195312, -241.45423889160156, 947.853759765625, 893.1784057617188, 1040.84716796875, 1087.33203125, 431.6239013671875, 126.37110900878906, 40.04728317260742, -290.49395751953125, 164.68386840820312, 109.67793273925781, 172.25216674804688, -803.97509765625, 694.9776000976562, 476.7274475097656, 1110.9202880859375, -349.86322021484375, 321.2179870605469, 315.5120849609375, 212.005615234375, 952.29296875, 1273.085205078125, 392.5254821777344, -468.25067138671875, 645.7095336914062, 379.0739440917969, -607.4653930664062, 1924.3406982421875, 1069.210693359375, 377.80126953125, 672.2703857421875, 92.74854278564453, 2.83209228515625, 415.89813232421875, 1900.819580078125, 28.222881317138672, 966.7470703125, 664.74658203125, 693.80126953125, -292.48101806640625, 1059.6146240234375, 40.6314697265625, -425.84228515625, -398.1889953613281, 421.28399658203125, -76.16586303710938, -223.40435791015625, -221.19345092773438, -571.0099487304688, 169.2535400390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000643.npy"} +{"epoch": 0.9441997063142438, "step": 644, "batch_size": 64, "mean": 365.61663818359375, "std": 762.953125, "min": -1870.052978515625, "p10": -567.198501586914, "median": 229.78643798828125, "p90": 1261.613037109375, "max": 2554.60400390625, "pos_frac": 0.75, "sample": [2554.60400390625, 34.943748474121094, 880.68359375, -1870.052978515625, 487.77630615234375, 378.72991943359375, -800.0504150390625, 99.91102600097656, 993.61376953125, 404.93804931640625, 43.39922332763672, -17.366378784179688, 7.026557922363281, 450.99176025390625, 173.8794708251953, 633.6552124023438, 210.28509521484375, -31.7049560546875, 294.7466735839844, -595.353759765625, 157.01580810546875, 2080.297607421875, 45.80528259277344, -617.7354736328125, 826.7144165039062, 952.4735107421875, -1.80950927734375, 490.15814208984375, -275.18768310546875, 1702.2999267578125, -38.96526336669922, -275.5877380371094, 769.224853515625, -77.07640075683594, 27.895830154418945, 0.010858535766601562, 1266.806640625, 702.519775390625, 587.1060180664062, 249.28778076171875, 135.26266479492188, -501.5028991699219, 514.7551879882812, 68.38056182861328, 1169.974853515625, 560.1796264648438, -653.0349731445312, -274.90411376953125, 161.9938201904297, 1088.9990234375, 1679.830322265625, 879.3216552734375, 88.42942810058594, -1211.15185546875, 1249.49462890625, 1199.762939453125, 369.42236328125, 1305.5919189453125, 24.53790283203125, 478.5975646972656, 562.9616088867188, 2049.344970703125, 151.0308837890625, -603.724853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000644.npy"} +{"epoch": 0.9456681350954479, "step": 645, "batch_size": 64, "mean": 176.3662872314453, "std": 705.8453369140625, "min": -1366.8255615234375, "p10": -743.0362426757812, "median": 104.98989868164062, "p90": 1097.4837036132815, "max": 2255.26513671875, "pos_frac": 0.578125, "sample": [313.8077697753906, 1112.23046875, -635.755859375, 970.2590942382812, -1366.8255615234375, -983.965087890625, -695.7048950195312, 5.998809814453125, 385.1379699707031, -142.99819946289062, -180.28041076660156, -40.6834716796875, 54.414756774902344, -7.600851058959961, -123.32858276367188, -486.3406982421875, 439.72491455078125, 328.2137145996094, 945.3888549804688, 1063.0745849609375, 698.126220703125, -393.7420654296875, 100.3248291015625, -15.647659301757812, 113.78228759765625, 19.324626922607422, 1135.1905517578125, -42.714637756347656, -763.3211059570312, -1119.6865234375, 184.41505432128906, 423.4109802246094, -667.5069580078125, 1470.482421875, 877.2969360351562, -20.115570068359375, 1850.923583984375, 828.9483642578125, -1166.8385009765625, -782.7862548828125, 1335.2598876953125, -70.32070922851562, 109.65496826171875, 347.3490295410156, 557.0069580078125, 546.0651245117188, 412.48052978515625, 326.13909912109375, 154.55828857421875, 618.6373901367188, -14.67156982421875, -259.15081787109375, 1314.0283203125, 33.226097106933594, 351.631591796875, 2255.26513671875, -133.93197631835938, 326.49017333984375, -889.96142578125, -116.28202056884766, 602.3653564453125, 141.7875518798828, -47.12260055541992, -297.6954345703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000645.npy"} +{"epoch": 0.947136563876652, "step": 646, "batch_size": 64, "mean": 309.9844665527344, "std": 604.6002197265625, "min": -1129.5433349609375, "p10": -384.34992980957026, "median": 214.14554595947266, "p90": 1109.2163452148438, "max": 2100.311279296875, "pos_frac": 0.734375, "sample": [812.947021484375, 1101.8831787109375, -88.61231231689453, 1651.62109375, 621.638427734375, 236.37327575683594, 530.8807983398438, 645.2133178710938, 935.01708984375, 23.027633666992188, 432.9496765136719, 538.4361572265625, -981.8724365234375, -3.067962646484375, 1062.5203857421875, 207.02430725097656, -403.9870300292969, 284.9433898925781, 1154.9114990234375, -60.68560028076172, 370.72320556640625, -190.2333984375, -144.24853515625, 595.7364501953125, 123.59902954101562, 358.6156005859375, 460.01251220703125, 250.02891540527344, 109.05023193359375, 908.8953857421875, 110.81365966796875, 483.5163879394531, 27.342132568359375, 148.78855895996094, -224.6038055419922, 205.97645568847656, 74.10772705078125, 1661.0924072265625, -236.5409698486328, -1129.5433349609375, 421.704833984375, 548.247314453125, 140.69398498535156, -456.6455078125, -87.49143981933594, -696.3013916015625, -70.18583679199219, 85.51863861083984, -529.7679443359375, 2100.311279296875, 1112.359130859375, 221.26678466796875, 627.219970703125, 39.07926940917969, 1296.7642822265625, -338.530029296875, -609.9617309570312, 103.89087677001953, 171.55894470214844, 173.72015380859375, 1165.1148681640625, 970.8274536132812, 510.887939453125, 274.4329528808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000646.npy"} +{"epoch": 0.9486049926578561, "step": 647, "batch_size": 64, "mean": 387.15777587890625, "std": 560.1911010742188, "min": -1176.6917724609375, "p10": -170.35709228515623, "median": 360.6318817138672, "p90": 1150.7571411132812, "max": 1908.7781982421875, "pos_frac": 0.765625, "sample": [838.01708984375, 604.2588500976562, 342.55828857421875, 245.71653747558594, 242.41928100585938, 1174.07373046875, 220.36251831054688, 359.4096984863281, 534.1854248046875, 525.6532592773438, 1519.5528564453125, 55.088897705078125, 42.68231201171875, 1730.74462890625, 769.4012451171875, 630.3580932617188, -68.61509704589844, 1132.2142333984375, 705.299072265625, -146.57962036132812, 215.48941040039062, 202.12261962890625, -63.13105773925781, -175.009033203125, 593.392333984375, 369.79388427734375, -73.09080505371094, 1908.7781982421875, -974.8396606445312, 809.5770263671875, -212.31573486328125, -291.04791259765625, 85.53689575195312, 779.9107666015625, 1158.7041015625, 841.1763916015625, 450.4326171875, 361.85406494140625, -622.6383666992188, 271.7303466796875, 616.6591186523438, 117.33238983154297, 1200.122802734375, -159.5025634765625, -76.30996704101562, 472.4400634765625, 425.5487976074219, 446.40521240234375, -87.68038177490234, 1227.10205078125, 191.1913604736328, 101.2657241821289, 1088.14013671875, 247.679931640625, -8.782180786132812, 622.9347534179688, -1176.6917724609375, 881.8397216796875, -294.5522155761719, 312.2027587890625, 492.2794494628906, 416.25555419921875, 504.4042663574219, 124.58401489257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000647.npy"} +{"epoch": 0.9500734214390602, "step": 648, "batch_size": 64, "mean": 582.78515625, "std": 817.3012084960938, "min": -1075.1087646484375, "p10": -201.61361236572264, "median": 519.8121948242188, "p90": 1654.1310913085945, "max": 3672.803955078125, "pos_frac": 0.75, "sample": [566.1473388671875, 304.0360107421875, 184.4805908203125, 532.7351684570312, 259.0008544921875, 652.513916015625, 1988.12158203125, 1736.3662109375, 673.764404296875, 1462.2491455078125, -43.645172119140625, -683.1300048828125, 46.516632080078125, 965.9702758789062, 204.1143798828125, -202.66468811035156, 1293.268798828125, 551.1921997070312, -32.63862609863281, -132.0013427734375, -26.769453048706055, 525.2127075195312, -88.11890411376953, 988.452392578125, -204.2274627685547, 2036.5552978515625, 633.8942260742188, 109.35116577148438, -49.80451965332031, -194.9854736328125, 832.6618041992188, 803.598876953125, -199.16110229492188, 3672.803955078125, 666.9527587890625, 535.5091552734375, -1075.1087646484375, 548.327392578125, 56.15924072265625, -696.2679443359375, -214.35235595703125, 296.99481201171875, 2416.679443359375, 727.8580322265625, 735.8492431640625, 401.5873718261719, 1379.2947998046875, -83.9757080078125, 578.4903564453125, 648.0397338867188, 1151.2664794921875, 366.3113708496094, 316.5121154785156, 514.4116821289062, 1367.7884521484375, 118.72908782958984, 497.0061340332031, 829.447998046875, 443.8201904296875, 2505.831298828125, 1325.7320556640625, 225.88482666015625, -404.3041687011719, 1951.91064453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000648.npy"} +{"epoch": 0.9515418502202643, "step": 649, "batch_size": 64, "mean": 283.9723205566406, "std": 691.89990234375, "min": -911.466064453125, "p10": -478.4241302490234, "median": 211.6774139404297, "p90": 1081.8225341796876, "max": 3335.921630859375, "pos_frac": 0.609375, "sample": [573.4129638671875, 14.948371887207031, 474.8147888183594, -128.92166137695312, 541.23876953125, -311.133056640625, -11.27755355834961, 160.15130615234375, 737.8466796875, 346.26287841796875, 572.024169921875, -140.84576416015625, -531.2295532226562, 67.19755554199219, -578.0602416992188, -176.90475463867188, -99.93406677246094, 395.6227111816406, -364.8854064941406, 288.31671142578125, 661.5909423828125, 189.9741668701172, -498.3694152832031, -158.4207000732422, 858.2232666015625, -820.7417602539062, 764.6583862304688, 1029.598876953125, -62.02665328979492, 585.185302734375, 352.9161682128906, -405.4273986816406, 58.247802734375, 39.946327209472656, 950.0841064453125, 518.7884521484375, 814.4951171875, 370.9127197265625, 853.2954711914062, -430.1346435546875, 1345.179443359375, 442.10662841796875, -4.525943756103516, 1178.587890625, 889.4044189453125, -247.63644409179688, 1489.6351318359375, 3335.921630859375, 483.017822265625, 357.39984130859375, -142.46153259277344, 233.3806610107422, 52.65235900878906, 837.1438598632812, -358.4434814453125, 1269.314453125, 1299.53271484375, -518.64697265625, -13.328479766845703, -151.50311279296875, -864.7972412109375, -911.466064453125, 1104.2041015625, -431.8851318359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000649.npy"} +{"epoch": 0.9530102790014684, "step": 650, "batch_size": 64, "mean": 437.9971008300781, "std": 746.2782592773438, "min": -1260.77978515625, "p10": -482.35199890136715, "median": 297.781982421875, "p90": 1371.5266235351564, "max": 2593.650390625, "pos_frac": 0.765625, "sample": [206.4737548828125, -663.9244995117188, 818.20654296875, 791.3927612304688, 199.86727905273438, 1008.4638671875, 304.4488220214844, -911.9646606445312, 2379.168701171875, 661.8443603515625, 1122.83544921875, -655.25, 1450.1043701171875, 1027.667236328125, 919.0204467773438, 1381.15185546875, 283.633056640625, 1743.3798828125, 1166.252197265625, 2593.650390625, 821.7991333007812, -107.32164764404297, 1570.1090087890625, 597.3379516601562, 259.95501708984375, 2.294872283935547, -105.52130126953125, 490.7139587402344, 182.59439086914062, 421.8092956542969, 700.3566284179688, -82.12605285644531, -277.5670471191406, -971.5440673828125, -445.15875244140625, 244.18801879882812, -24.088329315185547, 1991.3017578125, -127.60227966308594, 18.1064453125, 551.1796875, 200.54287719726562, -498.2919616699219, 646.8870239257812, 651.837646484375, 609.0074462890625, 76.13304138183594, 887.43701171875, 115.41840362548828, 1349.0677490234375, 291.1151428222656, 271.7731628417969, 644.0758056640625, 858.967041015625, 868.0796508789062, 42.33489227294922, 436.88360595703125, -583.8198852539062, -1260.77978515625, 239.7923583984375, 755.080322265625, 108.07056427001953, -401.2668151855469, 186.23208618164062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000650.npy"} +{"epoch": 0.9544787077826725, "step": 651, "batch_size": 64, "mean": 555.2301025390625, "std": 632.68359375, "min": -617.2399291992188, "p10": -40.655047035217265, "median": 463.89703369140625, "p90": 1339.6764648437504, "max": 2323.61669921875, "pos_frac": 0.859375, "sample": [905.8801879882812, 626.5337524414062, 2312.0517578125, 140.97918701171875, -454.86029052734375, -48.8929443359375, 401.73211669921875, 601.006591796875, 35.56898498535156, 151.1443328857422, 1221.7608642578125, 2323.61669921875, 311.1827087402344, -146.11199951171875, 141.747314453125, -239.75173950195312, 39.79227066040039, 526.0668334960938, -310.2259216308594, 899.6354370117188, 1801.584716796875, 416.56689453125, 832.4459228515625, 318.05767822265625, 715.3911743164062, 55.60444641113281, -21.433286666870117, 0.11785888671875, 194.4150390625, 719.5709228515625, 189.7562713623047, -6.283164978027344, 1111.0374755859375, 109.69329833984375, 1241.0289306640625, 925.9961547851562, 585.3931884765625, 54.980072021484375, 777.0640869140625, 3.7274646759033203, 457.74609375, 1156.7781982421875, -338.0657043457031, 530.063232421875, 673.4662475585938, 708.6455688476562, 53.93927001953125, 979.1613159179688, 107.56132507324219, 654.1876220703125, 202.50375366210938, 313.733154296875, 1381.9539794921875, 1915.9207763671875, 383.99566650390625, -617.2399291992188, 1079.906494140625, 470.0479736328125, 1800.630126953125, 479.8725280761719, 1131.1300048828125, 1509.6474609375, 921.29443359375, 114.27529907226562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000651.npy"} +{"epoch": 0.9559471365638766, "step": 652, "batch_size": 64, "mean": 429.65704345703125, "std": 718.1599731445312, "min": -1573.8304443359375, "p10": -274.2612274169921, "median": 284.01219177246094, "p90": 1326.8586669921874, "max": 2670.940185546875, "pos_frac": 0.78125, "sample": [374.0682373046875, 170.52517700195312, 782.8931274414062, -1137.935302734375, 596.3657836914062, 454.55267333984375, 234.67332458496094, -516.3482055664062, 712.3096923828125, 159.827880859375, 1327.8092041015625, 1432.4105224609375, 1324.6407470703125, -1573.8304443359375, 71.5396957397461, 572.238037109375, 41.001739501953125, 957.3873901367188, 1480.904541015625, -315.1583557128906, 1045.621337890625, 387.7356872558594, 83.26561737060547, -197.44927978515625, 16.35935401916504, -23.23388671875, 961.6231689453125, -143.46128845214844, 397.5616455078125, 121.34778594970703, 77.30632019042969, 2670.940185546875, -307.1806335449219, 2347.96630859375, 294.6945495605469, 1638.354736328125, 21.904098510742188, 76.4127197265625, 67.41064453125, 29.662261962890625, 465.12933349609375, -41.44525909423828, 203.43472290039062, -119.66434478759766, 992.5137939453125, 888.39404296875, 937.8795166015625, -96.3819351196289, 655.3209838867188, 801.7808227539062, 1860.5335693359375, 87.46870422363281, 862.16796875, 246.98818969726562, -559.034423828125, 418.3507995605469, 1308.3494873046875, -330.58392333984375, 416.8954162597656, 844.6439208984375, 271.401611328125, 416.49444580078125, 273.329833984375, -22.635040283203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000652.npy"} +{"epoch": 0.9574155653450808, "step": 653, "batch_size": 64, "mean": 386.3274841308594, "std": 696.8011474609375, "min": -1242.1859130859375, "p10": -401.17788391113277, "median": 286.0798645019531, "p90": 1325.7160766601564, "max": 2233.72607421875, "pos_frac": 0.734375, "sample": [885.6083984375, -1.3316802978515625, -121.15461730957031, 2148.22998046875, 182.00978088378906, -375.8140563964844, 100.76606750488281, -424.988037109375, 46.69439697265625, 1017.4942626953125, 536.6033935546875, -369.421630859375, 23.46826934814453, 1401.0335693359375, 888.2123413085938, 1354.9951171875, 894.1337890625, 1257.3983154296875, 1233.597412109375, -194.47430419921875, 159.3421630859375, 326.2259826660156, 1746.511474609375, 399.4654846191406, 526.1203002929688, 735.9226684570312, 1047.4276123046875, 302.0953369140625, 11.844276428222656, -522.85498046875, 1434.0521240234375, 270.06439208984375, 593.1755981445312, -1242.1859130859375, -222.42391967773438, -173.3897247314453, 353.596435546875, 588.44140625, -675.923828125, 2105.89794921875, 2233.72607421875, 640.5563354492188, 25.959455490112305, -32.445213317871094, 585.9381103515625, 609.554443359375, 19.013916015625, -412.048095703125, 265.8813171386719, 255.2335205078125, 517.5423583984375, 146.7915496826172, 832.431396484375, 168.85025024414062, 528.7252807617188, -854.0487670898438, -435.3395080566406, 386.49334716796875, 687.042724609375, 31.235490798950195, 567.3364868164062, -299.2105712890625, 230.71121215820312, -221.43821716308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000653.npy"} +{"epoch": 0.9588839941262849, "step": 654, "batch_size": 64, "mean": 376.21954345703125, "std": 629.2120361328125, "min": -470.1534423828125, "p10": -361.5203918457031, "median": 281.99497985839844, "p90": 1344.1203369140626, "max": 2126.8662109375, "pos_frac": 0.65625, "sample": [104.74312591552734, 368.71771240234375, -229.46905517578125, 105.56616973876953, 494.40374755859375, -136.7861785888672, -375.142822265625, 392.521484375, 14.400531768798828, 794.9298095703125, 731.6743774414062, -331.6388854980469, 111.82866668701172, 467.184814453125, -404.24066162109375, -208.9183349609375, -71.11565399169922, 2126.8662109375, 991.2433471679688, 585.38720703125, 550.9063720703125, 1588.7542724609375, 481.56658935546875, 311.18414306640625, 1533.3316650390625, 700.0968627929688, -166.9813690185547, 698.2359008789062, 123.64443969726562, -107.646240234375, -196.2064971923828, 1290.450927734375, 372.05035400390625, 53.096466064453125, 31.42767333984375, -26.59178924560547, 886.6712036132812, 250.83895874023438, 232.93643188476562, 499.6878967285156, 442.2735595703125, 838.3359985351562, 756.9825439453125, -298.9841613769531, -470.1534423828125, 910.4248657226562, 328.06341552734375, 1760.80859375, 1353.060791015625, -322.3517150878906, -83.98779296875, 1509.6405029296875, 606.6956787109375, 252.80581665039062, -85.34123992919922, -387.67041015625, 594.9697875976562, -374.3267517089844, 1813.306396484375, 1323.25927734375, -25.112335205078125, -378.46185302734375, -405.76220703125, -220.034912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000654.npy"} +{"epoch": 0.960352422907489, "step": 655, "batch_size": 64, "mean": 392.17303466796875, "std": 641.191650390625, "min": -787.6956787109375, "p10": -330.0212158203125, "median": 259.9714889526367, "p90": 1189.644616699219, "max": 2412.729736328125, "pos_frac": 0.71875, "sample": [-405.8531494140625, -787.6956787109375, 1370.6368408203125, 213.48745727539062, 53.03968048095703, 1208.336669921875, 895.14697265625, 899.7201538085938, 317.2815856933594, -232.6080322265625, 704.9823608398438, 49.25164794921875, -59.96148681640625, -591.28759765625, -6.794347763061523, 1145.8558349609375, -138.84811401367188, 863.7796630859375, -341.037353515625, 1055.3468017578125, -255.93377685546875, 417.09002685546875, 942.4141235351562, -567.9127807617188, -304.31689453125, 583.3116455078125, 307.5653381347656, 50.532745361328125, -141.4369354248047, 267.83837890625, -181.45468139648438, 1710.0155029296875, 164.48255920410156, 226.59519958496094, 647.391357421875, 865.2230834960938, -652.3670043945312, 798.5279541015625, 587.158935546875, 1068.2587890625, 392.1493225097656, 76.7476577758789, 638.694580078125, 2412.729736328125, 1627.2144775390625, 11.329452514648438, 403.4336242675781, -519.8359985351562, 1155.377685546875, 567.2731323242188, -104.84111022949219, 170.7430419921875, 60.61895751953125, 233.66551208496094, 1204.3304443359375, 252.10459899902344, 1631.8338623046875, -289.15350341796875, -136.3993682861328, 540.7802734375, 683.12841796875, 244.7725372314453, 156.0328369140625, 940.58154296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000655.npy"} +{"epoch": 0.9618208516886931, "step": 656, "batch_size": 64, "mean": 663.841796875, "std": 742.0775146484375, "min": -595.9990844726562, "p10": -116.99443130493164, "median": 427.6307373046875, "p90": 1797.725927734375, "max": 2886.087646484375, "pos_frac": 0.8125, "sample": [1552.0035400390625, 732.187744140625, -595.9990844726562, -28.678119659423828, 217.70745849609375, 297.62518310546875, 333.62078857421875, 570.807373046875, 319.9635925292969, 798.05322265625, 806.850830078125, 664.363525390625, 299.0284423828125, 68.75662231445312, 399.020751953125, 1810.583740234375, -190.41326904296875, 65.33718872070312, 1767.724365234375, 166.73556518554688, 840.1636962890625, -103.1427001953125, 2103.333251953125, -154.17782592773438, -115.12008666992188, 234.41473388671875, 959.3143310546875, 1189.04052734375, 446.5701904296875, 1092.616943359375, 387.4407653808594, 2886.087646484375, 711.9395141601562, -150.5763397216797, 2367.21728515625, 882.0943603515625, 457.83331298828125, 1028.6763916015625, 2448.657958984375, 93.68881225585938, 2240.18896484375, -160.2322235107422, 1260.3807373046875, 54.86181640625, 1007.8726806640625, 1020.9306030273438, 378.49688720703125, 2223.266845703125, 360.16571044921875, 261.6742248535156, 881.4586791992188, 144.55662536621094, -161.08438110351562, 408.6912841796875, 960.1546020507812, 268.4432678222656, -25.731460571289062, 317.292724609375, -117.79772186279297, 1134.5316162109375, 1031.792724609375, -49.98607635498047, 655.3378295898438, 729.2584228515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000656.npy"} +{"epoch": 0.9632892804698973, "step": 657, "batch_size": 64, "mean": 533.8377075195312, "std": 722.1788330078125, "min": -1615.4117431640625, "p10": -112.0333885192871, "median": 462.8014221191406, "p90": 1394.8278930664062, "max": 2553.186279296875, "pos_frac": 0.859375, "sample": [1302.25, 29.24970245361328, 734.3887329101562, 477.77191162109375, 1507.7642822265625, 1631.8961181640625, 800.8225708007812, 138.04783630371094, 1396.8212890625, -128.375732421875, -367.81036376953125, 604.3031005859375, 1390.1766357421875, -100.89482116699219, 198.0516815185547, 505.1422119140625, 60.18998718261719, 348.6540222167969, 102.16295623779297, 1517.83642578125, 2454.5966796875, 511.4169921875, 80.07402801513672, 2553.186279296875, 1289.8458251953125, 1249.4249267578125, 234.89772033691406, 24.41632080078125, 694.09423828125, 641.274658203125, 827.538818359375, 447.8309326171875, 33.900146484375, -1615.4117431640625, 566.0064086914062, 602.1412353515625, 280.3988037109375, -116.80706024169922, 17.7139892578125, 332.62896728515625, 661.264404296875, -1164.7100830078125, -303.7391662597656, 607.9803466796875, 929.7191162109375, 130.8018341064453, 239.98692321777344, 585.876708984375, 443.47039794921875, 55.97052764892578, 724.4403686523438, 860.2974243164062, 180.48924255371094, 806.4689331054688, 416.53668212890625, 323.0288391113281, 1285.4688720703125, 2525.812744140625, 382.83258056640625, -223.57907104492188, 275.9295959472656, -71.46296691894531, 680.71240234375, 554.3991088867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000657.npy"} +{"epoch": 0.9647577092511013, "step": 658, "batch_size": 64, "mean": 425.004638671875, "std": 627.71630859375, "min": -1123.6024169921875, "p10": -413.93569641113277, "median": 441.28082275390625, "p90": 1205.7565185546875, "max": 2196.323974609375, "pos_frac": 0.734375, "sample": [907.8508911132812, 856.5645751953125, 795.4217529296875, -339.563720703125, 440.8700866699219, -230.81085205078125, 1150.5654296875, 267.8132019042969, -423.98175048828125, 440.8798522949219, 341.87054443359375, 1496.888671875, -3.3876495361328125, 294.5151062011719, 329.17010498046875, -537.42041015625, 441.6817932128906, 467.4181823730469, 111.77433013916016, -54.5998649597168, 564.6148681640625, 185.4515380859375, 451.74163818359375, 149.63174438476562, -1123.6024169921875, 915.1700439453125, -464.8529357910156, 581.1267700195312, 518.7780151367188, -154.61386108398438, -149.0266571044922, 274.8717041015625, -65.802001953125, 479.7236328125, 301.1214599609375, 1542.8944091796875, -30.71109962463379, 1209.7406005859375, 703.90087890625, 1137.321533203125, 106.86524963378906, 387.7564697265625, 656.4102783203125, 1516.6156005859375, 1196.4603271484375, 586.8968505859375, -393.7590637207031, 565.0569458007812, 851.8216552734375, 849.88818359375, -574.541259765625, 1382.4381103515625, -422.58282470703125, 2196.323974609375, 712.4620361328125, 1196.3209228515625, 603.92724609375, 1468.352783203125, -297.5545349121094, 754.67578125, 52.33448791503906, 466.7868347167969, 184.17779541015625, -627.837890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000658.npy"} +{"epoch": 0.9662261380323054, "step": 659, "batch_size": 64, "mean": 410.4945068359375, "std": 606.286865234375, "min": -792.6492309570312, "p10": -232.67102355957027, "median": 360.42051696777344, "p90": 1162.0375244140628, "max": 2621.2294921875, "pos_frac": 0.75, "sample": [686.4600219726562, -24.092498779296875, 185.83660888671875, 943.46875, -91.98033142089844, -250.79579162597656, 1221.4151611328125, -347.4940185546875, -554.1766967773438, -141.5944061279297, 1106.8687744140625, 335.90655517578125, 551.7704467773438, -50.84013366699219, 76.67433166503906, 176.16766357421875, 614.4139404296875, 174.50485229492188, 225.9263916015625, 158.31423950195312, 346.9106140136719, 546.7618408203125, -550.890625, 2621.2294921875, -792.6492309570312, 1369.3828125, 391.6597595214844, 619.795654296875, 1253.0948486328125, 320.481201171875, 385.8417663574219, 381.8612365722656, 0.38999366760253906, -43.76826477050781, 575.9168701171875, 373.930419921875, 10.098800659179688, -0.4695701599121094, 59.741912841796875, 1185.6812744140625, -623.9349975585938, -11.458122253417969, 752.4548950195312, -190.37989807128906, 725.3429565429688, 75.93817138671875, -60.340110778808594, 684.6348266601562, 1792.7523193359375, 1839.853759765625, 849.4213256835938, 220.3753204345703, 811.7774658203125, 465.0611267089844, 397.82489013671875, 293.2962646484375, 1021.1118774414062, 1087.8514404296875, 547.7882080078125, -323.2558898925781, 832.4710693359375, 102.71607971191406, 444.90399169921875, 483.6865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000659.npy"} +{"epoch": 0.9676945668135095, "step": 660, "batch_size": 64, "mean": 535.7288818359375, "std": 686.8460693359375, "min": -1109.2269287109375, "p10": -210.54115905761716, "median": 456.9500274658203, "p90": 1485.5112548828126, "max": 2505.447021484375, "pos_frac": 0.765625, "sample": [-146.77210998535156, 302.09552001953125, 1149.1578369140625, 477.6556091308594, 471.52752685546875, 479.89837646484375, 660.6152954101562, 1493.1114501953125, 1160.358642578125, 889.4780883789062, 328.628662109375, -420.87005615234375, 1218.00927734375, 612.8610229492188, 906.37451171875, 1753.377197265625, -201.7771453857422, -41.79078674316406, -186.2024383544922, -289.4880065917969, 1153.4976806640625, 1587.18310546875, 81.27112579345703, 442.3725280761719, 804.5443725585938, 1964.881591796875, 329.3546142578125, -214.2971649169922, 780.9395751953125, 371.2578125, 111.5371322631836, 1467.7774658203125, -587.6585083007812, 2505.447021484375, 294.6852111816406, 649.3755493164062, 815.8823852539062, 72.20161437988281, 440.6597900390625, 835.3905639648438, 257.4973449707031, 110.43086242675781, 751.3684692382812, 850.4095458984375, -145.19189453125, -61.611114501953125, 123.46524810791016, 1684.071533203125, 961.8048706054688, 1124.8023681640625, -328.8509521484375, -20.400184631347656, -569.1102294921875, 701.675537109375, 1055.9508056640625, 636.4679565429688, 137.28463745117188, 326.2316589355469, 598.5558471679688, 2049.13232421875, -150.811279296875, -1109.2269287109375, 361.69110107421875, 418.4600524902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000660.npy"} +{"epoch": 0.9691629955947136, "step": 661, "batch_size": 64, "mean": 465.5032958984375, "std": 677.452392578125, "min": -772.16943359375, "p10": -112.08723831176756, "median": 369.09193420410156, "p90": 1090.4141967773446, "max": 3463.065673828125, "pos_frac": 0.828125, "sample": [206.01866149902344, -98.8824234008789, 851.2257080078125, -50.81776428222656, 370.9518127441406, 71.58134460449219, 586.7015380859375, -193.26889038085938, 676.8629150390625, 301.046875, 1173.67822265625, 329.30224609375, 651.1369018554688, 617.3248291015625, 3463.065673828125, 866.23828125, 192.8802490234375, 29.171051025390625, -772.16943359375, 130.9844970703125, -32.89048385620117, 840.1377563476562, 138.45419311523438, 402.54736328125, 253.7222900390625, 208.28553771972656, 319.82122802734375, 896.1314697265625, 1300.921142578125, 54.95771789550781, 379.16864013671875, 649.8655395507812, 548.1514892578125, 1226.051025390625, 532.5082397460938, 299.58184814453125, 144.05340576171875, 313.2959289550781, 243.025146484375, 595.6913452148438, -590.285400390625, 367.2320556640625, -117.74644470214844, 783.9092407226562, 864.0511474609375, -654.5482177734375, 695.404052734375, 712.7979736328125, -13.933618545532227, 670.4627075195312, 2972.087890625, -620.9369506835938, 356.2706298828125, 36.66447448730469, 828.7877197265625, 1509.48193359375, 84.01622772216797, 574.12353515625, -132.73309326171875, 1325.5045166015625, 480.714599609375, 496.52398681640625, 447.71185302734375, 0.13568878173828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000661.npy"} +{"epoch": 0.9706314243759178, "step": 662, "batch_size": 64, "mean": 375.7431335449219, "std": 716.1057739257812, "min": -1233.657470703125, "p10": -551.084161376953, "median": 413.9751281738281, "p90": 1176.2091796875002, "max": 2438.3154296875, "pos_frac": 0.71875, "sample": [-53.514366149902344, 901.9761962890625, 1267.05224609375, -1233.657470703125, 621.3800048828125, -504.52471923828125, 447.32061767578125, 1064.963134765625, 1909.563232421875, 24.441566467285156, 891.450927734375, -571.0382080078125, -1133.2783203125, -226.37013244628906, 1479.47265625, 77.97411346435547, -9.033456802368164, 417.8251953125, 2438.3154296875, 860.6705322265625, 1191.273193359375, -180.28680419921875, 410.12506103515625, -156.231689453125, 145.88720703125, 835.489501953125, 130.34434509277344, 1029.6907958984375, 475.0544128417969, 328.24249267578125, 391.9541015625, -257.3013916015625, 396.5936279296875, -347.0545959472656, 495.41119384765625, 1082.684814453125, 633.7841796875, 430.6678771972656, 690.4268798828125, 634.6729736328125, 34.50315856933594, 143.3159942626953, 966.5218505859375, 2083.486328125, 728.2730712890625, 1355.583740234375, -849.14404296875, 466.0769958496094, 1141.059814453125, 395.65960693359375, -619.76611328125, 50.11355209350586, -94.53526306152344, 587.7451782226562, -651.1109008789062, -210.54434204101562, 600.8978271484375, 578.4786376953125, 342.2068176269531, 548.1912841796875, 594.5272827148438, -892.094482421875, -296.406982421875, 12.104415893554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000662.npy"} +{"epoch": 0.9720998531571219, "step": 663, "batch_size": 64, "mean": 447.7265930175781, "std": 760.4644775390625, "min": -1725.1246337890625, "p10": -230.35004119873037, "median": 306.31959533691406, "p90": 1197.1114868164063, "max": 3335.766845703125, "pos_frac": 0.765625, "sample": [904.897705078125, 315.1590576171875, 856.2091674804688, 51.440948486328125, 414.2200927734375, 1154.1468505859375, 212.85679626464844, 297.4801330566406, -58.224853515625, 375.5027770996094, 80.79888153076172, 147.6477508544922, 1512.0777587890625, 883.9238891601562, 63.77702331542969, 950.1810913085938, 714.8406982421875, 30.9602108001709, -1725.1246337890625, 315.38885498046875, 280.522705078125, 580.0580444335938, -324.41265869140625, 769.8816528320312, 692.0972900390625, 373.5980224609375, 694.2766723632812, 143.02243041992188, 460.9901123046875, 3335.766845703125, -320.333740234375, 1148.02978515625, 1675.8509521484375, -5.8073577880859375, 599.8054809570312, 2279.760498046875, 1638.2628173828125, 73.72923278808594, 30.059175491333008, 1998.7388916015625, 1103.8428955078125, -267.8226623535156, 158.82958984375, 382.03948974609375, -136.8524169921875, 660.2676391601562, 1077.2554931640625, 154.7359161376953, 138.14126586914062, 1015.229736328125, 230.44935607910156, -73.6956558227539, 1215.52490234375, -1149.720458984375, 870.7950439453125, -48.44124221801758, -554.1622314453125, -142.91392517089844, 72.37826538085938, 714.9409790039062, -345.02801513671875, -72.98807525634766, -111.53463745117188, 141.1739501953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000663.npy"} +{"epoch": 0.973568281938326, "step": 664, "batch_size": 64, "mean": 511.424560546875, "std": 710.501220703125, "min": -849.461181640625, "p10": -229.18840179443356, "median": 422.4744567871094, "p90": 1339.481494140625, "max": 2515.443359375, "pos_frac": 0.734375, "sample": [1139.924560546875, -39.44793701171875, 597.1029052734375, 682.5516357421875, 891.042236328125, -81.2068862915039, 369.1539306640625, 2095.865234375, -122.2102279663086, 1603.1341552734375, -477.4599609375, 762.7429809570312, 472.05157470703125, 324.9580383300781, 564.7250366210938, 240.6344451904297, -849.461181640625, 445.1011962890625, -71.5020980834961, 551.8833618164062, 1350.6854248046875, 1430.333251953125, 87.40264892578125, 1197.98486328125, -302.4676818847656, 123.34835815429688, 2152.671875, -192.22250366210938, 580.115234375, -483.3280334472656, 1191.397216796875, 163.32498168945312, -168.19293212890625, 1214.402587890625, 519.7014770507812, 415.5909729003906, -766.6781616210938, 429.3579406738281, 703.5972900390625, 1147.284423828125, 155.0816192626953, 542.6842041015625, 1027.54833984375, -92.026123046875, 977.4496459960938, 2228.73095703125, 998.2362060546875, 239.26846313476562, 2515.443359375, -191.365478515625, -162.55728149414062, 1313.3389892578125, 1199.905029296875, 233.3221893310547, 218.20504760742188, 542.294189453125, 206.77920532226562, 330.79296875, -245.0309295654297, -53.733734130859375, 401.0891418457031, 5.175445556640625, 872.4642944335938, -425.81585693359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000664.npy"} +{"epoch": 0.9750367107195301, "step": 665, "batch_size": 64, "mean": 426.11090087890625, "std": 537.1138916015625, "min": -692.0691528320312, "p10": -124.4433479309082, "median": 371.4980010986328, "p90": 1165.7152587890628, "max": 1807.155029296875, "pos_frac": 0.78125, "sample": [-125.2343978881836, 25.89175033569336, 439.0529479980469, 405.1513977050781, -114.5959243774414, 452.9664306640625, 666.02978515625, -64.53271484375, 1573.406982421875, 441.8045654296875, 314.20806884765625, 561.2337646484375, 120.45553588867188, -519.1315307617188, 620.3045654296875, 82.6361083984375, -68.06314086914062, 238.93740844726562, -31.833847045898438, 545.4356079101562, 769.6808471679688, 211.98377990722656, 461.525146484375, 262.9261779785156, -84.85905456542969, 316.8825988769531, -692.0691528320312, 1568.9356689453125, 766.4189453125, -29.956119537353516, 780.9472045898438, 309.612548828125, 674.3849487304688, 394.1161804199219, 1189.218017578125, 229.6259765625, 1208.2513427734375, 544.60791015625, 465.1910095214844, 216.78892517089844, 261.2709045410156, 1732.669189453125, 591.09375, -340.5179443359375, 608.0776977539062, 906.507080078125, 348.87982177734375, -484.24725341796875, 561.8385009765625, 1110.87548828125, 182.7938995361328, 187.03074645996094, 939.8101196289062, 223.82302856445312, 1709.824951171875, -244.1142120361328, 704.26904296875, -192.51235961914062, 157.92965698242188, 904.7545166015625, 1807.155029296875, 51.93769836425781, 536.2088012695312, -122.59756469726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000665.npy"} +{"epoch": 0.9765051395007343, "step": 666, "batch_size": 64, "mean": 424.9931640625, "std": 626.6178588867188, "min": -996.3318481445312, "p10": -346.2975708007812, "median": 330.5122375488281, "p90": 1065.2236450195312, "max": 2719.95849609375, "pos_frac": 0.75, "sample": [762.211669921875, 373.90545654296875, 719.5465698242188, 411.97821044921875, -414.8540344238281, 705.4511108398438, 1067.625, 995.6845703125, 141.93399047851562, 1045.3856201171875, -36.13835906982422, 272.3442687988281, 371.5265808105469, -496.168212890625, -365.69342041015625, 223.57749938964844, 260.284423828125, -142.78045654296875, -114.47772216796875, -83.42303466796875, 1317.9093017578125, -7.335990905761719, 762.3317260742188, 455.31298828125, 2106.30029296875, -8.583511352539062, 812.3555297851562, 321.8121337890625, -164.21945190429688, -682.1165161132812, 328.6189270019531, 738.3411865234375, 304.55096435546875, 1160.1103515625, 918.0474853515625, 457.93145751953125, 937.9801635742188, -369.600830078125, 408.5360107421875, 599.4705810546875, 48.7943229675293, 757.2938842773438, -301.04058837890625, 1408.060791015625, 380.2408447265625, 256.61041259765625, -996.3318481445312, 1059.6204833984375, 329.1658630371094, 59.404937744140625, 424.00604248046875, 731.9417114257812, 836.089111328125, -28.531612396240234, 221.93701171875, 146.0821533203125, 242.8143310546875, 318.6068115234375, 2719.95849609375, 247.29347229003906, -532.271484375, 954.4364013671875, 331.8586120605469, 1487.846923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000666.npy"} +{"epoch": 0.9779735682819384, "step": 667, "batch_size": 64, "mean": 507.36602783203125, "std": 673.9800415039062, "min": -815.4957885742188, "p10": -269.30458374023436, "median": 486.3523406982422, "p90": 1294.4839965820313, "max": 2346.6611328125, "pos_frac": 0.8125, "sample": [466.0671691894531, -581.4059448242188, 684.3840942382812, -707.5418090820312, -77.86117553710938, 1004.9617919921875, 1010.1485595703125, 611.1516723632812, 529.1193237304688, 1301.7608642578125, 269.576904296875, 169.19314575195312, 105.66039276123047, 380.673583984375, 323.06304931640625, -815.4957885742188, 757.3226928710938, 546.3109741210938, 1791.1258544921875, 930.4663696289062, 782.0590209960938, 277.0075988769531, 1277.504638671875, 59.739967346191406, 647.21142578125, 310.23944091796875, 972.4899291992188, -199.2449951171875, 989.7244873046875, 843.4273071289062, 761.2156982421875, 635.871337890625, 1093.49267578125, 651.36767578125, 442.6544494628906, -244.86807250976562, 411.64117431640625, 506.63751220703125, 385.5777282714844, 6.621391296386719, -92.16697692871094, 1604.6085205078125, 1860.5169677734375, 1.9739990234375, 726.2849731445312, 1506.6998291015625, 727.9713745117188, 200.74728393554688, 134.8646240234375, 793.025634765625, 6.053306579589844, 877.2757568359375, -487.524658203125, 158.22877502441406, 2322.224853515625, 2346.6611328125, 79.07762908935547, 1047.86474609375, -642.048095703125, 262.3110656738281, -668.2569580078125, -279.7773742675781, -101.07431030273438, 776.8314819335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000667.npy"} +{"epoch": 0.9794419970631424, "step": 668, "batch_size": 64, "mean": 348.3375244140625, "std": 792.5498657226562, "min": -1351.2894287109375, "p10": -572.6430236816406, "median": 282.11138916015625, "p90": 1286.4211181640626, "max": 2743.81591796875, "pos_frac": 0.703125, "sample": [524.234619140625, 273.0933837890625, 1342.17138671875, -1348.9173583984375, 165.57447814941406, 966.7347412109375, 1463.1480712890625, 217.7235565185547, 1091.91748046875, 475.60467529296875, -679.4807739257812, 705.8533325195312, 18.55681610107422, 71.2088623046875, 358.43157958984375, 1074.2059326171875, 253.3233184814453, 27.453872680664062, -492.8890380859375, -606.8233032226562, -969.09228515625, 1005.0530395507812, -449.94964599609375, 827.9644165039062, -177.94271850585938, 837.2213745117188, 357.8525695800781, 461.2839050292969, 36.93272399902344, 693.05419921875, 336.025146484375, 654.6953735351562, 1250.1279296875, -475.2087707519531, 1061.600830078125, 412.19842529296875, -896.6778564453125, 1110.0653076171875, 74.13227081298828, -234.25961303710938, 184.17498779296875, -70.94306182861328, 1494.115234375, -175.15074157714844, -620.4187622070312, 1221.8338623046875, -135.52032470703125, -464.38409423828125, 299.7160339355469, -32.560707092285156, 309.6898193359375, 222.1920166015625, 291.12939453125, 247.63015747070312, -201.68734741210938, 1301.975341796875, 404.6623229980469, 338.23065185546875, 2420.83447265625, -1351.2894287109375, -95.68846893310547, 2085.578857421875, 59.464202880859375, 2743.81591796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000668.npy"} +{"epoch": 0.9809104258443465, "step": 669, "batch_size": 64, "mean": 507.1175842285156, "std": 676.0285034179688, "min": -1356.3023681640625, "p10": -228.6688720703125, "median": 434.3072052001953, "p90": 1442.2489013671877, "max": 2602.919677734375, "pos_frac": 0.8125, "sample": [194.2798309326172, 53.215240478515625, -67.4372787475586, 1055.9583740234375, -890.6073608398438, 298.13018798828125, 695.6976928710938, 740.9987182617188, 384.364501953125, -83.13042449951172, 2602.919677734375, -231.77798461914062, 26.762680053710938, 330.51153564453125, -299.9055480957031, 396.2580871582031, 1555.0626220703125, -62.94927215576172, 119.97272491455078, 210.0294952392578, 547.170654296875, 779.1165161132812, 1590.792236328125, 1321.590087890625, 77.89445495605469, 1750.38623046875, 50.8875732421875, 1521.00146484375, 1116.098876953125, 63.80051040649414, 582.3292236328125, 1296.5423583984375, -221.41427612304688, 579.6376953125, 1460.655029296875, -598.847900390625, -288.2176208496094, 824.8696899414062, 815.010009765625, 903.0178833007812, 1399.30126953125, 371.77593994140625, 129.13623046875, 324.2405700683594, -1356.3023681640625, 837.74853515625, 940.7730712890625, 623.7074584960938, 428.42376708984375, 268.24102783203125, -546.2671508789062, 1014.0685424804688, 440.1906433105469, 491.9889831542969, 606.8343505859375, 781.3505249023438, 72.47750091552734, 252.98208618164062, 506.3759460449219, 1349.8546142578125, -36.51565170288086, 1525.15380859375, 446.5715637207031, 382.7388610839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000669.npy"} +{"epoch": 0.9823788546255506, "step": 670, "batch_size": 64, "mean": 519.20654296875, "std": 697.5200805664062, "min": -1307.198974609375, "p10": -274.8091552734375, "median": 378.0685272216797, "p90": 1460.0151611328126, "max": 1915.34130859375, "pos_frac": 0.75, "sample": [204.33535766601562, 509.56219482421875, 240.0787353515625, 1462.798583984375, 479.2277526855469, -215.64776611328125, -209.83016967773438, -22.04136848449707, 1077.1431884765625, 617.1071166992188, 297.8890380859375, -112.50067138671875, -1307.198974609375, 1915.34130859375, 1249.6339111328125, 1801.450927734375, 752.5563354492188, 389.7464294433594, 1229.23193359375, 615.418212890625, 174.6421661376953, -571.0606079101562, 1417.2930908203125, -263.6898193359375, -579.7752075195312, 366.390625, 1551.4234619140625, 34.145511627197266, 1081.206787109375, -524.207275390625, 1551.2325439453125, 820.281005859375, -245.87709045410156, 252.56964111328125, 221.0182647705078, 1433.8634033203125, 1280.14794921875, -313.439453125, 201.24478149414062, 1401.4903564453125, 406.11871337890625, 1234.6026611328125, 1043.72265625, -234.75640869140625, 1910.6968994140625, 859.753662109375, 326.61541748046875, 333.2170104980469, -354.6768798828125, -279.5745849609375, -1.498443603515625, 1453.5205078125, 244.50726318359375, 363.53070068359375, 1531.5438232421875, 191.57571411132812, 290.22998046875, 1110.08984375, -17.8199462890625, 922.7213134765625, 458.6496887207031, 511.51031494140625, 67.7723388671875, 593.961669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000670.npy"} +{"epoch": 0.9838472834067548, "step": 671, "batch_size": 64, "mean": 397.42791748046875, "std": 700.272216796875, "min": -1929.19873046875, "p10": -219.7996841430664, "median": 349.6846618652344, "p90": 1284.2734130859376, "max": 1926.9512939453125, "pos_frac": 0.765625, "sample": [54.18247985839844, -1929.19873046875, 498.723876953125, 59.278106689453125, -151.50238037109375, 762.86865234375, 234.62863159179688, 322.65350341796875, 746.8355712890625, 333.71002197265625, 411.22821044921875, -202.37271118164062, 168.59449768066406, 1164.57666015625, 1004.6109008789062, 71.61119079589844, 262.5414733886719, 180.68646240234375, 879.8157958984375, 785.6517944335938, 100.91507720947266, 1452.1170654296875, -1659.353759765625, 1300.5736083984375, 817.1222534179688, -43.39649963378906, 1901.861083984375, 446.4599914550781, 230.509521484375, 577.2200317382812, -152.973876953125, -610.5736083984375, -425.4097595214844, 469.51715087890625, 1246.2396240234375, -200.10052490234375, 365.6593017578125, 321.1417541503906, 670.0642700195312, 1385.1365966796875, 1480.15234375, 420.2623596191406, -227.2683868408203, 106.08235931396484, 829.4771728515625, 4.9561004638671875, 266.4088134765625, 981.6412353515625, -443.8924255371094, -84.98233795166016, 729.9176635742188, 448.0680236816406, -94.4286880493164, 223.09735107421875, 1222.2752685546875, 923.553466796875, -167.90127563476562, -676.23095703125, 239.21546936035156, 1462.3822021484375, 516.9559326171875, 1926.9512939453125, 1116.746826171875, 380.0962219238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000671.npy"} +{"epoch": 0.9853157121879589, "step": 672, "batch_size": 64, "mean": 414.93212890625, "std": 764.0545043945312, "min": -1268.8248291015625, "p10": -526.590249633789, "median": 369.20790100097656, "p90": 1329.1379394531252, "max": 2800.21142578125, "pos_frac": 0.6875, "sample": [442.64849853515625, -2.808197021484375, -333.2720947265625, 171.29269409179688, -228.67042541503906, 78.43763732910156, 866.5220947265625, 391.37744140625, 1696.6351318359375, 385.3525695800781, 121.15605926513672, -123.29688262939453, -419.4098205566406, -258.8343811035156, 615.541259765625, 280.7091369628906, -151.11209106445312, -714.6356201171875, 436.05255126953125, 1756.4425048828125, 87.77911376953125, 842.64892578125, -27.81220245361328, -121.00595092773438, 1274.9669189453125, -1268.8248291015625, 1107.76171875, -953.31103515625, 455.94244384765625, 2800.21142578125, -159.62625122070312, 942.3049926757812, 1034.417724609375, 1286.30224609375, 1139.5968017578125, 898.438232421875, 553.974853515625, -572.5247192382812, 211.73263549804688, 2079.218994140625, -968.449951171875, -59.375511169433594, 353.063232421875, 1224.2401123046875, 849.4878540039062, 593.0718994140625, 908.4149780273438, 199.5380859375, 787.5089111328125, 636.7235107421875, 1495.587890625, 119.44839477539062, -88.49740600585938, 769.8221435546875, 58.279327392578125, 752.1035766601562, 1347.49609375, 82.67949676513672, -52.56712341308594, -758.9764404296875, 594.6610717773438, 333.53924560546875, 1498.9888916015625, -743.45361328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000672.npy"} +{"epoch": 0.986784140969163, "step": 673, "batch_size": 64, "mean": 385.8390808105469, "std": 664.5762939453125, "min": -1409.4495849609375, "p10": -317.77699279785156, "median": 397.81431579589844, "p90": 1088.81224975586, "max": 3374.142578125, "pos_frac": 0.78125, "sample": [-204.2537078857422, -682.4207153320312, 498.5799865722656, 939.5962524414062, 347.63995361328125, 493.44891357421875, 652.58251953125, 393.2644348144531, 1302.0126953125, 420.6733093261719, -182.93821716308594, 1513.52197265625, 499.3146057128906, 635.96484375, 128.9696502685547, 87.09814453125, -453.62432861328125, 801.7442016601562, 1546.114501953125, 336.8306579589844, -322.18621826171875, 373.06329345703125, 355.64825439453125, -93.49716186523438, 131.5520477294922, -1409.4495849609375, 216.40780639648438, 684.9635009765625, -454.7898864746094, 175.82017517089844, 750.9747314453125, 1437.152099609375, 170.7940216064453, -64.46888732910156, 757.7263793945312, 540.9125366210938, 402.36419677734375, -603.2490844726562, 516.857177734375, 861.412109375, 752.8140869140625, 51.60596466064453, 180.78042602539062, 771.72314453125, 405.3548583984375, 75.47284698486328, 562.2779541015625, -177.9340057373047, 477.5632019042969, 3374.142578125, 99.5886459350586, 94.34719848632812, 549.9451904296875, 648.173583984375, -802.0240478515625, 537.3780517578125, 461.79986572265625, 1152.761962890625, 157.77557373046875, -307.4888000488281, -92.5197982788086, 627.3168334960938, 1271.1246337890625, 319.59417724609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000673.npy"} +{"epoch": 0.9882525697503671, "step": 674, "batch_size": 64, "mean": 510.9061584472656, "std": 687.9755249023438, "min": -565.6873779296875, "p10": -278.332844543457, "median": 479.3643035888672, "p90": 1209.1786865234378, "max": 3177.885498046875, "pos_frac": 0.75, "sample": [-104.38787841796875, 38.279781341552734, 849.1300659179688, 739.0770874023438, 531.990966796875, 804.6478271484375, 46.13993835449219, -74.44035339355469, -517.0420532226562, 1237.594970703125, 1598.3402099609375, 952.3455810546875, -12.987651824951172, 537.1049194335938, -233.0305633544922, 508.6763916015625, -152.96231079101562, 2021.409423828125, 635.567138671875, 3177.885498046875, 564.0936889648438, 498.58734130859375, 12.262615203857422, 1018.9518432617188, 531.0086059570312, -297.74810791015625, 1091.99072265625, 460.1412658691406, 693.964599609375, -126.033203125, 1142.8740234375, 2073.91015625, -143.44131469726562, 336.2969665527344, 418.39898681640625, 812.6083984375, -344.46356201171875, -416.35687255859375, -378.0220642089844, 914.9451293945312, 643.401611328125, 434.0415954589844, 301.21087646484375, 1685.6875, 342.6893615722656, 1030.4102783203125, 892.6744384765625, 977.9464721679688, 538.823486328125, 114.69526672363281, -565.6873779296875, 185.2712860107422, 1865.4735107421875, -331.3750305175781, -39.767539978027344, 700.3242797851562, 175.97607421875, 790.462158203125, 414.9536437988281, 585.3206787109375, 147.64834594726562, -170.1992645263672, 193.71786499023438, 336.98724365234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000674.npy"} +{"epoch": 0.9897209985315712, "step": 675, "batch_size": 64, "mean": 518.9458618164062, "std": 716.4024658203125, "min": -820.1660766601562, "p10": -139.90013122558588, "median": 355.6718292236328, "p90": 1495.7395263671879, "max": 2914.3466796875, "pos_frac": 0.765625, "sample": [234.8284454345703, 1538.683837890625, 543.2412719726562, 105.46282958984375, 52.939537048339844, 156.7241668701172, 828.1640014648438, -49.37477111816406, 616.4024047851562, 306.9973449707031, 867.435546875, -187.3704833984375, 602.14013671875, 1150.376220703125, -159.252685546875, -33.821075439453125, 160.7373046875, 1418.8330078125, 477.4484558105469, 482.0223693847656, 746.3930053710938, -13.347999572753906, 2465.978515625, 786.1358642578125, -73.19721221923828, 381.0769348144531, -5.089725494384766, 723.8683471679688, 554.8684692382812, 467.97833251953125, 369.388916015625, 1609.33740234375, 303.9129638671875, 1964.106689453125, 15.567657470703125, 1213.9017333984375, 2914.3466796875, -212.5266876220703, 987.1287231445312, 2168.63623046875, 192.43067932128906, 128.88790893554688, -19.35663604736328, 83.29541015625, 313.72247314453125, -94.74417114257812, -820.1660766601562, 255.06027221679688, 668.0297241210938, 255.41714477539062, 181.71180725097656, 1528.699462890625, 674.8861083984375, -585.2145385742188, -605.2260131835938, 372.9097900390625, 96.597900390625, 735.349609375, -64.36520385742188, 1381.5592041015625, 341.9547424316406, 1169.383056640625, 1030.58056640625, -489.9493713378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000675.npy"} +{"epoch": 0.9911894273127754, "step": 676, "batch_size": 64, "mean": 493.9117431640625, "std": 616.9157104492188, "min": -922.5384521484375, "p10": -124.51998291015623, "median": 424.3007354736328, "p90": 1383.738037109375, "max": 2491.1787109375, "pos_frac": 0.75, "sample": [55.243858337402344, 1594.458740234375, 880.0453491210938, 1046.4659423828125, -63.67198181152344, -101.4223403930664, 416.9076232910156, 755.276123046875, 40.095577239990234, -922.5384521484375, 672.0281982421875, 32.465484619140625, -94.75544738769531, 1234.68603515625, 1755.204833984375, 2491.1787109375, -76.58069610595703, -190.0640106201172, -49.54148864746094, -167.86337280273438, -43.111167907714844, 530.690185546875, 696.5882568359375, -106.73001098632812, -132.14425659179688, 787.7349853515625, 99.44972229003906, 1379.520263671875, 301.8173522949219, 1691.8460693359375, -48.698089599609375, 592.8612060546875, 254.57424926757812, 769.716796875, 118.30017852783203, 857.4393920898438, 1552.1357421875, 452.83905029296875, 614.37939453125, 1431.85986328125, 706.3848876953125, -44.792808532714844, 199.4797821044922, -329.71295166015625, 1306.0018310546875, 967.802001953125, 63.980010986328125, 898.9541625976562, 491.611083984375, 431.69384765625, 951.1079711914062, 595.5706787109375, 263.7566223144531, 388.8267822265625, 59.89081573486328, 250.198974609375, 886.90380859375, -157.8240966796875, 1385.545654296875, 139.9707489013672, 500.4993896484375, 237.46612548828125, -250.9644317626953, 559.3146362304688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000676.npy"} +{"epoch": 0.9926578560939795, "step": 677, "batch_size": 64, "mean": 468.4746398925781, "std": 772.2254028320312, "min": -919.8201904296875, "p10": -254.76405334472653, "median": 343.50836181640625, "p90": 1247.7485229492188, "max": 3883.083251953125, "pos_frac": 0.765625, "sample": [140.1356201171875, 3127.23583984375, 488.5137023925781, 37.013877868652344, 469.8952331542969, -318.7032470703125, -157.57949829101562, -234.44540405273438, 410.3323059082031, -14.8131103515625, 556.4862060546875, 504.8413391113281, 73.94468688964844, 1362.751708984375, 278.8951416015625, 945.0103149414062, 854.889892578125, 47.77070617675781, 267.42669677734375, 245.8472137451172, 1213.613525390625, 245.1867218017578, 1093.252197265625, -264.54534912109375, 360.51312255859375, 1354.7662353515625, 100.73365783691406, 487.7247009277344, 351.49066162109375, 1158.3548583984375, 335.52606201171875, -47.627532958984375, -186.92506408691406, 218.65078735351562, 149.947998046875, -665.2377319335938, 892.7032470703125, 287.11175537109375, 3883.083251953125, 167.71575927734375, 209.10867309570312, 492.56561279296875, 935.0775756835938, 717.9618530273438, -174.0248565673828, 1262.3778076171875, 377.5679016113281, 286.0374450683594, 805.852294921875, 254.53211975097656, 1458.869873046875, -44.9471321105957, 790.9202880859375, 374.6562805175781, 590.5218505859375, -469.3191833496094, -919.8201904296875, 1801.6787109375, 597.7904663085938, 745.2075805664062, 860.1551513671875, -263.4720458984375, -827.5150756835938, -100.89456176757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000677.npy"} +{"epoch": 0.9941262848751835, "step": 678, "batch_size": 64, "mean": 566.3223876953125, "std": 802.6676635742188, "min": -1907.691650390625, "p10": -194.74255981445307, "median": 501.8385772705078, "p90": 1262.2265869140626, "max": 3234.33447265625, "pos_frac": 0.8125, "sample": [263.1174621582031, 1004.112060546875, 570.8531494140625, 632.111572265625, 552.010986328125, -123.56936645507812, 548.6631469726562, -1907.691650390625, 78.1695556640625, -218.72970581054688, -87.44309997558594, 1230.3099365234375, 40.88481521606445, 1222.3692626953125, -138.77255249023438, 572.41064453125, 892.6360473632812, -340.12420654296875, 935.3771362304688, 889.0678100585938, 1095.89794921875, 1275.9051513671875, 14.811874389648438, 2484.21435546875, 1213.3922119140625, 3234.33447265625, 97.08891296386719, 335.53570556640625, -13.049945831298828, 1186.5733642578125, -598.1207885742188, 154.24337768554688, 225.07232666015625, 813.5177612304688, 202.0040283203125, 1031.4803466796875, 170.56826782226562, 1208.8638916015625, 271.38909912109375, 934.2796020507812, -224.68821716308594, 1021.6325073242188, 1320.594482421875, 1668.0645751953125, 352.48492431640625, 271.047607421875, 2588.108642578125, 2360.363037109375, 127.317626953125, 542.3515625, 347.6397705078125, -650.035400390625, 465.6736145019531, 7.1812286376953125, 54.79766082763672, 113.93861389160156, 643.6760864257812, 1121.6707763671875, 564.2233276367188, 538.0035400390625, 393.7684020996094, -19.64449691772461, 956.877685546875, -274.1750183105469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000678.npy"} +{"epoch": 0.9955947136563876, "step": 679, "batch_size": 64, "mean": 384.89459228515625, "std": 635.8862915039062, "min": -784.001953125, "p10": -308.50939331054684, "median": 288.9309387207031, "p90": 1340.2936767578128, "max": 2074.14306640625, "pos_frac": 0.71875, "sample": [462.353515625, 8.557378768920898, 1276.865234375, 2074.14306640625, 446.2267150878906, 804.1776733398438, 896.5016479492188, 417.8753967285156, 1367.477294921875, 344.7701110839844, 404.8360595703125, 474.731201171875, 412.7898864746094, 135.2190399169922, 1035.573486328125, -390.0237121582031, -163.75375366210938, 102.73104858398438, 238.35003662109375, 915.2022705078125, 712.186767578125, 1551.4110107421875, -144.0060577392578, 110.49420166015625, 113.56715393066406, -318.7541809082031, 420.167236328125, 1571.04443359375, -482.0688781738281, 517.087890625, 832.55224609375, 64.55601501464844, -68.91690063476562, -46.67925262451172, 1133.119140625, 370.61138916015625, 1639.998779296875, 145.99453735351562, -219.40045166015625, 109.37064361572266, 317.3063659667969, -187.8017120361328, 464.9566955566406, -45.286224365234375, -397.2277526855469, 1535.259765625, 260.5555114746094, -284.6048889160156, -784.001953125, -198.92758178710938, 944.6167602539062, -676.5075073242188, 654.1292724609375, -228.3404998779297, 512.62744140625, 259.0180358886719, -141.33731079101562, 256.90728759765625, 724.8121948242188, 2029.886962890625, 234.09091186523438, 669.6287841796875, -692.72998046875, 129.2821807861328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000679.npy"} +{"epoch": 0.9970631424375918, "step": 680, "batch_size": 64, "mean": 513.0979614257812, "std": 775.045654296875, "min": -2445.012451171875, "p10": -207.76476745605464, "median": 387.95489501953125, "p90": 1565.10712890625, "max": 2389.80517578125, "pos_frac": 0.828125, "sample": [1197.55322265625, 392.0730895996094, 418.60906982421875, 1001.1041259765625, 187.37876892089844, 1157.172607421875, 1855.724853515625, 552.0760498046875, 171.79507446289062, 383.8367004394531, 459.1632080078125, 200.8091583251953, 835.004638671875, -443.1797180175781, 1116.8941650390625, 1560.41357421875, 668.5691528320312, -681.72265625, -304.61419677734375, 937.8544921875, 1139.093994140625, 201.60919189453125, -131.1861114501953, 334.4892272949219, -222.12570190429688, 1054.254150390625, 834.87255859375, 182.5266571044922, 718.995361328125, 1527.895263671875, 298.89990234375, 28.68146324157715, 152.1383056640625, 1778.139892578125, 1065.791015625, 195.32652282714844, 200.1466522216797, 520.0185546875, 1984.1297607421875, -726.5140991210938, 144.07537841796875, 310.7897033691406, 484.62139892578125, 628.468017578125, 150.91029357910156, -2445.012451171875, 1826.2982177734375, -174.25592041015625, -593.4236450195312, 1567.11865234375, 656.3828125, 2389.80517578125, 21.136337280273438, 68.02861022949219, 136.61398315429688, 1491.6572265625, 699.1219482421875, 1605.7669677734375, -71.00161743164062, 183.31607055664062, -125.11349487304688, 338.5984191894531, 9.341766357421875, 731.3280639648438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000680.npy"} +{"epoch": 0.9985315712187959, "step": 681, "batch_size": 64, "mean": 404.0209655761719, "std": 667.7122192382812, "min": -895.3251953125, "p10": -432.8394439697265, "median": 383.88702392578125, "p90": 1260.8903320312504, "max": 2256.61181640625, "pos_frac": 0.6875, "sample": [775.3059692382812, -329.9012451171875, 522.7007446289062, 98.95330810546875, 910.8958740234375, -895.3251953125, 52.790557861328125, 415.0904235839844, -458.0098571777344, 210.4754638671875, -205.30389404296875, 352.6836242675781, -453.401123046875, -458.61553955078125, 1299.167724609375, 1739.076416015625, 470.754638671875, -167.08282470703125, -274.99322509765625, 1080.9644775390625, -202.15185546875, -8.690574645996094, 882.35546875, 507.6947937011719, 1753.195556640625, 660.4850463867188, 221.06646728515625, 1071.638671875, 981.1290283203125, 696.8065795898438, 126.0271987915039, 469.9503173828125, 191.9859619140625, -108.75509643554688, -215.70849609375, 259.59234619140625, 229.61740112304688, -692.6953125, 453.6640319824219, -105.44889831542969, -405.04949951171875, 2256.61181640625, -731.147705078125, 922.3756713867188, -83.16683197021484, 557.3666381835938, -444.7494201660156, 205.5796661376953, 61.54408264160156, -338.3641357421875, 1067.67138671875, -335.3456726074219, 940.3013305664062, 562.0034790039062, 1373.5302734375, 527.7026977539062, 1171.576416015625, 981.903564453125, 1035.613525390625, 734.2894287109375, 178.99253845214844, 1393.9927978515625, 1406.0706787109375, 960.0538330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000681.npy"} diff --git a/margin_logs/step_0000001.npy b/margin_logs/step_0000001.npy new file mode 100644 index 0000000..248c095 --- /dev/null +++ b/margin_logs/step_0000001.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb7ed5e9b5d6de6c4e509dd17cf5d9c91337fabd0c174e116c5e60872823ad93 +size 384 diff --git a/margin_logs/step_0000002.npy b/margin_logs/step_0000002.npy new file mode 100644 index 0000000..984e4c2 --- /dev/null +++ b/margin_logs/step_0000002.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc23171824afa57340cda53f69d83aef67c7c0b95175e9ec4a3a7bc3c221bc4f +size 384 diff --git a/margin_logs/step_0000003.npy b/margin_logs/step_0000003.npy new file mode 100644 index 0000000..61e698b --- /dev/null +++ b/margin_logs/step_0000003.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c27afc27dc292064b2a443398cac088d8462a36437e365530fb68703372a18 +size 384 diff --git a/margin_logs/step_0000004.npy b/margin_logs/step_0000004.npy new file mode 100644 index 0000000..c217e41 --- /dev/null +++ b/margin_logs/step_0000004.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:871a8743dc8549beb14e1694d86ee7de86c398e07359966138a11ff0e38ee6fd +size 384 diff --git a/margin_logs/step_0000005.npy b/margin_logs/step_0000005.npy new file mode 100644 index 0000000..bad4257 --- /dev/null +++ b/margin_logs/step_0000005.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a3308e70112a3bb82d899db3bf4db55d9dba8f4082f421027bf8bf940762b63 +size 384 diff --git a/margin_logs/step_0000006.npy b/margin_logs/step_0000006.npy new file mode 100644 index 0000000..0a02890 --- /dev/null +++ b/margin_logs/step_0000006.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fe6bf5be97f0ed58a6642a12cff72d0b607dfe819b1010ec07c3bf01557f790 +size 384 diff --git a/margin_logs/step_0000007.npy b/margin_logs/step_0000007.npy new file mode 100644 index 0000000..19252fb --- /dev/null +++ b/margin_logs/step_0000007.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afde5b4bd1a76964f0f61b8064db769b23542d5da95e2ad17ff4a2a03bb81cf5 +size 384 diff --git a/margin_logs/step_0000008.npy b/margin_logs/step_0000008.npy new file mode 100644 index 0000000..a3196af --- /dev/null +++ b/margin_logs/step_0000008.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b3f5bdaaeb1653e825b3e53929ac6ceee12c4971d7a73535026c4cbd0c3270b +size 384 diff --git a/margin_logs/step_0000009.npy b/margin_logs/step_0000009.npy new file mode 100644 index 0000000..4028c73 --- /dev/null +++ b/margin_logs/step_0000009.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e85dcf92f5788d50c9e1d2b2f00d661ca3a77bf52059ba32d08812b80dc1b2e +size 384 diff --git a/margin_logs/step_0000010.npy b/margin_logs/step_0000010.npy new file mode 100644 index 0000000..24e0160 --- /dev/null +++ b/margin_logs/step_0000010.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12112d90111690e720c5b65abe83c765aa9e3698a810ac02792036d5ff356311 +size 384 diff --git a/margin_logs/step_0000011.npy b/margin_logs/step_0000011.npy new file mode 100644 index 0000000..3482e37 --- /dev/null +++ b/margin_logs/step_0000011.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:234967732628be6e96333baf6fff6992afe714d302f572889e87fbb17588fbeb +size 384 diff --git a/margin_logs/step_0000012.npy b/margin_logs/step_0000012.npy new file mode 100644 index 0000000..41733dd --- /dev/null +++ b/margin_logs/step_0000012.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56af0a87575cd66624db66f9d3f4bffbf7d4a59f800e8e7b2297dc0ed3cc4b38 +size 384 diff --git a/margin_logs/step_0000013.npy b/margin_logs/step_0000013.npy new file mode 100644 index 0000000..ce60137 --- /dev/null +++ b/margin_logs/step_0000013.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0410fa673e0b2b8ecfb8b42820d007a76565926ab05231c34a1980ab4298893 +size 384 diff --git a/margin_logs/step_0000014.npy b/margin_logs/step_0000014.npy new file mode 100644 index 0000000..fd1f1e5 --- /dev/null +++ b/margin_logs/step_0000014.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef0715f7af94da7ed208d67f6bddad8eaaf35caaea6d78558764bf4f3988a654 +size 384 diff --git a/margin_logs/step_0000015.npy b/margin_logs/step_0000015.npy new file mode 100644 index 0000000..dad6f4e --- /dev/null +++ b/margin_logs/step_0000015.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac78f47d306a2eda8b1762b6006e8e47c95b708fbf15ca67fb4a316b34ace1a +size 384 diff --git a/margin_logs/step_0000016.npy b/margin_logs/step_0000016.npy new file mode 100644 index 0000000..ca94572 --- /dev/null +++ b/margin_logs/step_0000016.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be99e3a25e655732f38fffa1225162673c78a30619a7bf10c9946d643f8b0fd2 +size 384 diff --git a/margin_logs/step_0000017.npy b/margin_logs/step_0000017.npy new file mode 100644 index 0000000..f9c8ae9 --- /dev/null +++ b/margin_logs/step_0000017.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d77369ec5f857905b0ee2a0cbfb4e4e21022ea8feb947c32fcc26a352318eedf +size 384 diff --git a/margin_logs/step_0000018.npy b/margin_logs/step_0000018.npy new file mode 100644 index 0000000..8ec3537 --- /dev/null +++ b/margin_logs/step_0000018.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54f65a4648e1340c1f65e8d7f73a76429e26bc825147d4a4ef2d21cbc0e93116 +size 384 diff --git a/margin_logs/step_0000019.npy b/margin_logs/step_0000019.npy new file mode 100644 index 0000000..d93a8ae --- /dev/null +++ b/margin_logs/step_0000019.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c019ff603d48b1921962529c5fff9c60f8575f99e049061ddae6ab136ec6ca0f +size 384 diff --git a/margin_logs/step_0000020.npy b/margin_logs/step_0000020.npy new file mode 100644 index 0000000..d7d8044 --- /dev/null +++ b/margin_logs/step_0000020.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24e7cdd7bfbd928273e96cf3063addb9d9bd6beb9c279db2998f1d640604da2f +size 384 diff --git a/margin_logs/step_0000021.npy b/margin_logs/step_0000021.npy new file mode 100644 index 0000000..5229f18 --- /dev/null +++ b/margin_logs/step_0000021.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4363232b8a91eec92277d0ff302d39911367788440626c9107d8d0407229a0ac +size 384 diff --git a/margin_logs/step_0000022.npy b/margin_logs/step_0000022.npy new file mode 100644 index 0000000..493425b --- /dev/null +++ b/margin_logs/step_0000022.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a241bc56c68f992f4f025a7c8aee92a828b7c16648ad9599241f3c7cc6bcfc6 +size 384 diff --git a/margin_logs/step_0000023.npy b/margin_logs/step_0000023.npy new file mode 100644 index 0000000..4b0d523 --- /dev/null +++ b/margin_logs/step_0000023.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2d10cc6768c765a217953fd318ded4edc1d0a441809e373b06245f4d46a5f40 +size 384 diff --git a/margin_logs/step_0000024.npy b/margin_logs/step_0000024.npy new file mode 100644 index 0000000..3bb33d5 --- /dev/null +++ b/margin_logs/step_0000024.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33904344af7282d4078a2f8633a7d08d356f1041af5753be2a8fc8e6afb934f2 +size 384 diff --git a/margin_logs/step_0000025.npy b/margin_logs/step_0000025.npy new file mode 100644 index 0000000..d5a57fa --- /dev/null +++ b/margin_logs/step_0000025.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f658858731e81f1a0c870663ea5e9b46470a021b73be0d32ba71ca2c567438b6 +size 384 diff --git a/margin_logs/step_0000026.npy b/margin_logs/step_0000026.npy new file mode 100644 index 0000000..ff27176 --- /dev/null +++ b/margin_logs/step_0000026.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f5cc0c6b5c97a9a7c1a73b9c451fc006f277ea2cbd52bcf4c4fd6de594122e4 +size 384 diff --git a/margin_logs/step_0000027.npy b/margin_logs/step_0000027.npy new file mode 100644 index 0000000..311f464 --- /dev/null +++ b/margin_logs/step_0000027.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7e2344a1b7ac9397b061a6c491730a64f4336424edf36874e9c915f344320af +size 384 diff --git a/margin_logs/step_0000028.npy b/margin_logs/step_0000028.npy new file mode 100644 index 0000000..c827dd0 --- /dev/null +++ b/margin_logs/step_0000028.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b35774e20970f90ce5c31ac651090e77224f54597e490ac8203e892e008caf6 +size 384 diff --git a/margin_logs/step_0000029.npy b/margin_logs/step_0000029.npy new file mode 100644 index 0000000..1feec2c --- /dev/null +++ b/margin_logs/step_0000029.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:567b7fe8803a873764b0b0d6d3ac058074162d322447e38a1cb5662e3e5049f0 +size 384 diff --git a/margin_logs/step_0000030.npy b/margin_logs/step_0000030.npy new file mode 100644 index 0000000..ddd4c84 --- /dev/null +++ b/margin_logs/step_0000030.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb17463fa68689215db35ebe210784a2dd5151f412225421e856b945ee90c798 +size 384 diff --git a/margin_logs/step_0000031.npy b/margin_logs/step_0000031.npy new file mode 100644 index 0000000..4e7d867 --- /dev/null +++ b/margin_logs/step_0000031.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2973258b5e705de21282c71c5a828d4730a2eba4cf991f573c6994e8dca5834 +size 384 diff --git a/margin_logs/step_0000032.npy b/margin_logs/step_0000032.npy new file mode 100644 index 0000000..f1b5bd9 --- /dev/null +++ b/margin_logs/step_0000032.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:663fcf93532d23dc130eea34058c479dd402a2f60352c604eb76821b3f536b40 +size 384 diff --git a/margin_logs/step_0000033.npy b/margin_logs/step_0000033.npy new file mode 100644 index 0000000..e394d48 --- /dev/null +++ b/margin_logs/step_0000033.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b648a02c6965138e641858f8df13909597d429e905c1fc58c2034bc87092e0e7 +size 384 diff --git a/margin_logs/step_0000034.npy b/margin_logs/step_0000034.npy new file mode 100644 index 0000000..9777650 --- /dev/null +++ b/margin_logs/step_0000034.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ab950a74a7553172f97e7116e79b338bea7b75dce061e16e11f813f68867a65 +size 384 diff --git a/margin_logs/step_0000035.npy b/margin_logs/step_0000035.npy new file mode 100644 index 0000000..e58ab47 --- /dev/null +++ b/margin_logs/step_0000035.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc44fa7fbea911f0f26e35bc96b7ac3f22cb7dd75e46b6a554adfb54b4543dff +size 384 diff --git a/margin_logs/step_0000036.npy b/margin_logs/step_0000036.npy new file mode 100644 index 0000000..70e9e45 --- /dev/null +++ b/margin_logs/step_0000036.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:584101d7dc84012b9efbeb976f45fa00d913f1bf6f4a635af18b9c18520a0564 +size 384 diff --git a/margin_logs/step_0000037.npy b/margin_logs/step_0000037.npy new file mode 100644 index 0000000..841c2aa --- /dev/null +++ b/margin_logs/step_0000037.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d278d94f720bd9df2c14c8fc037b075f3b46f18cb5881648a9e7e7a770554be +size 384 diff --git a/margin_logs/step_0000038.npy b/margin_logs/step_0000038.npy new file mode 100644 index 0000000..20be8ff --- /dev/null +++ b/margin_logs/step_0000038.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4885a629afb4cf830015a26c1a029f093c07e7ff3e20e91144cfe89d41e777ff +size 384 diff --git a/margin_logs/step_0000039.npy b/margin_logs/step_0000039.npy new file mode 100644 index 0000000..92aa79f --- /dev/null +++ b/margin_logs/step_0000039.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e306658264581b4c8f790464b52eff5e6e41e66a337c0cc771e4d14fb7522ca +size 384 diff --git a/margin_logs/step_0000040.npy b/margin_logs/step_0000040.npy new file mode 100644 index 0000000..52a0f4b --- /dev/null +++ b/margin_logs/step_0000040.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c65e3358b60b9d8394d1361f06c8df6eb3a0d8c66a3d6427c35eef18591b9226 +size 384 diff --git a/margin_logs/step_0000041.npy b/margin_logs/step_0000041.npy new file mode 100644 index 0000000..bedc176 --- /dev/null +++ b/margin_logs/step_0000041.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe100ef739135cce904d31d69f174573d769db1f9610b958240ea6b76c98ff1b +size 384 diff --git a/margin_logs/step_0000042.npy b/margin_logs/step_0000042.npy new file mode 100644 index 0000000..742fc72 --- /dev/null +++ b/margin_logs/step_0000042.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d11ad737facf04051206ceeef0c1151be42c45e2542aef925fc59ab996bdd289 +size 384 diff --git a/margin_logs/step_0000043.npy b/margin_logs/step_0000043.npy new file mode 100644 index 0000000..e77b4db --- /dev/null +++ b/margin_logs/step_0000043.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56d83f62ba076b8640cdd878871660dcee3bb96680df0a275d727d35dc058aea +size 384 diff --git a/margin_logs/step_0000044.npy b/margin_logs/step_0000044.npy new file mode 100644 index 0000000..4fd2a26 --- /dev/null +++ b/margin_logs/step_0000044.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ebf083aa129acda4116ed466cf04664cd616a02bb217cd8f796551debc828ee +size 384 diff --git a/margin_logs/step_0000045.npy b/margin_logs/step_0000045.npy new file mode 100644 index 0000000..87080d6 --- /dev/null +++ b/margin_logs/step_0000045.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dcc8d107667e5a5abf88663d48c5522a1cb10c3fd7f7e14a827f1a39dd04552 +size 384 diff --git a/margin_logs/step_0000046.npy b/margin_logs/step_0000046.npy new file mode 100644 index 0000000..f03e84f --- /dev/null +++ b/margin_logs/step_0000046.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92452ff836cefa592a5e20d4b319965592f1b14e4b9088866e2a7f608edc9b58 +size 384 diff --git a/margin_logs/step_0000047.npy b/margin_logs/step_0000047.npy new file mode 100644 index 0000000..2b57f7c --- /dev/null +++ b/margin_logs/step_0000047.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37a7b2c624bd1e49fa3abeb925a1c3a259ea6e5d64d8aa9c0a0e60d21c2ca407 +size 384 diff --git a/margin_logs/step_0000048.npy b/margin_logs/step_0000048.npy new file mode 100644 index 0000000..d7a02ed --- /dev/null +++ b/margin_logs/step_0000048.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cffa9dd2daaa87c5413666d6f79acd714a2e21e496d40ab0364388e5fa50da4 +size 384 diff --git a/margin_logs/step_0000049.npy b/margin_logs/step_0000049.npy new file mode 100644 index 0000000..c322d92 --- /dev/null +++ b/margin_logs/step_0000049.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc40ab49e3ccb3b5552e8b996a411b3e54e300cf8c111a5f8d9c21510043f655 +size 384 diff --git a/margin_logs/step_0000050.npy b/margin_logs/step_0000050.npy new file mode 100644 index 0000000..95dbb23 --- /dev/null +++ b/margin_logs/step_0000050.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b574a88e931a9a2a48ab636ec131bc193d795887687f12a92be61c65ef0fe191 +size 384 diff --git a/margin_logs/step_0000051.npy b/margin_logs/step_0000051.npy new file mode 100644 index 0000000..7618468 --- /dev/null +++ b/margin_logs/step_0000051.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eab39f7d55d0c4c26d65dc454d569c4e526c29a52e36c57825633c63371c4dd7 +size 384 diff --git a/margin_logs/step_0000052.npy b/margin_logs/step_0000052.npy new file mode 100644 index 0000000..3d64829 --- /dev/null +++ b/margin_logs/step_0000052.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ad26bc46e1528e9dfae287bc03f11511b850eb806c6ba317b1c5d18f399f905 +size 384 diff --git a/margin_logs/step_0000053.npy b/margin_logs/step_0000053.npy new file mode 100644 index 0000000..2ab5f2c --- /dev/null +++ b/margin_logs/step_0000053.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:992d9268a6d4d1df2b728425269d92034c01fcd3dab7089d1a8cd3d8899c23a4 +size 384 diff --git a/margin_logs/step_0000054.npy b/margin_logs/step_0000054.npy new file mode 100644 index 0000000..74f5644 --- /dev/null +++ b/margin_logs/step_0000054.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:965b01aed44519e9062fcbbffceb7cf38a077d3fbaeedd530ab93e29edcb6507 +size 384 diff --git a/margin_logs/step_0000055.npy b/margin_logs/step_0000055.npy new file mode 100644 index 0000000..4272f30 --- /dev/null +++ b/margin_logs/step_0000055.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c79e92e65488c815c5a126d94f552c411514d9d8578c525c5801ad12375707 +size 384 diff --git a/margin_logs/step_0000056.npy b/margin_logs/step_0000056.npy new file mode 100644 index 0000000..6cb83a3 --- /dev/null +++ b/margin_logs/step_0000056.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc0d1dc0d79a67ca0b48f7515b894018cf6bdbb965adbec2970bbffa9c00495 +size 384 diff --git a/margin_logs/step_0000057.npy b/margin_logs/step_0000057.npy new file mode 100644 index 0000000..dbdbe05 --- /dev/null +++ b/margin_logs/step_0000057.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8aaccfb8b983f6a2c49497c3c95fb3386ace74a398bd0b19ab7569d8c15efaa +size 384 diff --git a/margin_logs/step_0000058.npy b/margin_logs/step_0000058.npy new file mode 100644 index 0000000..bf651be --- /dev/null +++ b/margin_logs/step_0000058.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f3387219fd581f1aa3b1da99dfa0a625107075f578f2594308f83449a6b279 +size 384 diff --git a/margin_logs/step_0000059.npy b/margin_logs/step_0000059.npy new file mode 100644 index 0000000..cde23b1 --- /dev/null +++ b/margin_logs/step_0000059.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fffe73ca586de6cb15a3f40321c7295113bf66a1b2630d4447c1bed5b5ea29c +size 384 diff --git a/margin_logs/step_0000060.npy b/margin_logs/step_0000060.npy new file mode 100644 index 0000000..6486a87 --- /dev/null +++ b/margin_logs/step_0000060.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aacfdcfb8a3d3516187a948bdc7a87d8869a0327b9427f229339c397080ec33d +size 384 diff --git a/margin_logs/step_0000061.npy b/margin_logs/step_0000061.npy new file mode 100644 index 0000000..b56d69b --- /dev/null +++ b/margin_logs/step_0000061.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c72a4e8b88178c5034bc35459738d9a358fd127218353c32e81d8fede7a90bb7 +size 384 diff --git a/margin_logs/step_0000062.npy b/margin_logs/step_0000062.npy new file mode 100644 index 0000000..844daf8 --- /dev/null +++ b/margin_logs/step_0000062.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15727929dfe0facf83140a8abffd22cfa86cdb1d1463828d757d8e8568fb74b8 +size 384 diff --git a/margin_logs/step_0000063.npy b/margin_logs/step_0000063.npy new file mode 100644 index 0000000..f61e8c4 --- /dev/null +++ b/margin_logs/step_0000063.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d6f0d89572f8c57a75e08f9e3234e465494877dffb225b22c0f4b6f1684a62 +size 384 diff --git a/margin_logs/step_0000064.npy b/margin_logs/step_0000064.npy new file mode 100644 index 0000000..a794ec0 --- /dev/null +++ b/margin_logs/step_0000064.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b15da566e52f57e340e451f5dc429999b44b4cad92f92b72bb95dd24981ff0f +size 384 diff --git a/margin_logs/step_0000065.npy b/margin_logs/step_0000065.npy new file mode 100644 index 0000000..8c1696d --- /dev/null +++ b/margin_logs/step_0000065.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ad54ea6358af158e4c7232d9cc5ee5f140c0579a27db084c40d22a98ff032cb +size 384 diff --git a/margin_logs/step_0000066.npy b/margin_logs/step_0000066.npy new file mode 100644 index 0000000..20d60dc --- /dev/null +++ b/margin_logs/step_0000066.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1b948cb413609e7e64834ea43fd6f40d0edb3cf2d772dd6f15fc68661b7542f +size 384 diff --git a/margin_logs/step_0000067.npy b/margin_logs/step_0000067.npy new file mode 100644 index 0000000..6512045 --- /dev/null +++ b/margin_logs/step_0000067.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a6151ffbe152505611a39df21ce9a8c12944a9178fed31d10aad6b311b59680 +size 384 diff --git a/margin_logs/step_0000068.npy b/margin_logs/step_0000068.npy new file mode 100644 index 0000000..b4c1503 --- /dev/null +++ b/margin_logs/step_0000068.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f52e3c472024fc11e6d8c1398fa0dadb2dda009fe95d7099d1c5210df7c95c97 +size 384 diff --git a/margin_logs/step_0000069.npy b/margin_logs/step_0000069.npy new file mode 100644 index 0000000..85d021d --- /dev/null +++ b/margin_logs/step_0000069.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aae92d94c3a2618e60767a0309dd867936ee65bf94766817f5b1650ac8ff6a8d +size 384 diff --git a/margin_logs/step_0000070.npy b/margin_logs/step_0000070.npy new file mode 100644 index 0000000..5676104 --- /dev/null +++ b/margin_logs/step_0000070.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc63cf6ee66147c58d6b2a762c6b13f5171c3802dca70a2dcb1f9d12cd37d5eb +size 384 diff --git a/margin_logs/step_0000071.npy b/margin_logs/step_0000071.npy new file mode 100644 index 0000000..44b3438 --- /dev/null +++ b/margin_logs/step_0000071.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05939c086ae1bf5d926bb260fe160a25e19801ec6adb2017ba840e078855e567 +size 384 diff --git a/margin_logs/step_0000072.npy b/margin_logs/step_0000072.npy new file mode 100644 index 0000000..922a8a0 --- /dev/null +++ b/margin_logs/step_0000072.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79650f4c1d36a419ee479f86ecb070dec46764e45d4fd13ae93f8b7d79f09246 +size 384 diff --git a/margin_logs/step_0000073.npy b/margin_logs/step_0000073.npy new file mode 100644 index 0000000..3d2037a --- /dev/null +++ b/margin_logs/step_0000073.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:895ef4065c704f0845da46dc07e215f2bc1033692a9023b24d894ce622e24cb3 +size 384 diff --git a/margin_logs/step_0000074.npy b/margin_logs/step_0000074.npy new file mode 100644 index 0000000..ca5d9a5 --- /dev/null +++ b/margin_logs/step_0000074.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c68b94e01f1e0303a0de23d4627f7dfbe3c26f7cd013fca27ab9db80ac5c7a5 +size 384 diff --git a/margin_logs/step_0000075.npy b/margin_logs/step_0000075.npy new file mode 100644 index 0000000..ad0377e --- /dev/null +++ b/margin_logs/step_0000075.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d73aa472b197030595ac731f41c129293eaa5c09b00569781615a01545fcec0 +size 384 diff --git a/margin_logs/step_0000076.npy b/margin_logs/step_0000076.npy new file mode 100644 index 0000000..da76b07 --- /dev/null +++ b/margin_logs/step_0000076.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4c55117e455d4e375e2a8dfa0ac7f6a5e4dd04cfc2c3e02004fe2530d0bdf21 +size 384 diff --git a/margin_logs/step_0000077.npy b/margin_logs/step_0000077.npy new file mode 100644 index 0000000..b6026a0 --- /dev/null +++ b/margin_logs/step_0000077.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d8284b02942e844d064ee91c3c08be2c764203d44c9cdfa469daa62c18140e +size 384 diff --git a/margin_logs/step_0000078.npy b/margin_logs/step_0000078.npy new file mode 100644 index 0000000..86fec46 --- /dev/null +++ b/margin_logs/step_0000078.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:401bb8c8fe46b322aa6bc5bf07c6b47e96c87849974e60ef4a5e304c2ad51958 +size 384 diff --git a/margin_logs/step_0000079.npy b/margin_logs/step_0000079.npy new file mode 100644 index 0000000..95bcfe3 --- /dev/null +++ b/margin_logs/step_0000079.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34430ab4eaec32822c38a3041e9a8b6936f26cdd20b414ef4657157327d462b1 +size 384 diff --git a/margin_logs/step_0000080.npy b/margin_logs/step_0000080.npy new file mode 100644 index 0000000..35a0ae4 --- /dev/null +++ b/margin_logs/step_0000080.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eb2c4c60bc8b2f03c7f3320513fc7a5214fd060887ace5d96efab36bd524d0f +size 384 diff --git a/margin_logs/step_0000081.npy b/margin_logs/step_0000081.npy new file mode 100644 index 0000000..3fb0992 --- /dev/null +++ b/margin_logs/step_0000081.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e365ef4729a705f7c4acc419a5372b4dd9aa45d459be0acd308a8af47c184404 +size 384 diff --git a/margin_logs/step_0000082.npy b/margin_logs/step_0000082.npy new file mode 100644 index 0000000..1580ff9 --- /dev/null +++ b/margin_logs/step_0000082.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86960fca5f7c0dfdf430045843082135abb94c025117c798929760217ad513c7 +size 384 diff --git a/margin_logs/step_0000083.npy b/margin_logs/step_0000083.npy new file mode 100644 index 0000000..84b7a0c --- /dev/null +++ b/margin_logs/step_0000083.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88cc68110b33af480c9b8145d08aaa8ea090ba077ed296e17eed477129e48906 +size 384 diff --git a/margin_logs/step_0000084.npy b/margin_logs/step_0000084.npy new file mode 100644 index 0000000..64c21e6 --- /dev/null +++ b/margin_logs/step_0000084.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07a1efdd52d8e14fa08f74a8fbe17dfcfee44586894d751ce951db5b6979c5f0 +size 384 diff --git a/margin_logs/step_0000085.npy b/margin_logs/step_0000085.npy new file mode 100644 index 0000000..0fcc7fc --- /dev/null +++ b/margin_logs/step_0000085.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c776a2f574fcb572a2f7ad509a94a271b926f6adc7c4ff07d75e01ba631f6842 +size 384 diff --git a/margin_logs/step_0000086.npy b/margin_logs/step_0000086.npy new file mode 100644 index 0000000..95c75af --- /dev/null +++ b/margin_logs/step_0000086.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5865e4e2b968c36c755d6dfd4028cf574403a2e30603a31c301706709189c6a8 +size 384 diff --git a/margin_logs/step_0000087.npy b/margin_logs/step_0000087.npy new file mode 100644 index 0000000..339f105 --- /dev/null +++ b/margin_logs/step_0000087.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1c0812b86e8ea58f19391caca7b1857ec5431bb6a361bd6c2e70de013f9624d +size 384 diff --git a/margin_logs/step_0000088.npy b/margin_logs/step_0000088.npy new file mode 100644 index 0000000..b17bba4 --- /dev/null +++ b/margin_logs/step_0000088.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67589d3875c2671745efffb32b0345023cd224716c0caf22ab9ab95c8c9dbc43 +size 384 diff --git a/margin_logs/step_0000089.npy b/margin_logs/step_0000089.npy new file mode 100644 index 0000000..9cad4d7 --- /dev/null +++ b/margin_logs/step_0000089.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d90a943ab713108f64f8a28bae4af0aaab98e6e1bc4a6cc248d7b5a932c4ad22 +size 384 diff --git a/margin_logs/step_0000090.npy b/margin_logs/step_0000090.npy new file mode 100644 index 0000000..186894f --- /dev/null +++ b/margin_logs/step_0000090.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21c6467115e5ce6afa987a7560fdc660258172595aa8f3c9f38c3554a1ff03a8 +size 384 diff --git a/margin_logs/step_0000091.npy b/margin_logs/step_0000091.npy new file mode 100644 index 0000000..42e8686 --- /dev/null +++ b/margin_logs/step_0000091.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eaa9749dd9d5cfb98115d16bd45805f1fc58deff62f32c8a709e82c8c20e226 +size 384 diff --git a/margin_logs/step_0000092.npy b/margin_logs/step_0000092.npy new file mode 100644 index 0000000..4145d5a --- /dev/null +++ b/margin_logs/step_0000092.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d488a091f43e7ce46941f52732564bcef6901003f9e9f0f9ab5cb50370c6edd +size 384 diff --git a/margin_logs/step_0000093.npy b/margin_logs/step_0000093.npy new file mode 100644 index 0000000..abad147 --- /dev/null +++ b/margin_logs/step_0000093.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44b51f9990f79519981c0a5ab220b67315b78d6ff3ac490b5531b112b124120e +size 384 diff --git a/margin_logs/step_0000094.npy b/margin_logs/step_0000094.npy new file mode 100644 index 0000000..64d1582 --- /dev/null +++ b/margin_logs/step_0000094.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58259cb4b338f39da68d8f8620f1b0c95043017c3aa0be32a91f64bab0c06928 +size 384 diff --git a/margin_logs/step_0000095.npy b/margin_logs/step_0000095.npy new file mode 100644 index 0000000..0b0c8e4 --- /dev/null +++ b/margin_logs/step_0000095.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62bce5b82b38f336fc62b068408ba98fd996cda5341967cdd7250de06d933cd2 +size 384 diff --git a/margin_logs/step_0000096.npy b/margin_logs/step_0000096.npy new file mode 100644 index 0000000..2da0254 --- /dev/null +++ b/margin_logs/step_0000096.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f934a0533c8dcfa24b957bb268887173775470a740fe99e55f165934ff441009 +size 384 diff --git a/margin_logs/step_0000097.npy b/margin_logs/step_0000097.npy new file mode 100644 index 0000000..8f8251f --- /dev/null +++ b/margin_logs/step_0000097.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a479588893e48bf4cda96cc982829ece4932a0c83bf1ec8a35ee44167685168 +size 384 diff --git a/margin_logs/step_0000098.npy b/margin_logs/step_0000098.npy new file mode 100644 index 0000000..6d13a51 --- /dev/null +++ b/margin_logs/step_0000098.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1baa3d4f08d59ef2b75fb9dd2d19a141d5763f474f1f01d38176c348cb496dd +size 384 diff --git a/margin_logs/step_0000099.npy b/margin_logs/step_0000099.npy new file mode 100644 index 0000000..29202d9 --- /dev/null +++ b/margin_logs/step_0000099.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d29f421f6dcebc2d23f365747cab5e3a06c4e565080dcd30dc8ccfb884755a98 +size 384 diff --git a/margin_logs/step_0000100.npy b/margin_logs/step_0000100.npy new file mode 100644 index 0000000..bbac88a --- /dev/null +++ b/margin_logs/step_0000100.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13561cfb66f02352f37201560a6edd53d40f227ebf592edb57a353d798a4032e +size 384 diff --git a/margin_logs/step_0000101.npy b/margin_logs/step_0000101.npy new file mode 100644 index 0000000..4486cab --- /dev/null +++ b/margin_logs/step_0000101.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64b12c727208c9ba597e3fd42c0ebaa535d41d2bfcc073e036b1655702e3d576 +size 384 diff --git a/margin_logs/step_0000102.npy b/margin_logs/step_0000102.npy new file mode 100644 index 0000000..601c45e --- /dev/null +++ b/margin_logs/step_0000102.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16003a200c1649b9b6e917b508ba9e83dad7eecf69183931844b0e36513238f1 +size 384 diff --git a/margin_logs/step_0000103.npy b/margin_logs/step_0000103.npy new file mode 100644 index 0000000..70c1559 --- /dev/null +++ b/margin_logs/step_0000103.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94bb654fe0ca23bd6e9ff7858ba2b8a09ed582b4c4f6296165fa695413ee2b35 +size 384 diff --git a/margin_logs/step_0000104.npy b/margin_logs/step_0000104.npy new file mode 100644 index 0000000..fb7f0b2 --- /dev/null +++ b/margin_logs/step_0000104.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b8b887f8636b118818bb45fb4d2787a160322abf4ec064efdf29fe9c37da277 +size 384 diff --git a/margin_logs/step_0000105.npy b/margin_logs/step_0000105.npy new file mode 100644 index 0000000..5ea2db0 --- /dev/null +++ b/margin_logs/step_0000105.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e53c34867bef4f81d26c26eba642b1a88c18d00dabd2751dbd603f572dea6dd +size 384 diff --git a/margin_logs/step_0000106.npy b/margin_logs/step_0000106.npy new file mode 100644 index 0000000..8fca334 --- /dev/null +++ b/margin_logs/step_0000106.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b7479a56c2ad6ce1d219c7e0aca76abe1dde0f3b366730b2affcccff7412743 +size 384 diff --git a/margin_logs/step_0000107.npy b/margin_logs/step_0000107.npy new file mode 100644 index 0000000..c02d5be --- /dev/null +++ b/margin_logs/step_0000107.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64bb6f00c3077baf90dcd532b42accdfd2756e111bafc9ffed94d822e76bad81 +size 384 diff --git a/margin_logs/step_0000108.npy b/margin_logs/step_0000108.npy new file mode 100644 index 0000000..ba86429 --- /dev/null +++ b/margin_logs/step_0000108.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9655eea75d7d22d50913c2f42f0c838d6af3b18bc4f56279bf45d6a19dd8b6e +size 384 diff --git a/margin_logs/step_0000109.npy b/margin_logs/step_0000109.npy new file mode 100644 index 0000000..a3d31ac --- /dev/null +++ b/margin_logs/step_0000109.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1badfd2e2e74961aa8b7105077644b120d94affbb82ab798851b1c0a70bbfc8e +size 384 diff --git a/margin_logs/step_0000110.npy b/margin_logs/step_0000110.npy new file mode 100644 index 0000000..147dd94 --- /dev/null +++ b/margin_logs/step_0000110.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e1662fb11cadb589c8939f334de572fc4cd0b3a379e40d853f007c114216248 +size 384 diff --git a/margin_logs/step_0000111.npy b/margin_logs/step_0000111.npy new file mode 100644 index 0000000..ce30a19 --- /dev/null +++ b/margin_logs/step_0000111.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e34e405a45d249abaea7289a880425e645f5654ea8f4f21d97fdd7f19bf1df3 +size 384 diff --git a/margin_logs/step_0000112.npy b/margin_logs/step_0000112.npy new file mode 100644 index 0000000..7a90f17 --- /dev/null +++ b/margin_logs/step_0000112.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26e947f388f6efbb957f9408b6ceb0a72ef2b8d7484159be79130205930ab074 +size 384 diff --git a/margin_logs/step_0000113.npy b/margin_logs/step_0000113.npy new file mode 100644 index 0000000..377a469 --- /dev/null +++ b/margin_logs/step_0000113.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38cec6c689260a06e1856cc15820bfba288dc1d55867a49df4dbe8c918659556 +size 384 diff --git a/margin_logs/step_0000114.npy b/margin_logs/step_0000114.npy new file mode 100644 index 0000000..2555fa0 --- /dev/null +++ b/margin_logs/step_0000114.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c61f8ad70c3d8c93120fc3ab2b950ab213bf66441c5871757dd9d311dc3a284 +size 384 diff --git a/margin_logs/step_0000115.npy b/margin_logs/step_0000115.npy new file mode 100644 index 0000000..866f652 --- /dev/null +++ b/margin_logs/step_0000115.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e54e1bd3b94ed637387e29f0ee891fbf4a98f2642776c37c20380c63989b0c2 +size 384 diff --git a/margin_logs/step_0000116.npy b/margin_logs/step_0000116.npy new file mode 100644 index 0000000..260c250 --- /dev/null +++ b/margin_logs/step_0000116.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bf0dc6fa6a27c0e95178da2dc38d9823a68cdabedc51597dcbd7da6c0e2ffb0 +size 384 diff --git a/margin_logs/step_0000117.npy b/margin_logs/step_0000117.npy new file mode 100644 index 0000000..448ace9 --- /dev/null +++ b/margin_logs/step_0000117.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:500f1a8afcd9ba6ed4405344006a203cf709747d39b1e85d45781f60613cc66a +size 384 diff --git a/margin_logs/step_0000118.npy b/margin_logs/step_0000118.npy new file mode 100644 index 0000000..935b3ed --- /dev/null +++ b/margin_logs/step_0000118.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:405905a2f9b46b350aee1c849e4d70c6e823e9567f6727e8ce072bfb8383e57b +size 384 diff --git a/margin_logs/step_0000119.npy b/margin_logs/step_0000119.npy new file mode 100644 index 0000000..3e04773 --- /dev/null +++ b/margin_logs/step_0000119.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fc12ff425d3c15f61f3108c33f7fda98dc845f484950502eef41f7f1a150522 +size 384 diff --git a/margin_logs/step_0000120.npy b/margin_logs/step_0000120.npy new file mode 100644 index 0000000..e93258b --- /dev/null +++ b/margin_logs/step_0000120.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29f2011cc5c80e5608845be17d30cc3ebd71258e9abc41027f4c84af5b2ec761 +size 384 diff --git a/margin_logs/step_0000121.npy b/margin_logs/step_0000121.npy new file mode 100644 index 0000000..c86ca43 --- /dev/null +++ b/margin_logs/step_0000121.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:172dd35ad29c2380b39e01e09762ec95fc92534756ce45169b89919080416a28 +size 384 diff --git a/margin_logs/step_0000122.npy b/margin_logs/step_0000122.npy new file mode 100644 index 0000000..476a3e9 --- /dev/null +++ b/margin_logs/step_0000122.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:943893f294a75c71fb144ce65140f08acc0bf0f504ce44e230ca1a360371794d +size 384 diff --git a/margin_logs/step_0000123.npy b/margin_logs/step_0000123.npy new file mode 100644 index 0000000..843d43c --- /dev/null +++ b/margin_logs/step_0000123.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:136dd6a4cc8a9e6c8833bea202667ac086273bbf4fcbc245655e7cfb31c77c1b +size 384 diff --git a/margin_logs/step_0000124.npy b/margin_logs/step_0000124.npy new file mode 100644 index 0000000..c59ea7e --- /dev/null +++ b/margin_logs/step_0000124.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0def34394806d99b099c343cc83b3fa3abb181a51e20a7a09baa442ae5f50ec +size 384 diff --git a/margin_logs/step_0000125.npy b/margin_logs/step_0000125.npy new file mode 100644 index 0000000..c087013 --- /dev/null +++ b/margin_logs/step_0000125.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f24a38dd24b19a745dc39a8ff01c47bcb40490e79c448f37b6a7111b7fb34367 +size 384 diff --git a/margin_logs/step_0000126.npy b/margin_logs/step_0000126.npy new file mode 100644 index 0000000..96e7726 --- /dev/null +++ b/margin_logs/step_0000126.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c5f1bf96ed7ea1518b7925447c60047f66833b2d5dd227b5096fbc6400b0dd +size 384 diff --git a/margin_logs/step_0000127.npy b/margin_logs/step_0000127.npy new file mode 100644 index 0000000..43df6bb --- /dev/null +++ b/margin_logs/step_0000127.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8824b6293aa13d831b5eb662d243e1326f1e3f2dabf0f53caa85ee4a3bd7fd3 +size 384 diff --git a/margin_logs/step_0000128.npy b/margin_logs/step_0000128.npy new file mode 100644 index 0000000..295a2e4 --- /dev/null +++ b/margin_logs/step_0000128.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd558ef48449f532e665cc3e578895cb17c93d03cdfd6641edb54cb3d6d5021a +size 384 diff --git a/margin_logs/step_0000129.npy b/margin_logs/step_0000129.npy new file mode 100644 index 0000000..77cc8bf --- /dev/null +++ b/margin_logs/step_0000129.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47e679eeafa7d6a48251ec5fd20803abd1274fb18740a61f5f1005569c86efcd +size 384 diff --git a/margin_logs/step_0000130.npy b/margin_logs/step_0000130.npy new file mode 100644 index 0000000..c11baa4 --- /dev/null +++ b/margin_logs/step_0000130.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c2d3852774367ec60ae6b671ca62d05edc24ae195418ed117d20a509ef65fba +size 384 diff --git a/margin_logs/step_0000131.npy b/margin_logs/step_0000131.npy new file mode 100644 index 0000000..8143e32 --- /dev/null +++ b/margin_logs/step_0000131.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14bb7a368342825b06e055133439afd34deca9233c5fb0485d9c3ea24a7cfcd1 +size 384 diff --git a/margin_logs/step_0000132.npy b/margin_logs/step_0000132.npy new file mode 100644 index 0000000..1f93d1f --- /dev/null +++ b/margin_logs/step_0000132.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0c465fd1a5f425b8c58c9d2817fc51a49845c42d1f7796e55d153c13e60388e +size 384 diff --git a/margin_logs/step_0000133.npy b/margin_logs/step_0000133.npy new file mode 100644 index 0000000..7c24539 --- /dev/null +++ b/margin_logs/step_0000133.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fedf2bc062be7009f11ee381614b48b0bcf9438034c2ade577b2fb0762f9dcc4 +size 384 diff --git a/margin_logs/step_0000134.npy b/margin_logs/step_0000134.npy new file mode 100644 index 0000000..f3bfc49 --- /dev/null +++ b/margin_logs/step_0000134.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7e3fedc65fdf1a97beb37791c3f5f2881dcdc312725fc71a87f545354fa711f +size 384 diff --git a/margin_logs/step_0000135.npy b/margin_logs/step_0000135.npy new file mode 100644 index 0000000..2b268c8 --- /dev/null +++ b/margin_logs/step_0000135.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef14cd2a448a3915471d80dade8ca975e9d23a4030f9c825439e8f2a0cab1e7c +size 384 diff --git a/margin_logs/step_0000136.npy b/margin_logs/step_0000136.npy new file mode 100644 index 0000000..01fa19e --- /dev/null +++ b/margin_logs/step_0000136.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49635d27c99cf5edd045882309c2c30ea2aa65a63e582cd67081ab029cf99055 +size 384 diff --git a/margin_logs/step_0000137.npy b/margin_logs/step_0000137.npy new file mode 100644 index 0000000..ba43aa0 --- /dev/null +++ b/margin_logs/step_0000137.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5605b9fb2b0f0ace767c3693862b0d78882393fbca6ab16cb6aef3b38a953c3c +size 384 diff --git a/margin_logs/step_0000138.npy b/margin_logs/step_0000138.npy new file mode 100644 index 0000000..ec49e9e --- /dev/null +++ b/margin_logs/step_0000138.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd9f0aac2abbc107730c1197490e5e0e599a81820a90de02e678f1d64eecf1b9 +size 384 diff --git a/margin_logs/step_0000139.npy b/margin_logs/step_0000139.npy new file mode 100644 index 0000000..191f435 --- /dev/null +++ b/margin_logs/step_0000139.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08f8ee6ec5463374c85436885d177a701223e5b51345ffe568b348c50dff4f25 +size 384 diff --git a/margin_logs/step_0000140.npy b/margin_logs/step_0000140.npy new file mode 100644 index 0000000..7ed65b9 --- /dev/null +++ b/margin_logs/step_0000140.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0fc4458f26f246f4bd76a3709bb5a7079b97ef198e4d21194da5ca9a589eadf +size 384 diff --git a/margin_logs/step_0000141.npy b/margin_logs/step_0000141.npy new file mode 100644 index 0000000..6e89e71 --- /dev/null +++ b/margin_logs/step_0000141.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:614880ae40768d156c42c509c4b85977a8f2ff621f0ba9aa3d95d825a4283f31 +size 384 diff --git a/margin_logs/step_0000142.npy b/margin_logs/step_0000142.npy new file mode 100644 index 0000000..508531d --- /dev/null +++ b/margin_logs/step_0000142.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1612d644410ab1061c8edbb99550ee483cfe571e91b79ff02e9d81f65f3c3c0a +size 384 diff --git a/margin_logs/step_0000143.npy b/margin_logs/step_0000143.npy new file mode 100644 index 0000000..32e119c --- /dev/null +++ b/margin_logs/step_0000143.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca2ed454cc60402b3ffd6784dc41ec9c26d8e0c4d42782996071316a23416f7c +size 384 diff --git a/margin_logs/step_0000144.npy b/margin_logs/step_0000144.npy new file mode 100644 index 0000000..d2e5c99 --- /dev/null +++ b/margin_logs/step_0000144.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264ff704665899499cc6c78dfc07cf249201a127cf40e2c092b7bf64cf16c5e0 +size 384 diff --git a/margin_logs/step_0000145.npy b/margin_logs/step_0000145.npy new file mode 100644 index 0000000..1f8701f --- /dev/null +++ b/margin_logs/step_0000145.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51573f9c438b5a3b65b66cafa6205059dbbb69339a75d5da7c865afbea2c5bc +size 384 diff --git a/margin_logs/step_0000146.npy b/margin_logs/step_0000146.npy new file mode 100644 index 0000000..fae2a0e --- /dev/null +++ b/margin_logs/step_0000146.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec189783cd1054dc7c2f1bcd1190d00feb9465f6fe56911c567dac8d01b137d0 +size 384 diff --git a/margin_logs/step_0000147.npy b/margin_logs/step_0000147.npy new file mode 100644 index 0000000..b8a9534 --- /dev/null +++ b/margin_logs/step_0000147.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d74004a7d838504fa3e4bd8b592b1d6dbe48d38ffb9bc7f986cd33a247da21b +size 384 diff --git a/margin_logs/step_0000148.npy b/margin_logs/step_0000148.npy new file mode 100644 index 0000000..26df7cf --- /dev/null +++ b/margin_logs/step_0000148.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:191ebab0a722e0efbf823255493d0d6d28284f5a965223ba7bb486381cfbad8e +size 384 diff --git a/margin_logs/step_0000149.npy b/margin_logs/step_0000149.npy new file mode 100644 index 0000000..4109092 --- /dev/null +++ b/margin_logs/step_0000149.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ac938ba9a7f9b4151cdb3d533b0f5524a5a754e4173b3c8f2080981c8f6061 +size 384 diff --git a/margin_logs/step_0000150.npy b/margin_logs/step_0000150.npy new file mode 100644 index 0000000..55b2b71 --- /dev/null +++ b/margin_logs/step_0000150.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64c985c772153cf10bee9a30af817fdd4f7cc7dbe00fc0a6ebebc7616787a3a5 +size 384 diff --git a/margin_logs/step_0000151.npy b/margin_logs/step_0000151.npy new file mode 100644 index 0000000..aeb7c2a --- /dev/null +++ b/margin_logs/step_0000151.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:445d432a7ce537f4860ddb9c8c6ffb4fc3ae0718075fa94542ccf5c27855d03b +size 384 diff --git a/margin_logs/step_0000152.npy b/margin_logs/step_0000152.npy new file mode 100644 index 0000000..85575f4 --- /dev/null +++ b/margin_logs/step_0000152.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f172c54481329a0991b0c6a20c53320f7879b194ae12ccb56beaba071fe9ce17 +size 384 diff --git a/margin_logs/step_0000153.npy b/margin_logs/step_0000153.npy new file mode 100644 index 0000000..b952059 --- /dev/null +++ b/margin_logs/step_0000153.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3101fc352b13f3c8e9e930932334444310b1388230ad8dd85122a8a8f92ffd52 +size 384 diff --git a/margin_logs/step_0000154.npy b/margin_logs/step_0000154.npy new file mode 100644 index 0000000..38bdc4f --- /dev/null +++ b/margin_logs/step_0000154.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cbaceff5b3d067079fd054c89c06f8e289942d774902a7696ea990acd9ddca8 +size 384 diff --git a/margin_logs/step_0000155.npy b/margin_logs/step_0000155.npy new file mode 100644 index 0000000..bb14389 --- /dev/null +++ b/margin_logs/step_0000155.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed7c71378fa2bf458504f0c1f555e7599ed4c8052633f68ce0278b72949bc40d +size 384 diff --git a/margin_logs/step_0000156.npy b/margin_logs/step_0000156.npy new file mode 100644 index 0000000..757338b --- /dev/null +++ b/margin_logs/step_0000156.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98ef8bebce49a695098e40779a30db91a90da7ddbc03705ca79fe849f060d443 +size 384 diff --git a/margin_logs/step_0000157.npy b/margin_logs/step_0000157.npy new file mode 100644 index 0000000..3dbcb19 --- /dev/null +++ b/margin_logs/step_0000157.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf16e4a253431eb00592bbec1ca020a7ce85769360bb07f2bea1e818b58d5602 +size 384 diff --git a/margin_logs/step_0000158.npy b/margin_logs/step_0000158.npy new file mode 100644 index 0000000..490bcb4 --- /dev/null +++ b/margin_logs/step_0000158.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55219ca7cb8a54e60c95e0aaf79596334f967d8f96ec1d5bff0eee5c16817349 +size 384 diff --git a/margin_logs/step_0000159.npy b/margin_logs/step_0000159.npy new file mode 100644 index 0000000..7c77d90 --- /dev/null +++ b/margin_logs/step_0000159.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed0ac0ea48c36e509b7f28a16175d004a4839769aa3413b44c2b2480198e28e0 +size 384 diff --git a/margin_logs/step_0000160.npy b/margin_logs/step_0000160.npy new file mode 100644 index 0000000..ff81498 --- /dev/null +++ b/margin_logs/step_0000160.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a942b15327f3cc1c804845b96fff1c96a9c4dd1ef99c3440fc4e0253b6c440c0 +size 384 diff --git a/margin_logs/step_0000161.npy b/margin_logs/step_0000161.npy new file mode 100644 index 0000000..5215189 --- /dev/null +++ b/margin_logs/step_0000161.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee979d2379bd4a293cd4ad76d71a174d24008585c819d8d4e5f502f619dc766 +size 384 diff --git a/margin_logs/step_0000162.npy b/margin_logs/step_0000162.npy new file mode 100644 index 0000000..f166586 --- /dev/null +++ b/margin_logs/step_0000162.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b86b3158770984bb40ffb926b1176b35756274a54ec087d08c37991f95fd067b +size 384 diff --git a/margin_logs/step_0000163.npy b/margin_logs/step_0000163.npy new file mode 100644 index 0000000..26e734a --- /dev/null +++ b/margin_logs/step_0000163.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:109a5e060d72d67d6d1fba7c2334b433a54147665d695d8022571f38ae497ea6 +size 384 diff --git a/margin_logs/step_0000164.npy b/margin_logs/step_0000164.npy new file mode 100644 index 0000000..3faff94 --- /dev/null +++ b/margin_logs/step_0000164.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41204e09682557e9673952011a9ba822ea32446913dc365caf81a676d9996795 +size 384 diff --git a/margin_logs/step_0000165.npy b/margin_logs/step_0000165.npy new file mode 100644 index 0000000..6f5219a --- /dev/null +++ b/margin_logs/step_0000165.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c69b72f30a1b6706bc89f248673e5b3cbffcfbafb679aceecfd7d2e949ecbaf +size 384 diff --git a/margin_logs/step_0000166.npy b/margin_logs/step_0000166.npy new file mode 100644 index 0000000..5e93dd4 --- /dev/null +++ b/margin_logs/step_0000166.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a95b3425abd15d29f00c669eb34c98b04c9df6feac49cd02b03d61e80e0a7a5 +size 384 diff --git a/margin_logs/step_0000167.npy b/margin_logs/step_0000167.npy new file mode 100644 index 0000000..b5b5745 --- /dev/null +++ b/margin_logs/step_0000167.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d6a5a275760065f95e479f2256ce555eb06a4a294653ed7ebe48b50affc5b51 +size 384 diff --git a/margin_logs/step_0000168.npy b/margin_logs/step_0000168.npy new file mode 100644 index 0000000..2ee3f42 --- /dev/null +++ b/margin_logs/step_0000168.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f8428d31c6061c0d91f41ec99f162937bb9bba80002be0d6b02e0055beb9b02 +size 384 diff --git a/margin_logs/step_0000169.npy b/margin_logs/step_0000169.npy new file mode 100644 index 0000000..2cf131b --- /dev/null +++ b/margin_logs/step_0000169.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008066b3a6930a091d5334b53d442139b5f5316a3a4d1f3e40b779dd6266d227 +size 384 diff --git a/margin_logs/step_0000170.npy b/margin_logs/step_0000170.npy new file mode 100644 index 0000000..2cc1409 --- /dev/null +++ b/margin_logs/step_0000170.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e8f18f390591112997315a611bceb033deafc80074e43438fa2c64a6fc1f5da +size 384 diff --git a/margin_logs/step_0000171.npy b/margin_logs/step_0000171.npy new file mode 100644 index 0000000..fe6cf10 --- /dev/null +++ b/margin_logs/step_0000171.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b4f350a69a115ae1eafe52d5c007524dcf0e6ef8808f00c1d47749d7d619386 +size 384 diff --git a/margin_logs/step_0000172.npy b/margin_logs/step_0000172.npy new file mode 100644 index 0000000..2290206 --- /dev/null +++ b/margin_logs/step_0000172.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05f381150f04141f8d90a38e5a39ddf5c305e4d34e9717744a8fced755bcdd86 +size 384 diff --git a/margin_logs/step_0000173.npy b/margin_logs/step_0000173.npy new file mode 100644 index 0000000..4f38a80 --- /dev/null +++ b/margin_logs/step_0000173.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4230dd02a2f9c3e91a8ce0b99257c9704a8e017dfcfc78438a869178a8b30d8a +size 384 diff --git a/margin_logs/step_0000174.npy b/margin_logs/step_0000174.npy new file mode 100644 index 0000000..4e28fea --- /dev/null +++ b/margin_logs/step_0000174.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3ac02c33d712080705dd3f2578bb92c4a243e00ce6a41041255d05f4bf26381 +size 384 diff --git a/margin_logs/step_0000175.npy b/margin_logs/step_0000175.npy new file mode 100644 index 0000000..8e7340b --- /dev/null +++ b/margin_logs/step_0000175.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9717df45bdea1a4d1a3c12fb9ea588b0c65b7f4baba989f2584cf8fffebce13e +size 384 diff --git a/margin_logs/step_0000176.npy b/margin_logs/step_0000176.npy new file mode 100644 index 0000000..9dbd33c --- /dev/null +++ b/margin_logs/step_0000176.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca34bb763c838cd58440dc392683cd6bfe9009b320f1328c4d82b1f4cd386e97 +size 384 diff --git a/margin_logs/step_0000177.npy b/margin_logs/step_0000177.npy new file mode 100644 index 0000000..d0315a4 --- /dev/null +++ b/margin_logs/step_0000177.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32776e695c3ecef8a6484dcb8b6f074ab2e3aff081f1ec7b22be195c1653e40c +size 384 diff --git a/margin_logs/step_0000178.npy b/margin_logs/step_0000178.npy new file mode 100644 index 0000000..319a2bb --- /dev/null +++ b/margin_logs/step_0000178.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:217a86a1052cdb7766440cb3d3fb94eb769975fb7a8f3a9304271d1512794d57 +size 384 diff --git a/margin_logs/step_0000179.npy b/margin_logs/step_0000179.npy new file mode 100644 index 0000000..454291e --- /dev/null +++ b/margin_logs/step_0000179.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9857d9009fd9a0c942fc8e715fa1b0eb45216828bfdd665e95b8266e4105031 +size 384 diff --git a/margin_logs/step_0000180.npy b/margin_logs/step_0000180.npy new file mode 100644 index 0000000..8c8fd34 --- /dev/null +++ b/margin_logs/step_0000180.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66cda5a9f1eca8358ec896ece8a558df1e89de58f3e0023fd9c01d2237d7c564 +size 384 diff --git a/margin_logs/step_0000181.npy b/margin_logs/step_0000181.npy new file mode 100644 index 0000000..bc0efdc --- /dev/null +++ b/margin_logs/step_0000181.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98fc4ec707d3cb818f4b30bf50814dcd64957b36597d5a920ed6c359939fe5af +size 384 diff --git a/margin_logs/step_0000182.npy b/margin_logs/step_0000182.npy new file mode 100644 index 0000000..4f59b05 --- /dev/null +++ b/margin_logs/step_0000182.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01bcba2ca71fdc9686ca27f5e74be34b184d710850ff0175487054dde17bc369 +size 384 diff --git a/margin_logs/step_0000183.npy b/margin_logs/step_0000183.npy new file mode 100644 index 0000000..a37b924 --- /dev/null +++ b/margin_logs/step_0000183.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa81336475f092bcb4f6452a60b78295e661a6f4ef32d49573b005cbbc0fcd39 +size 384 diff --git a/margin_logs/step_0000184.npy b/margin_logs/step_0000184.npy new file mode 100644 index 0000000..1c89970 --- /dev/null +++ b/margin_logs/step_0000184.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7437b2e8a8da261a1060d253ee4053058b1d68d3e70161e26d42c70badb2338e +size 384 diff --git a/margin_logs/step_0000185.npy b/margin_logs/step_0000185.npy new file mode 100644 index 0000000..58bad80 --- /dev/null +++ b/margin_logs/step_0000185.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d17d35c4d70e45484425408c6f35a1b4539286bd50e20aa164aece7b4405c868 +size 384 diff --git a/margin_logs/step_0000186.npy b/margin_logs/step_0000186.npy new file mode 100644 index 0000000..da4d6be --- /dev/null +++ b/margin_logs/step_0000186.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7accacdf40fada6b9f04adfec9c92125bf691e3a4f2c63deff0e9be08c1ce86b +size 384 diff --git a/margin_logs/step_0000187.npy b/margin_logs/step_0000187.npy new file mode 100644 index 0000000..ba72654 --- /dev/null +++ b/margin_logs/step_0000187.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:177e0a02d866b9ad28c7e9575e64366fb0c4c045a7c515f79cbebc8df01e025f +size 384 diff --git a/margin_logs/step_0000188.npy b/margin_logs/step_0000188.npy new file mode 100644 index 0000000..d0bd555 --- /dev/null +++ b/margin_logs/step_0000188.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aebca4307e7c8feb77d559bef29f06fb3cef067b1b8a9a889e6f07d926f4701 +size 384 diff --git a/margin_logs/step_0000189.npy b/margin_logs/step_0000189.npy new file mode 100644 index 0000000..995231d --- /dev/null +++ b/margin_logs/step_0000189.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d1e5dea7eb94d05ddc485002eeee01a72fe9f592b0e2f201a929f988a3b024d +size 384 diff --git a/margin_logs/step_0000190.npy b/margin_logs/step_0000190.npy new file mode 100644 index 0000000..f5af017 --- /dev/null +++ b/margin_logs/step_0000190.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b99904e7bfe3d705271b25d96997603460f457af3d12a5690d020280d68f96ad +size 384 diff --git a/margin_logs/step_0000191.npy b/margin_logs/step_0000191.npy new file mode 100644 index 0000000..ef96b75 --- /dev/null +++ b/margin_logs/step_0000191.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2bf08c510302445928a1673e5ae804a2d6c139ae6dcf86275aff4d3ffd3c030 +size 384 diff --git a/margin_logs/step_0000192.npy b/margin_logs/step_0000192.npy new file mode 100644 index 0000000..8e17ef2 --- /dev/null +++ b/margin_logs/step_0000192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb417e3ac897b4692d495ad3d7a3ba9c24b1b6451733039f5dd438b068828cb0 +size 384 diff --git a/margin_logs/step_0000193.npy b/margin_logs/step_0000193.npy new file mode 100644 index 0000000..06dccd1 --- /dev/null +++ b/margin_logs/step_0000193.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65bca2a959f734526e286fbdaf5b58acb6d6406dafed3b782afc1fe94d946df1 +size 384 diff --git a/margin_logs/step_0000194.npy b/margin_logs/step_0000194.npy new file mode 100644 index 0000000..306976b --- /dev/null +++ b/margin_logs/step_0000194.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71c2eb11802218ed887bfe52559d381417e590ba4d04627327b212f60f3044f5 +size 384 diff --git a/margin_logs/step_0000195.npy b/margin_logs/step_0000195.npy new file mode 100644 index 0000000..39be70e --- /dev/null +++ b/margin_logs/step_0000195.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:831f80b4ee08df3a894411eebb68e838ca7448389616c7b57b586b36338bd14b +size 384 diff --git a/margin_logs/step_0000196.npy b/margin_logs/step_0000196.npy new file mode 100644 index 0000000..9331938 --- /dev/null +++ b/margin_logs/step_0000196.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92b79a2ba292abeb627626700434057d1eeb9dfefab3ee79bc760ae8b6d0182c +size 384 diff --git a/margin_logs/step_0000197.npy b/margin_logs/step_0000197.npy new file mode 100644 index 0000000..e4d41b9 --- /dev/null +++ b/margin_logs/step_0000197.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f76f9ed276924dc974d3e394c53285d95e2cfa24e0bb79471498857140263d96 +size 384 diff --git a/margin_logs/step_0000198.npy b/margin_logs/step_0000198.npy new file mode 100644 index 0000000..8cbb15f --- /dev/null +++ b/margin_logs/step_0000198.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9118188a9157ccff7c56f7d893f35743b0ea86fbb3350d13976ce13d246f8de6 +size 384 diff --git a/margin_logs/step_0000199.npy b/margin_logs/step_0000199.npy new file mode 100644 index 0000000..f126119 --- /dev/null +++ b/margin_logs/step_0000199.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32b9599d381574dd55c7c43212e04ec86287746b7439ff186370be7a9235b600 +size 384 diff --git a/margin_logs/step_0000200.npy b/margin_logs/step_0000200.npy new file mode 100644 index 0000000..f1f578a --- /dev/null +++ b/margin_logs/step_0000200.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a6839da3c8bf73521df8d624b616950b06aa607f24b59dab64379a8c8bb1207 +size 384 diff --git a/margin_logs/step_0000201.npy b/margin_logs/step_0000201.npy new file mode 100644 index 0000000..df51ceb --- /dev/null +++ b/margin_logs/step_0000201.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cc6120a714be9395dff776b61eeabb9748e1dfdced7300ea75498ca26b020c7 +size 384 diff --git a/margin_logs/step_0000202.npy b/margin_logs/step_0000202.npy new file mode 100644 index 0000000..20ec493 --- /dev/null +++ b/margin_logs/step_0000202.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77a81b851c2dae18c66d65e7267b43b7bf768ae2a2ee02588f6126ac150ad562 +size 384 diff --git a/margin_logs/step_0000203.npy b/margin_logs/step_0000203.npy new file mode 100644 index 0000000..fbb5bac --- /dev/null +++ b/margin_logs/step_0000203.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:667ab455e0f9984804c25a82cb51517f10bb346e960fa35d3695ae9cad4fdd17 +size 384 diff --git a/margin_logs/step_0000204.npy b/margin_logs/step_0000204.npy new file mode 100644 index 0000000..d504bb5 --- /dev/null +++ b/margin_logs/step_0000204.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cccab36bf3951ee3a9b8c2226f2d3aa40275a7434c769a6876a81d35347fc67a +size 384 diff --git a/margin_logs/step_0000205.npy b/margin_logs/step_0000205.npy new file mode 100644 index 0000000..a1798af --- /dev/null +++ b/margin_logs/step_0000205.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bc38bd475b0e6d5c1bb83e9d15faf25a5b38a93ba11b6492587fa70782c83fa +size 384 diff --git a/margin_logs/step_0000206.npy b/margin_logs/step_0000206.npy new file mode 100644 index 0000000..f55fc82 --- /dev/null +++ b/margin_logs/step_0000206.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5b56fc00d755792b5599e947b4f23973917bacd19429779ba7c25fdaac3bb88 +size 384 diff --git a/margin_logs/step_0000207.npy b/margin_logs/step_0000207.npy new file mode 100644 index 0000000..0b69a8f --- /dev/null +++ b/margin_logs/step_0000207.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13adc804fc9f6b0df11c8c347153246da8fc5e4d3a0749fffc859cf18d863b57 +size 384 diff --git a/margin_logs/step_0000208.npy b/margin_logs/step_0000208.npy new file mode 100644 index 0000000..19ed7af --- /dev/null +++ b/margin_logs/step_0000208.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2029829a81a260a54fba3bd079190fb61ec9dce78775f106ed6c960f95d0905 +size 384 diff --git a/margin_logs/step_0000209.npy b/margin_logs/step_0000209.npy new file mode 100644 index 0000000..e121a0e --- /dev/null +++ b/margin_logs/step_0000209.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94e4f1658373bba426b34991198abb85ea5ed9ffd1205707932ecaeadb1d582c +size 384 diff --git a/margin_logs/step_0000210.npy b/margin_logs/step_0000210.npy new file mode 100644 index 0000000..d653a5d --- /dev/null +++ b/margin_logs/step_0000210.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be7546979b5337d6bb49ae9a6ff1c249882627954b0cb413a4160d109f6b759a +size 384 diff --git a/margin_logs/step_0000211.npy b/margin_logs/step_0000211.npy new file mode 100644 index 0000000..416404f --- /dev/null +++ b/margin_logs/step_0000211.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5400c895e8fe31c69b2875e8a3540d3b5a3cc4d26e6d67d5e4bb5396f63451 +size 384 diff --git a/margin_logs/step_0000212.npy b/margin_logs/step_0000212.npy new file mode 100644 index 0000000..b9065d7 --- /dev/null +++ b/margin_logs/step_0000212.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f29c3f4709956dc392c32e496a332c6e39ecb5fa68f8e922771f6ad882a05d3c +size 384 diff --git a/margin_logs/step_0000213.npy b/margin_logs/step_0000213.npy new file mode 100644 index 0000000..8d7f64e --- /dev/null +++ b/margin_logs/step_0000213.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20166f4a0db9d01558bfacff21686e4023d830bcc213bb302ef4f271cedb473a +size 384 diff --git a/margin_logs/step_0000214.npy b/margin_logs/step_0000214.npy new file mode 100644 index 0000000..27b867a --- /dev/null +++ b/margin_logs/step_0000214.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d2eee9509cb2345bba4981f9fd8bf3d451e12df8bcdde93a3733d0fdbfc31c +size 384 diff --git a/margin_logs/step_0000215.npy b/margin_logs/step_0000215.npy new file mode 100644 index 0000000..aa2f2ac --- /dev/null +++ b/margin_logs/step_0000215.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09b518b4ebce1a00f161a2fdae4f8ced8da8ff73d43de84688af8b250aa23fb4 +size 384 diff --git a/margin_logs/step_0000216.npy b/margin_logs/step_0000216.npy new file mode 100644 index 0000000..3b60bd4 --- /dev/null +++ b/margin_logs/step_0000216.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84c3c00c8aca06bebdf6c73551cdf1542a9266914274715e4384ee9a352aaf61 +size 384 diff --git a/margin_logs/step_0000217.npy b/margin_logs/step_0000217.npy new file mode 100644 index 0000000..ab35642 --- /dev/null +++ b/margin_logs/step_0000217.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d9323637bddc689d72bf0475d99ee55347a41eda307671bf965933f60bd2005 +size 384 diff --git a/margin_logs/step_0000218.npy b/margin_logs/step_0000218.npy new file mode 100644 index 0000000..9560e51 --- /dev/null +++ b/margin_logs/step_0000218.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:393dccd08f1ec16cd4c003bcd1f14351989c2bb75e622a26faa7d679963de94f +size 384 diff --git a/margin_logs/step_0000219.npy b/margin_logs/step_0000219.npy new file mode 100644 index 0000000..2611c79 --- /dev/null +++ b/margin_logs/step_0000219.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a5e8fa147ae4f00dc5ea09b80a65319c7681e5b39f2cb108dbf0ff7b829b45b +size 384 diff --git a/margin_logs/step_0000220.npy b/margin_logs/step_0000220.npy new file mode 100644 index 0000000..ff71d57 --- /dev/null +++ b/margin_logs/step_0000220.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd5edf949c91d6cda52df7e91ce5803aeb699509faa3c5e710c72b68996dfbee +size 384 diff --git a/margin_logs/step_0000221.npy b/margin_logs/step_0000221.npy new file mode 100644 index 0000000..aebb571 --- /dev/null +++ b/margin_logs/step_0000221.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40851d2769b9a4d9da4d3e17ccfc85a03a2a434dbc964356d5c2449768cf6ab0 +size 384 diff --git a/margin_logs/step_0000222.npy b/margin_logs/step_0000222.npy new file mode 100644 index 0000000..aba1f88 --- /dev/null +++ b/margin_logs/step_0000222.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb4bb86aeaeecadc0cb1abbd457474170fa874e12cf04f5730474fc31ce3c61f +size 384 diff --git a/margin_logs/step_0000223.npy b/margin_logs/step_0000223.npy new file mode 100644 index 0000000..f690934 --- /dev/null +++ b/margin_logs/step_0000223.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4462f9eb15f81dc5a0763fbe9669d3e390685363fa59587677183a50374b93cb +size 384 diff --git a/margin_logs/step_0000224.npy b/margin_logs/step_0000224.npy new file mode 100644 index 0000000..f16be00 --- /dev/null +++ b/margin_logs/step_0000224.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27da83aedcffd92925ce0c0bd2498199cab6a56030305aba4683ddfcc871e09a +size 384 diff --git a/margin_logs/step_0000225.npy b/margin_logs/step_0000225.npy new file mode 100644 index 0000000..506a865 --- /dev/null +++ b/margin_logs/step_0000225.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4423bff792b0be92ad57f3beae6b6db945417e9a2b5d86d4eca31970b28256ef +size 384 diff --git a/margin_logs/step_0000226.npy b/margin_logs/step_0000226.npy new file mode 100644 index 0000000..58d5ee9 --- /dev/null +++ b/margin_logs/step_0000226.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:490ff67da0d07213d400a01ff6bc138142ba730a6e25cfe8283afba59c7fd361 +size 384 diff --git a/margin_logs/step_0000227.npy b/margin_logs/step_0000227.npy new file mode 100644 index 0000000..1097aaf --- /dev/null +++ b/margin_logs/step_0000227.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e7dcd27a5f711627341b314419be204638bdc043648c6dc69dc083a83ed78ab +size 384 diff --git a/margin_logs/step_0000228.npy b/margin_logs/step_0000228.npy new file mode 100644 index 0000000..2a53ad0 --- /dev/null +++ b/margin_logs/step_0000228.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4abbccfb2f235fbb0748852ea8c92a2c31cc013ac5aa6f519b2454319e175273 +size 384 diff --git a/margin_logs/step_0000229.npy b/margin_logs/step_0000229.npy new file mode 100644 index 0000000..f444a3e --- /dev/null +++ b/margin_logs/step_0000229.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2ba2ee8c6148b1e0d7d6cb34de0a045d3262f977e8cda648678371b5c3bd5a0 +size 384 diff --git a/margin_logs/step_0000230.npy b/margin_logs/step_0000230.npy new file mode 100644 index 0000000..5ece450 --- /dev/null +++ b/margin_logs/step_0000230.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef3ce135e13afe8c9baffad8bed0f77f4eabaf7680940772ca536e7df1358aff +size 384 diff --git a/margin_logs/step_0000231.npy b/margin_logs/step_0000231.npy new file mode 100644 index 0000000..a90bd3b --- /dev/null +++ b/margin_logs/step_0000231.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2ccaadfdeb8f57b3f3a2dd9f055cd2c84d066477d3e713a5699c53fea403fc +size 384 diff --git a/margin_logs/step_0000232.npy b/margin_logs/step_0000232.npy new file mode 100644 index 0000000..c89662b --- /dev/null +++ b/margin_logs/step_0000232.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b54f760925ff9c3b03e335c019d517dbafde0c30b72304414d3779d02e6f36f +size 384 diff --git a/margin_logs/step_0000233.npy b/margin_logs/step_0000233.npy new file mode 100644 index 0000000..f5c0eb1 --- /dev/null +++ b/margin_logs/step_0000233.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40f70b9e4df019cd6db13c5d6969bd1f6505290eeab515a3134b54dfe7acd4da +size 384 diff --git a/margin_logs/step_0000234.npy b/margin_logs/step_0000234.npy new file mode 100644 index 0000000..fbe84e0 --- /dev/null +++ b/margin_logs/step_0000234.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d858852fe5ddaadb16b33e970473c049b0154c8f45b2b818029eaa3c464dffe +size 384 diff --git a/margin_logs/step_0000235.npy b/margin_logs/step_0000235.npy new file mode 100644 index 0000000..73f8b12 --- /dev/null +++ b/margin_logs/step_0000235.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe09b1bb2d09a903370554cb362adb51e4856a90adcf477205f4cf18e8bbc0a2 +size 384 diff --git a/margin_logs/step_0000236.npy b/margin_logs/step_0000236.npy new file mode 100644 index 0000000..d95f4f4 --- /dev/null +++ b/margin_logs/step_0000236.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48f2028f4e7037f47b9914ad963ce93602c711a4b6b3ea32b9d34d10d31563ff +size 384 diff --git a/margin_logs/step_0000237.npy b/margin_logs/step_0000237.npy new file mode 100644 index 0000000..b5a22ec --- /dev/null +++ b/margin_logs/step_0000237.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f674da21a440e308d51fedfcb23ea762ad7e3f3829c3ce7fbf8a6ae08d07c1e7 +size 384 diff --git a/margin_logs/step_0000238.npy b/margin_logs/step_0000238.npy new file mode 100644 index 0000000..9551e19 --- /dev/null +++ b/margin_logs/step_0000238.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7af1c8ae1ff5caebde140d24ddbc4028db783dab1afb9580416665bf7cb991a1 +size 384 diff --git a/margin_logs/step_0000239.npy b/margin_logs/step_0000239.npy new file mode 100644 index 0000000..d2027f3 --- /dev/null +++ b/margin_logs/step_0000239.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a1052924c337c8d36893867f784290dacd15ed41891143b24eb4ab78aed0720 +size 384 diff --git a/margin_logs/step_0000240.npy b/margin_logs/step_0000240.npy new file mode 100644 index 0000000..7363739 --- /dev/null +++ b/margin_logs/step_0000240.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8fdeb7ac42686b9226133c8ab676f8c25e24539864022f76f7897f8de87132f +size 384 diff --git a/margin_logs/step_0000241.npy b/margin_logs/step_0000241.npy new file mode 100644 index 0000000..24f26ff --- /dev/null +++ b/margin_logs/step_0000241.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6fd9fd2e9a0f0eee0aff6cf8742d14757e92ae387bd72805cc4a9add4f88d0b +size 384 diff --git a/margin_logs/step_0000242.npy b/margin_logs/step_0000242.npy new file mode 100644 index 0000000..89c3da1 --- /dev/null +++ b/margin_logs/step_0000242.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:845cb195895fbd05555650aafc8b7e4072fda4ba94bc307101dd60f514d703bb +size 384 diff --git a/margin_logs/step_0000243.npy b/margin_logs/step_0000243.npy new file mode 100644 index 0000000..b86c13e --- /dev/null +++ b/margin_logs/step_0000243.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ae93a749439e2a92682fe1d9f5914a9715b82de68919ed148bffa20ad5a903 +size 384 diff --git a/margin_logs/step_0000244.npy b/margin_logs/step_0000244.npy new file mode 100644 index 0000000..1b0bdd0 --- /dev/null +++ b/margin_logs/step_0000244.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbfb94dbd39497510a80bc863ab1771b8dbcd06216bb36a8379361e0bfa6552c +size 384 diff --git a/margin_logs/step_0000245.npy b/margin_logs/step_0000245.npy new file mode 100644 index 0000000..70892e3 --- /dev/null +++ b/margin_logs/step_0000245.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aced537fc118cccea08ddcb782e77a13e29bd6bf60e423bfa1b19c828edc0e55 +size 384 diff --git a/margin_logs/step_0000246.npy b/margin_logs/step_0000246.npy new file mode 100644 index 0000000..22cffec --- /dev/null +++ b/margin_logs/step_0000246.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cfe1451bfc60f3e0cbcabd122659c0d623d8d79e042536613563dbe8a762166 +size 384 diff --git a/margin_logs/step_0000247.npy b/margin_logs/step_0000247.npy new file mode 100644 index 0000000..8644b08 --- /dev/null +++ b/margin_logs/step_0000247.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c252cd58b07982a87ec017055bd9de9a375e6ad10efa3430ebd103c729bc8c55 +size 384 diff --git a/margin_logs/step_0000248.npy b/margin_logs/step_0000248.npy new file mode 100644 index 0000000..b634569 --- /dev/null +++ b/margin_logs/step_0000248.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad95688799f7736857c87500047c2d14693e57ced9933b8671e2ddab24b992f3 +size 384 diff --git a/margin_logs/step_0000249.npy b/margin_logs/step_0000249.npy new file mode 100644 index 0000000..90574ba --- /dev/null +++ b/margin_logs/step_0000249.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35bd0c5257ac0faa6a5981e07ee663956dc21073f26c6cf00f7379e715475702 +size 384 diff --git a/margin_logs/step_0000250.npy b/margin_logs/step_0000250.npy new file mode 100644 index 0000000..2aea4ab --- /dev/null +++ b/margin_logs/step_0000250.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c55847704357c7482ae263f2436c911b037ba3e0419030dee48fd985a93780e +size 384 diff --git a/margin_logs/step_0000251.npy b/margin_logs/step_0000251.npy new file mode 100644 index 0000000..44bf961 --- /dev/null +++ b/margin_logs/step_0000251.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:504e875a53e2d2470435d1ccc19d07ecdc628dfc3151726eefe215bc7a651d39 +size 384 diff --git a/margin_logs/step_0000252.npy b/margin_logs/step_0000252.npy new file mode 100644 index 0000000..dc53285 --- /dev/null +++ b/margin_logs/step_0000252.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a92f9922bb8afef0a5219a447ad047ea841d009181d3d776536ad4ae15c5cad4 +size 384 diff --git a/margin_logs/step_0000253.npy b/margin_logs/step_0000253.npy new file mode 100644 index 0000000..a68070e --- /dev/null +++ b/margin_logs/step_0000253.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96a1d8fd63ca0ee60c758c8a0090c0d5603ac16a6b5e9ccbede44dcfe2ecb5e7 +size 384 diff --git a/margin_logs/step_0000254.npy b/margin_logs/step_0000254.npy new file mode 100644 index 0000000..942d3dd --- /dev/null +++ b/margin_logs/step_0000254.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e8f18a934bcc5d162b86a72910767bc49a9ec6fc72f6d3d5244239867fcc597 +size 384 diff --git a/margin_logs/step_0000255.npy b/margin_logs/step_0000255.npy new file mode 100644 index 0000000..18cc482 --- /dev/null +++ b/margin_logs/step_0000255.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a28ad4d482513576f5c19d990b6e79a89117f2abe9fb6033f14dc1b8f4a498aa +size 384 diff --git a/margin_logs/step_0000256.npy b/margin_logs/step_0000256.npy new file mode 100644 index 0000000..bd3a682 --- /dev/null +++ b/margin_logs/step_0000256.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5e369b1d2eaa2590ab3d3479b469e7dbe610a0622a2a62989054b0297139af +size 384 diff --git a/margin_logs/step_0000257.npy b/margin_logs/step_0000257.npy new file mode 100644 index 0000000..bf93c4d --- /dev/null +++ b/margin_logs/step_0000257.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10632bf7be7477025b377db29ae776414fd9e2970b263c9367a84dfa52103a25 +size 384 diff --git a/margin_logs/step_0000258.npy b/margin_logs/step_0000258.npy new file mode 100644 index 0000000..136fb18 --- /dev/null +++ b/margin_logs/step_0000258.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acacae5c4776523e80e03abf37edf8234d0243b18f41ee475aa2403ada91b9a5 +size 384 diff --git a/margin_logs/step_0000259.npy b/margin_logs/step_0000259.npy new file mode 100644 index 0000000..b23372b --- /dev/null +++ b/margin_logs/step_0000259.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:491e8af1fa9e4929d99886d3fb478ade72455c145b42bcac79b4bbab417abe15 +size 384 diff --git a/margin_logs/step_0000260.npy b/margin_logs/step_0000260.npy new file mode 100644 index 0000000..d325fd5 --- /dev/null +++ b/margin_logs/step_0000260.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae5e44244a34d303a95eeb7a08d2c20ccd981f028c1a88b5bea34d7cb40bce55 +size 384 diff --git a/margin_logs/step_0000261.npy b/margin_logs/step_0000261.npy new file mode 100644 index 0000000..907c30b --- /dev/null +++ b/margin_logs/step_0000261.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3855a1eda6cd5c7918ece61e74d9af01021dfb2e989c40d7aa09a65ec9c2b49a +size 384 diff --git a/margin_logs/step_0000262.npy b/margin_logs/step_0000262.npy new file mode 100644 index 0000000..0d16b1e --- /dev/null +++ b/margin_logs/step_0000262.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95fe58ac986b4ebeeb1bef3793c2458d27a71668653e8ba410dd598d87015ac8 +size 384 diff --git a/margin_logs/step_0000263.npy b/margin_logs/step_0000263.npy new file mode 100644 index 0000000..10e17ad --- /dev/null +++ b/margin_logs/step_0000263.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9a1d6518be3be8f483dda91bf41a3006cde407d9bfb7d7b81fe711ad3600b04 +size 384 diff --git a/margin_logs/step_0000264.npy b/margin_logs/step_0000264.npy new file mode 100644 index 0000000..f51b133 --- /dev/null +++ b/margin_logs/step_0000264.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0787972a2674c169cd0af51f27c241783fc0d77c2f1154065420b20f07818016 +size 384 diff --git a/margin_logs/step_0000265.npy b/margin_logs/step_0000265.npy new file mode 100644 index 0000000..ab756c4 --- /dev/null +++ b/margin_logs/step_0000265.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d586bcef2745bea817a4363839f97284ab14ca8eed29bc71f14b0154b88788c9 +size 384 diff --git a/margin_logs/step_0000266.npy b/margin_logs/step_0000266.npy new file mode 100644 index 0000000..4c0c0b5 --- /dev/null +++ b/margin_logs/step_0000266.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fcf3576cb303ff02ffa3ce7b85a2fb569a2b623ece0eb7b760d404a62d22aa4 +size 384 diff --git a/margin_logs/step_0000267.npy b/margin_logs/step_0000267.npy new file mode 100644 index 0000000..34da4dc --- /dev/null +++ b/margin_logs/step_0000267.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83c51380ce028237f39fe0692241844797973c5489d5cb19d94845f0b974fc19 +size 384 diff --git a/margin_logs/step_0000268.npy b/margin_logs/step_0000268.npy new file mode 100644 index 0000000..0f94b78 --- /dev/null +++ b/margin_logs/step_0000268.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b4574426c443809f03f3dad31b650ee12a2ce261b46e3d7b146fda32a4b61b5 +size 384 diff --git a/margin_logs/step_0000269.npy b/margin_logs/step_0000269.npy new file mode 100644 index 0000000..7241063 --- /dev/null +++ b/margin_logs/step_0000269.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ff4fa6da1cff6afe798117b9602b7b47441aec3e3a7534c536bbc2832206f56 +size 384 diff --git a/margin_logs/step_0000270.npy b/margin_logs/step_0000270.npy new file mode 100644 index 0000000..dcee26d --- /dev/null +++ b/margin_logs/step_0000270.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4be316d2dde892870321c480940252350f0532abf05ea377fefccb956c7370cb +size 384 diff --git a/margin_logs/step_0000271.npy b/margin_logs/step_0000271.npy new file mode 100644 index 0000000..5f05da3 --- /dev/null +++ b/margin_logs/step_0000271.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9a8852340ea5770043dc810953f7170f1816feed9641bd85d118bf456a1fc8c +size 384 diff --git a/margin_logs/step_0000272.npy b/margin_logs/step_0000272.npy new file mode 100644 index 0000000..726893c --- /dev/null +++ b/margin_logs/step_0000272.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:316d0eb5c4417c3a6ee8f09996469a17048b6e42c1bc42948f1fca4d9f21350a +size 384 diff --git a/margin_logs/step_0000273.npy b/margin_logs/step_0000273.npy new file mode 100644 index 0000000..e2c4671 --- /dev/null +++ b/margin_logs/step_0000273.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:368f6eedd43b238864185c94c2f669c3837c947c46dc689f2e194ef1b77242aa +size 384 diff --git a/margin_logs/step_0000274.npy b/margin_logs/step_0000274.npy new file mode 100644 index 0000000..195e3d7 --- /dev/null +++ b/margin_logs/step_0000274.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e32587e6b66ae766015bbb6e518de81c02cb02cbd6b0ce5e8ab651cbee124501 +size 384 diff --git a/margin_logs/step_0000275.npy b/margin_logs/step_0000275.npy new file mode 100644 index 0000000..c0237ac --- /dev/null +++ b/margin_logs/step_0000275.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fe1b65497e23be1a1ff9b288c947dcd090b1011cf720151d6302d7275ee71a2 +size 384 diff --git a/margin_logs/step_0000276.npy b/margin_logs/step_0000276.npy new file mode 100644 index 0000000..f54db0e --- /dev/null +++ b/margin_logs/step_0000276.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f5dc59503ec091009494aa983a9117520904827ef4cd15ae0e93f54396cfcc4 +size 384 diff --git a/margin_logs/step_0000277.npy b/margin_logs/step_0000277.npy new file mode 100644 index 0000000..b24ef97 --- /dev/null +++ b/margin_logs/step_0000277.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61838438f8efd180a13d18b6dbcaafe011daffcf92f89a52299915e8a8673648 +size 384 diff --git a/margin_logs/step_0000278.npy b/margin_logs/step_0000278.npy new file mode 100644 index 0000000..1601542 --- /dev/null +++ b/margin_logs/step_0000278.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11e10f2e731e3860c24c6109c409674cfb6f824cc1c03c79a3ac11531383551a +size 384 diff --git a/margin_logs/step_0000279.npy b/margin_logs/step_0000279.npy new file mode 100644 index 0000000..111aaaa --- /dev/null +++ b/margin_logs/step_0000279.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40e0acae3c625f4a434fe2888bc9bc82a22fc436c7a11c862a921075fd9d115e +size 384 diff --git a/margin_logs/step_0000280.npy b/margin_logs/step_0000280.npy new file mode 100644 index 0000000..c4a73a3 --- /dev/null +++ b/margin_logs/step_0000280.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69abeb2983cdf24eb1885e9abd837bc52c101c15c9c95554229e10bafc6670e9 +size 384 diff --git a/margin_logs/step_0000281.npy b/margin_logs/step_0000281.npy new file mode 100644 index 0000000..ef5babc --- /dev/null +++ b/margin_logs/step_0000281.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a037d771b480e1f4aab3880fb896c5bd38076bceb41bb6d404ae3acd02ade08 +size 384 diff --git a/margin_logs/step_0000282.npy b/margin_logs/step_0000282.npy new file mode 100644 index 0000000..fc7d3e0 --- /dev/null +++ b/margin_logs/step_0000282.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3669990c19dc2fed8f49a43d214628fd821f0c53cf29f2fcf335bb949e130ae +size 384 diff --git a/margin_logs/step_0000283.npy b/margin_logs/step_0000283.npy new file mode 100644 index 0000000..14104ab --- /dev/null +++ b/margin_logs/step_0000283.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:298f0fe2f59967cee1bfe1579fac19f620dde500edf6a056c4a6bdae4c406e79 +size 384 diff --git a/margin_logs/step_0000284.npy b/margin_logs/step_0000284.npy new file mode 100644 index 0000000..0cf753d --- /dev/null +++ b/margin_logs/step_0000284.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81c4d7845be80808a4342d6d8637149f082e38dcbfa05275b58141f6c0f4be88 +size 384 diff --git a/margin_logs/step_0000285.npy b/margin_logs/step_0000285.npy new file mode 100644 index 0000000..85cde8a --- /dev/null +++ b/margin_logs/step_0000285.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a651c036663ec50678495ea572395758cf38097a3a8451fc0655d0f91b296e2 +size 384 diff --git a/margin_logs/step_0000286.npy b/margin_logs/step_0000286.npy new file mode 100644 index 0000000..d55e22d --- /dev/null +++ b/margin_logs/step_0000286.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4302fc7d7895b14bc28f39bc09f9724a2aa17eb6ddf9788fd40728502f778f37 +size 384 diff --git a/margin_logs/step_0000287.npy b/margin_logs/step_0000287.npy new file mode 100644 index 0000000..06d2bbb --- /dev/null +++ b/margin_logs/step_0000287.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dcd55a7dff83e69791817aa6f93b885ec1b06d3502f797b47c456a7476875c1 +size 384 diff --git a/margin_logs/step_0000288.npy b/margin_logs/step_0000288.npy new file mode 100644 index 0000000..5984216 --- /dev/null +++ b/margin_logs/step_0000288.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d1e58a912d9e3b6f658605498e01052c57bf20db776f48b4381c5171a8a825f +size 384 diff --git a/margin_logs/step_0000289.npy b/margin_logs/step_0000289.npy new file mode 100644 index 0000000..5b16390 --- /dev/null +++ b/margin_logs/step_0000289.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ef01105710bf08dc4205be5da06406d4c2be069c6a4b463bb87e2eafbbb6d96 +size 384 diff --git a/margin_logs/step_0000290.npy b/margin_logs/step_0000290.npy new file mode 100644 index 0000000..39800b0 --- /dev/null +++ b/margin_logs/step_0000290.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a96103fb0b42f55a70ae9e64683428f5def6420f4468159f657ad122560ec31 +size 384 diff --git a/margin_logs/step_0000291.npy b/margin_logs/step_0000291.npy new file mode 100644 index 0000000..5503365 --- /dev/null +++ b/margin_logs/step_0000291.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:069de8b84b87ea9e6b8256bc8da980d0286baa774012985d00bbe9d65611b071 +size 384 diff --git a/margin_logs/step_0000292.npy b/margin_logs/step_0000292.npy new file mode 100644 index 0000000..aca2ffd --- /dev/null +++ b/margin_logs/step_0000292.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:562bf013de2635365288eef624baef900eaca292c507631e27eab90d8b1b5fcc +size 384 diff --git a/margin_logs/step_0000293.npy b/margin_logs/step_0000293.npy new file mode 100644 index 0000000..f6308b1 --- /dev/null +++ b/margin_logs/step_0000293.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6359f933ed67559df1847db0d745fbc90878f9baa4e9df8b8ab756bc82a998e5 +size 384 diff --git a/margin_logs/step_0000294.npy b/margin_logs/step_0000294.npy new file mode 100644 index 0000000..165a797 --- /dev/null +++ b/margin_logs/step_0000294.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc55437e597f1e8599a227054158aea16f57269d495b644740daed9f2a1700ba +size 384 diff --git a/margin_logs/step_0000295.npy b/margin_logs/step_0000295.npy new file mode 100644 index 0000000..dd1a8a0 --- /dev/null +++ b/margin_logs/step_0000295.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7426f978dceec633d31d5eb665d6977d6acc0d11cce8d29e49572f2680f632c +size 384 diff --git a/margin_logs/step_0000296.npy b/margin_logs/step_0000296.npy new file mode 100644 index 0000000..bfe37e7 --- /dev/null +++ b/margin_logs/step_0000296.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f870b0d86a7f804cfa1cb24331169ba8b4833098b57f9cc2d0c04de885911dd6 +size 384 diff --git a/margin_logs/step_0000297.npy b/margin_logs/step_0000297.npy new file mode 100644 index 0000000..fcaac67 --- /dev/null +++ b/margin_logs/step_0000297.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:816870eef45fb10ade1ed63228a65f994ce5e49a49e3577859baabc8c397907a +size 384 diff --git a/margin_logs/step_0000298.npy b/margin_logs/step_0000298.npy new file mode 100644 index 0000000..437007c --- /dev/null +++ b/margin_logs/step_0000298.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec2380ae05dc1022c9d1d5c860e1d28d543dc2436493dfb4b58a4d6b333c201 +size 384 diff --git a/margin_logs/step_0000299.npy b/margin_logs/step_0000299.npy new file mode 100644 index 0000000..5149705 --- /dev/null +++ b/margin_logs/step_0000299.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f367b551010bc90911a9bf7bd8bc05de19a278aba34a22f305ed38887c315f3 +size 384 diff --git a/margin_logs/step_0000300.npy b/margin_logs/step_0000300.npy new file mode 100644 index 0000000..0dbe30c --- /dev/null +++ b/margin_logs/step_0000300.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c26a205c76cd22f52a072619fb467e3289e3c48fd89c9174f64c3850f5dd15bd +size 384 diff --git a/margin_logs/step_0000301.npy b/margin_logs/step_0000301.npy new file mode 100644 index 0000000..7ee5aae --- /dev/null +++ b/margin_logs/step_0000301.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a049a07b888c256615e741571d224c0847ec76c67dd457df53258f06ed4f3460 +size 384 diff --git a/margin_logs/step_0000302.npy b/margin_logs/step_0000302.npy new file mode 100644 index 0000000..fe257f7 --- /dev/null +++ b/margin_logs/step_0000302.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93a141c79d6a89d0f8b8a3da5e68bc3628153e1ca0add3ae816a59ca2a9743da +size 384 diff --git a/margin_logs/step_0000303.npy b/margin_logs/step_0000303.npy new file mode 100644 index 0000000..e2f5f91 --- /dev/null +++ b/margin_logs/step_0000303.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe742964ae8585f98709331e3b7f48c241a7100d02f4b09ac953244b13000e03 +size 384 diff --git a/margin_logs/step_0000304.npy b/margin_logs/step_0000304.npy new file mode 100644 index 0000000..9eedf04 --- /dev/null +++ b/margin_logs/step_0000304.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a2dfa797f49cacd1b084eb370aeedfa75542398643e03c008f401f9f7cc6093 +size 384 diff --git a/margin_logs/step_0000305.npy b/margin_logs/step_0000305.npy new file mode 100644 index 0000000..c38b160 --- /dev/null +++ b/margin_logs/step_0000305.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a28f23640619c541ec278fa0f58a0b5276d0b224452782f2ada450e8056399 +size 384 diff --git a/margin_logs/step_0000306.npy b/margin_logs/step_0000306.npy new file mode 100644 index 0000000..4c931e0 --- /dev/null +++ b/margin_logs/step_0000306.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46b98fb3935007d6334e8bd6512cb53e15ec62fc3c4348a7804b2c36388c14d1 +size 384 diff --git a/margin_logs/step_0000307.npy b/margin_logs/step_0000307.npy new file mode 100644 index 0000000..43de6ad --- /dev/null +++ b/margin_logs/step_0000307.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a10ca8ee7b2f4a8058c0377bac57f4d30735862f6b13e73e40d585ae0278cc56 +size 384 diff --git a/margin_logs/step_0000308.npy b/margin_logs/step_0000308.npy new file mode 100644 index 0000000..65027e3 --- /dev/null +++ b/margin_logs/step_0000308.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0180741c0a68b4a50c9180d77902228e7109f80b97db469e1373ee996ace938 +size 384 diff --git a/margin_logs/step_0000309.npy b/margin_logs/step_0000309.npy new file mode 100644 index 0000000..d430edc --- /dev/null +++ b/margin_logs/step_0000309.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:489812a9925a2346d175e1a209ef17c71c7416904cd6d68e24ef9d1906e09206 +size 384 diff --git a/margin_logs/step_0000310.npy b/margin_logs/step_0000310.npy new file mode 100644 index 0000000..d2288a2 --- /dev/null +++ b/margin_logs/step_0000310.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3282f50af7a14072d8e74ba594657e5328b08f012145df7c056018a74615605f +size 384 diff --git a/margin_logs/step_0000311.npy b/margin_logs/step_0000311.npy new file mode 100644 index 0000000..9cc712c --- /dev/null +++ b/margin_logs/step_0000311.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89f7ceb01680a4ee661dc2324530a243fda43f1fb7e0e8b6cb32c7506bb5a7fc +size 384 diff --git a/margin_logs/step_0000312.npy b/margin_logs/step_0000312.npy new file mode 100644 index 0000000..41c1ad1 --- /dev/null +++ b/margin_logs/step_0000312.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78c5490b340f8e7c5f1934bf8d9df26488e6fa22d1fcb740c1555cee7ec4b6f3 +size 384 diff --git a/margin_logs/step_0000313.npy b/margin_logs/step_0000313.npy new file mode 100644 index 0000000..a8c9e19 --- /dev/null +++ b/margin_logs/step_0000313.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a4f7045a4f1868a8091089a2056d5c5c71c55bb04c762214cb588d4c6f34be +size 384 diff --git a/margin_logs/step_0000314.npy b/margin_logs/step_0000314.npy new file mode 100644 index 0000000..d7d569b --- /dev/null +++ b/margin_logs/step_0000314.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c52e4841a3a0c30fc36d82e574425640dbaf8e818dde214ecb0698c792afc781 +size 384 diff --git a/margin_logs/step_0000315.npy b/margin_logs/step_0000315.npy new file mode 100644 index 0000000..c50031a --- /dev/null +++ b/margin_logs/step_0000315.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a92f42ee2a574dfba351f26993776a621cae9d18805e4aa7398f88991a4a6eaa +size 384 diff --git a/margin_logs/step_0000316.npy b/margin_logs/step_0000316.npy new file mode 100644 index 0000000..551e439 --- /dev/null +++ b/margin_logs/step_0000316.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b0e9e74cbe1f5d02e9789907daa4feb1edb4ac4610f589a7401f4669345eef +size 384 diff --git a/margin_logs/step_0000317.npy b/margin_logs/step_0000317.npy new file mode 100644 index 0000000..3f7de47 --- /dev/null +++ b/margin_logs/step_0000317.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c890c8cbc26dac7cc955d17cd5af4b608efdc19b6afc1fd90ea0de6ed2fbb09 +size 384 diff --git a/margin_logs/step_0000318.npy b/margin_logs/step_0000318.npy new file mode 100644 index 0000000..3859556 --- /dev/null +++ b/margin_logs/step_0000318.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c6f47aff679cf277850a858a1d3ba96630331eec4299845ecd576857d79d5ed +size 384 diff --git a/margin_logs/step_0000319.npy b/margin_logs/step_0000319.npy new file mode 100644 index 0000000..419607f --- /dev/null +++ b/margin_logs/step_0000319.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2270e80cd36d6cb91511525707ead0c65c542d4ebbf3a5b671d476ae2e4f8c00 +size 384 diff --git a/margin_logs/step_0000320.npy b/margin_logs/step_0000320.npy new file mode 100644 index 0000000..2cbf8de --- /dev/null +++ b/margin_logs/step_0000320.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09c0bb69c91f723d029f38b2f1a77fa8bca14ed57bbb3c706807a7a52697bed5 +size 384 diff --git a/margin_logs/step_0000321.npy b/margin_logs/step_0000321.npy new file mode 100644 index 0000000..9c22f10 --- /dev/null +++ b/margin_logs/step_0000321.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2a67b243d6603c5627bfd479f55e81e123622efeb002005f25a5b4e0e165ecb +size 384 diff --git a/margin_logs/step_0000322.npy b/margin_logs/step_0000322.npy new file mode 100644 index 0000000..987a47f --- /dev/null +++ b/margin_logs/step_0000322.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1cc8aaafd0d05a24294ad8420588a2ee64b7001910031596c795f90c43130b7 +size 384 diff --git a/margin_logs/step_0000323.npy b/margin_logs/step_0000323.npy new file mode 100644 index 0000000..b335076 --- /dev/null +++ b/margin_logs/step_0000323.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e82bca19738f77b4c2e4f485127a910ff072cab584eecae0ca59931917f5b57a +size 384 diff --git a/margin_logs/step_0000324.npy b/margin_logs/step_0000324.npy new file mode 100644 index 0000000..65e3084 --- /dev/null +++ b/margin_logs/step_0000324.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd7eea9c0dbdf0d8f5bed1deea5a8e5d977527958674107cda87b83080d38854 +size 384 diff --git a/margin_logs/step_0000325.npy b/margin_logs/step_0000325.npy new file mode 100644 index 0000000..f7f00eb --- /dev/null +++ b/margin_logs/step_0000325.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa0066c20d2f3177e65fee554c5720e77ac30e43abf548aae3031e697e8c7b37 +size 384 diff --git a/margin_logs/step_0000326.npy b/margin_logs/step_0000326.npy new file mode 100644 index 0000000..c3f6f00 --- /dev/null +++ b/margin_logs/step_0000326.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:365251f1b162151fe33437f874764b4c120037fc660f78aaef9065cd3b02f453 +size 384 diff --git a/margin_logs/step_0000327.npy b/margin_logs/step_0000327.npy new file mode 100644 index 0000000..d7752f6 --- /dev/null +++ b/margin_logs/step_0000327.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:231a5e33adfc5ab95e17ccfbf89b6743c39bb7216f49c48e8fe78c1043ccb68c +size 384 diff --git a/margin_logs/step_0000328.npy b/margin_logs/step_0000328.npy new file mode 100644 index 0000000..1cc8b90 --- /dev/null +++ b/margin_logs/step_0000328.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c56b4b356c47470009ce98b93a06e1998b307df8f239f5cb2444ae59f70ada1d +size 384 diff --git a/margin_logs/step_0000329.npy b/margin_logs/step_0000329.npy new file mode 100644 index 0000000..2155b3d --- /dev/null +++ b/margin_logs/step_0000329.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99a8cbd77a3f5c2e385f4eca6d5fb595b3df08f611012c4f0efe4b6998e819b7 +size 384 diff --git a/margin_logs/step_0000330.npy b/margin_logs/step_0000330.npy new file mode 100644 index 0000000..e587291 --- /dev/null +++ b/margin_logs/step_0000330.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fe18d15dae636ea5871b747cfab362dc6eb4597fca83536275d349e1913d786 +size 384 diff --git a/margin_logs/step_0000331.npy b/margin_logs/step_0000331.npy new file mode 100644 index 0000000..5dc975b --- /dev/null +++ b/margin_logs/step_0000331.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b7b1ef35d67d8168cc7e6459f98158b68ac4e372e7db935bb38c8aeee955db5 +size 384 diff --git a/margin_logs/step_0000332.npy b/margin_logs/step_0000332.npy new file mode 100644 index 0000000..ebf3e35 --- /dev/null +++ b/margin_logs/step_0000332.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7451439592a9e1307225191e3e3ca6c8975660b96b5b96a618e66c138278a41d +size 384 diff --git a/margin_logs/step_0000333.npy b/margin_logs/step_0000333.npy new file mode 100644 index 0000000..7025b46 --- /dev/null +++ b/margin_logs/step_0000333.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d7d0458e52eda8049972fd9fd33520f91c8441dcaeaa31aef0a5a2bd229d8cb +size 384 diff --git a/margin_logs/step_0000334.npy b/margin_logs/step_0000334.npy new file mode 100644 index 0000000..08742c0 --- /dev/null +++ b/margin_logs/step_0000334.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b62edb0321f7ca1b8a9c2ab8452aeeb0c2880789d72c73fa6175f8f5266666e2 +size 384 diff --git a/margin_logs/step_0000335.npy b/margin_logs/step_0000335.npy new file mode 100644 index 0000000..30239e6 --- /dev/null +++ b/margin_logs/step_0000335.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5a6c43b94f5cf4d17fd158a0ebaed12a15548f34d93057de133c9c8db6a78a +size 384 diff --git a/margin_logs/step_0000336.npy b/margin_logs/step_0000336.npy new file mode 100644 index 0000000..ea339bb --- /dev/null +++ b/margin_logs/step_0000336.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a095516ac6caad1aaa2d39daf6df91fa18cc35889b250677a7f44e7d0ccfb66 +size 384 diff --git a/margin_logs/step_0000337.npy b/margin_logs/step_0000337.npy new file mode 100644 index 0000000..bd22314 --- /dev/null +++ b/margin_logs/step_0000337.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db10781baf8ab41395829da6a1f2cef89788823a638ee0da411ace580e16259e +size 384 diff --git a/margin_logs/step_0000338.npy b/margin_logs/step_0000338.npy new file mode 100644 index 0000000..51196f3 --- /dev/null +++ b/margin_logs/step_0000338.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55da6ae717d68bb516175383b3df605b47a3e714d99c51c72ea81b149b2226f0 +size 384 diff --git a/margin_logs/step_0000339.npy b/margin_logs/step_0000339.npy new file mode 100644 index 0000000..210d832 --- /dev/null +++ b/margin_logs/step_0000339.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d2fa2d620fbc266e8cbdda4e22a5f2803d1371e0c804e473033a2c7be404ee3 +size 384 diff --git a/margin_logs/step_0000340.npy b/margin_logs/step_0000340.npy new file mode 100644 index 0000000..49fba03 --- /dev/null +++ b/margin_logs/step_0000340.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca8caf6037faa9a222728747e45ffc89796c4923a0f95021c01fcdde5ad67cc5 +size 384 diff --git a/margin_logs/step_0000341.npy b/margin_logs/step_0000341.npy new file mode 100644 index 0000000..28b32a4 --- /dev/null +++ b/margin_logs/step_0000341.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af741bd83269a37d91e369c2aaac1d0f83732ba8037a535c455e3f3e4d8318e4 +size 384 diff --git a/margin_logs/step_0000342.npy b/margin_logs/step_0000342.npy new file mode 100644 index 0000000..d4ac046 --- /dev/null +++ b/margin_logs/step_0000342.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a62538393169bfe8e58aa852ce74d817006d3678168a1dda7cac246ab641207 +size 384 diff --git a/margin_logs/step_0000343.npy b/margin_logs/step_0000343.npy new file mode 100644 index 0000000..1f689c8 --- /dev/null +++ b/margin_logs/step_0000343.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fef009e9c65bab2fb953bdb6940c25fefceff4dfd3e1ee332fde2479a9e657e0 +size 384 diff --git a/margin_logs/step_0000344.npy b/margin_logs/step_0000344.npy new file mode 100644 index 0000000..b646e9b --- /dev/null +++ b/margin_logs/step_0000344.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:662c9164da6d3829e58b34056dd260b2df8fc390a3091509780f9ca119a8a748 +size 384 diff --git a/margin_logs/step_0000345.npy b/margin_logs/step_0000345.npy new file mode 100644 index 0000000..3e91984 --- /dev/null +++ b/margin_logs/step_0000345.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021a98a8db5fd21a34146b7a0b00adf02eba0d480deff10e0b8c13c0613add12 +size 384 diff --git a/margin_logs/step_0000346.npy b/margin_logs/step_0000346.npy new file mode 100644 index 0000000..ad106f0 --- /dev/null +++ b/margin_logs/step_0000346.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5b650d18d5088c5fd69437a9a0df94989994bc2a0b146164f00c7233af3897d +size 384 diff --git a/margin_logs/step_0000347.npy b/margin_logs/step_0000347.npy new file mode 100644 index 0000000..c485041 --- /dev/null +++ b/margin_logs/step_0000347.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8f863491deb28efaa1b1f48c02221538e0483557d02d0320d49611ac2cf0d36 +size 384 diff --git a/margin_logs/step_0000348.npy b/margin_logs/step_0000348.npy new file mode 100644 index 0000000..bba9e8e --- /dev/null +++ b/margin_logs/step_0000348.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddb1a1417ed96e4c9ec83d429ded28780e3a13f407958d25e3dd5fc4d4f707a6 +size 384 diff --git a/margin_logs/step_0000349.npy b/margin_logs/step_0000349.npy new file mode 100644 index 0000000..748fc92 --- /dev/null +++ b/margin_logs/step_0000349.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:610b9a8fe608350e39725dee08dae692e890d8939e236cb84c0965b31086b7fd +size 384 diff --git a/margin_logs/step_0000350.npy b/margin_logs/step_0000350.npy new file mode 100644 index 0000000..15752c3 --- /dev/null +++ b/margin_logs/step_0000350.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d596ec593d6204824c92f0f220723c7ed48c29bc8ba4f98c7a4f048b0bc28730 +size 384 diff --git a/margin_logs/step_0000351.npy b/margin_logs/step_0000351.npy new file mode 100644 index 0000000..5723088 --- /dev/null +++ b/margin_logs/step_0000351.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64d9f359c8e4b260ec5394eeb13c193519baf715981f033116542a3a0cf15c30 +size 384 diff --git a/margin_logs/step_0000352.npy b/margin_logs/step_0000352.npy new file mode 100644 index 0000000..91fa40c --- /dev/null +++ b/margin_logs/step_0000352.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d3145008f9ee70d30a6251b25c0884bcf84cef0b61be5992e3e454ab1028e28 +size 384 diff --git a/margin_logs/step_0000353.npy b/margin_logs/step_0000353.npy new file mode 100644 index 0000000..0a8ab10 --- /dev/null +++ b/margin_logs/step_0000353.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59dfcd4df247882039f2f392eb514bffae4bed6982ab6c13d2dbd90990f7f157 +size 384 diff --git a/margin_logs/step_0000354.npy b/margin_logs/step_0000354.npy new file mode 100644 index 0000000..56f2b62 --- /dev/null +++ b/margin_logs/step_0000354.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b853dcc90668e7182d70551b05689a79d5688271936e44be1d30a4a6153a2ba3 +size 384 diff --git a/margin_logs/step_0000355.npy b/margin_logs/step_0000355.npy new file mode 100644 index 0000000..6047ad0 --- /dev/null +++ b/margin_logs/step_0000355.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edfd0c89511e257bc7315bbc98695a4811f34cf2774a8ec8f5e3b2a7d3dd6d4f +size 384 diff --git a/margin_logs/step_0000356.npy b/margin_logs/step_0000356.npy new file mode 100644 index 0000000..a7c4a50 --- /dev/null +++ b/margin_logs/step_0000356.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d14e17b54ac2f6f30cd5d4a167cdd513c3352aa0fc2e197ed63c16f970e7bf71 +size 384 diff --git a/margin_logs/step_0000357.npy b/margin_logs/step_0000357.npy new file mode 100644 index 0000000..1052bd4 --- /dev/null +++ b/margin_logs/step_0000357.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:192dced7ae9a385f4f0fa46994329ae756933577e2500c117229429e67e5dac7 +size 384 diff --git a/margin_logs/step_0000358.npy b/margin_logs/step_0000358.npy new file mode 100644 index 0000000..d07c943 --- /dev/null +++ b/margin_logs/step_0000358.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5cd4f868833f4dc3aea66a60965cd81fc95bde35419ca63e20d179ee91ca1d +size 384 diff --git a/margin_logs/step_0000359.npy b/margin_logs/step_0000359.npy new file mode 100644 index 0000000..bfa9170 --- /dev/null +++ b/margin_logs/step_0000359.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15ecf683f5239eb0278b531662512336d792cad9f750788de9c7a4268a64249a +size 384 diff --git a/margin_logs/step_0000360.npy b/margin_logs/step_0000360.npy new file mode 100644 index 0000000..783ab11 --- /dev/null +++ b/margin_logs/step_0000360.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb2d017ac32343883745316810c250111b62dfe82e7c53a4e7fc0655a22dfa86 +size 384 diff --git a/margin_logs/step_0000361.npy b/margin_logs/step_0000361.npy new file mode 100644 index 0000000..c82c5da --- /dev/null +++ b/margin_logs/step_0000361.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04639d025b6e59ae857c54dcc4cb532eadba7063a8408ef2e90a746fd5b9a7f5 +size 384 diff --git a/margin_logs/step_0000362.npy b/margin_logs/step_0000362.npy new file mode 100644 index 0000000..7700149 --- /dev/null +++ b/margin_logs/step_0000362.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b5a155c21383581435f846b1e5fd65ebff408c6d8ab9dd6eaaff4f565ca500f +size 384 diff --git a/margin_logs/step_0000363.npy b/margin_logs/step_0000363.npy new file mode 100644 index 0000000..8c2ced2 --- /dev/null +++ b/margin_logs/step_0000363.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a0b0d75db329011ed78b50cb3ebf1f523e79c151a3c9231137b46740c7b37cb +size 384 diff --git a/margin_logs/step_0000364.npy b/margin_logs/step_0000364.npy new file mode 100644 index 0000000..b9d93e2 --- /dev/null +++ b/margin_logs/step_0000364.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1626ac15c3bd5f6ac345f28c1282b5768b6bafac77475362bdba5f8d12a8329d +size 384 diff --git a/margin_logs/step_0000365.npy b/margin_logs/step_0000365.npy new file mode 100644 index 0000000..7a3ab1f --- /dev/null +++ b/margin_logs/step_0000365.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:662107613436769afd0c5a4f7a47d0fe29eba86ef3544be48e6babc50d0bb8e3 +size 384 diff --git a/margin_logs/step_0000366.npy b/margin_logs/step_0000366.npy new file mode 100644 index 0000000..67b0d08 --- /dev/null +++ b/margin_logs/step_0000366.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c630f871a63cdf2dbd99825590907d8e9bd0489a1fbd6a8c8b8ed9f0ce38f668 +size 384 diff --git a/margin_logs/step_0000367.npy b/margin_logs/step_0000367.npy new file mode 100644 index 0000000..232dbaf --- /dev/null +++ b/margin_logs/step_0000367.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d881f3fa5a466c15d2db3ac07edfe28f324a843bc905e58decc33623869d0df +size 384 diff --git a/margin_logs/step_0000368.npy b/margin_logs/step_0000368.npy new file mode 100644 index 0000000..9443d75 --- /dev/null +++ b/margin_logs/step_0000368.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ede627d0ca31bffc654070a6d794bbc60ee6c680e4bf51f1f8a95edc28f49b7d +size 384 diff --git a/margin_logs/step_0000369.npy b/margin_logs/step_0000369.npy new file mode 100644 index 0000000..ee20dc0 --- /dev/null +++ b/margin_logs/step_0000369.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1b08f5764dc9a6a3f8414f906e7ca2e1c5362065b8fb052ca1aa08462098578 +size 384 diff --git a/margin_logs/step_0000370.npy b/margin_logs/step_0000370.npy new file mode 100644 index 0000000..b2c3329 --- /dev/null +++ b/margin_logs/step_0000370.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b26474e82c2d9c286dfa62781508c6a750f2fd32eb74b7951ff3b640eefc3f2c +size 384 diff --git a/margin_logs/step_0000371.npy b/margin_logs/step_0000371.npy new file mode 100644 index 0000000..d1eef1b --- /dev/null +++ b/margin_logs/step_0000371.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2065fe49549473135d1c37d595a66a73107734491f586adc91678b3f81421c58 +size 384 diff --git a/margin_logs/step_0000372.npy b/margin_logs/step_0000372.npy new file mode 100644 index 0000000..2a98992 --- /dev/null +++ b/margin_logs/step_0000372.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c0169e4e6dc3affd3d7b64ebf5716c18cad2be14f52262c1f618c2a840ab532 +size 384 diff --git a/margin_logs/step_0000373.npy b/margin_logs/step_0000373.npy new file mode 100644 index 0000000..3ba9fed --- /dev/null +++ b/margin_logs/step_0000373.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f47820da20e391ce128db0b68d82e90622d63051266511c2ca5263fa2d1a3c0 +size 384 diff --git a/margin_logs/step_0000374.npy b/margin_logs/step_0000374.npy new file mode 100644 index 0000000..72a7c5a --- /dev/null +++ b/margin_logs/step_0000374.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c441d8e9c76f75df2c1135507252187ba522afe9146d687a4a1f9216b7d56e +size 384 diff --git a/margin_logs/step_0000375.npy b/margin_logs/step_0000375.npy new file mode 100644 index 0000000..0b5de22 --- /dev/null +++ b/margin_logs/step_0000375.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:406ce2e4e81d3d51f0ed928c41982af2db8964c97081b56d26c64cbc6d4b5498 +size 384 diff --git a/margin_logs/step_0000376.npy b/margin_logs/step_0000376.npy new file mode 100644 index 0000000..510b03e --- /dev/null +++ b/margin_logs/step_0000376.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a10979d52e4a03aa1b94f0e4e08f0c888307b67172bbf978c9244ddb450a5860 +size 384 diff --git a/margin_logs/step_0000377.npy b/margin_logs/step_0000377.npy new file mode 100644 index 0000000..cb7b6cc --- /dev/null +++ b/margin_logs/step_0000377.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1430425d005b06d323d8fe96298a67e5e0449a663dd65acbed96df7a7114e99 +size 384 diff --git a/margin_logs/step_0000378.npy b/margin_logs/step_0000378.npy new file mode 100644 index 0000000..1891592 --- /dev/null +++ b/margin_logs/step_0000378.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df1c47933900757ac7fd1857d2a96a07e2b03da3d84470ea1aa18ebb7e76498f +size 384 diff --git a/margin_logs/step_0000379.npy b/margin_logs/step_0000379.npy new file mode 100644 index 0000000..cae3cd4 --- /dev/null +++ b/margin_logs/step_0000379.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:320eb4376b173dc9bfd7b27f22b59e918b0686c219ded261cfcec2053496f249 +size 384 diff --git a/margin_logs/step_0000380.npy b/margin_logs/step_0000380.npy new file mode 100644 index 0000000..d6954b1 --- /dev/null +++ b/margin_logs/step_0000380.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac5795037636454eae48614f1f6f62e19573f1368a6cbf5a781b91c48ebabe6 +size 384 diff --git a/margin_logs/step_0000381.npy b/margin_logs/step_0000381.npy new file mode 100644 index 0000000..ced0fe4 --- /dev/null +++ b/margin_logs/step_0000381.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90638fe88441a7617f9a789ead45125ae7bd08e45c008cdf6f877f02ce3154e5 +size 384 diff --git a/margin_logs/step_0000382.npy b/margin_logs/step_0000382.npy new file mode 100644 index 0000000..646ee96 --- /dev/null +++ b/margin_logs/step_0000382.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34334d9f84cb57d954b4c07d7bf3bec83288c759033ffbaf4e27f6450a642284 +size 384 diff --git a/margin_logs/step_0000383.npy b/margin_logs/step_0000383.npy new file mode 100644 index 0000000..9e9ecb2 --- /dev/null +++ b/margin_logs/step_0000383.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f512453e5b202f2db95a957816243a7e77544ffdd78a1c220e1a94e233cc2692 +size 384 diff --git a/margin_logs/step_0000384.npy b/margin_logs/step_0000384.npy new file mode 100644 index 0000000..00a2349 --- /dev/null +++ b/margin_logs/step_0000384.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:374a01c0762ad1c488500c2fe4766f3e221c49a80b672f756a79398b477a981b +size 384 diff --git a/margin_logs/step_0000385.npy b/margin_logs/step_0000385.npy new file mode 100644 index 0000000..0bb0e50 --- /dev/null +++ b/margin_logs/step_0000385.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d39b15c749610b84b507ebdb7abe0f71834ce4f518b85ba0112d1ff6b831cdf3 +size 384 diff --git a/margin_logs/step_0000386.npy b/margin_logs/step_0000386.npy new file mode 100644 index 0000000..da6334c --- /dev/null +++ b/margin_logs/step_0000386.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76d304c77635bc033f335eef23cb44525a2ce17bf371430b5f37c792e782e803 +size 384 diff --git a/margin_logs/step_0000387.npy b/margin_logs/step_0000387.npy new file mode 100644 index 0000000..8771dc4 --- /dev/null +++ b/margin_logs/step_0000387.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4a368cec3e4a05e09f6cc18c14bca8001e4dd2c2d4d90f3f6559cf459620ad2 +size 384 diff --git a/margin_logs/step_0000388.npy b/margin_logs/step_0000388.npy new file mode 100644 index 0000000..8648476 --- /dev/null +++ b/margin_logs/step_0000388.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c9d86182c52a062ba082cc3f21d146c9687105d82d2980b2063b600ea1004d0 +size 384 diff --git a/margin_logs/step_0000389.npy b/margin_logs/step_0000389.npy new file mode 100644 index 0000000..e120196 --- /dev/null +++ b/margin_logs/step_0000389.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3866c7493c1130ef19b27151fa4057eae80602df2d90c7f80931e5895627bc12 +size 384 diff --git a/margin_logs/step_0000390.npy b/margin_logs/step_0000390.npy new file mode 100644 index 0000000..bb28010 --- /dev/null +++ b/margin_logs/step_0000390.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf06eb427b575a3d35155f148bcb735244f16dd5cfd116564d5569549c9aa7ce +size 384 diff --git a/margin_logs/step_0000391.npy b/margin_logs/step_0000391.npy new file mode 100644 index 0000000..3f415aa --- /dev/null +++ b/margin_logs/step_0000391.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95e20210e88be8ba2b68d2e0692b4ec0e3714dba1bbdbe1a68218f420459b2ee +size 384 diff --git a/margin_logs/step_0000392.npy b/margin_logs/step_0000392.npy new file mode 100644 index 0000000..e5dc8d4 --- /dev/null +++ b/margin_logs/step_0000392.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d497366fff19e9d982771b9cdb128cfd4fca0af59e4a6a9a3be9b9ffa425a7e5 +size 384 diff --git a/margin_logs/step_0000393.npy b/margin_logs/step_0000393.npy new file mode 100644 index 0000000..7b2b798 --- /dev/null +++ b/margin_logs/step_0000393.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67ca60e1cb0a49d13180c9ae031a835547141c7c3f8ae526437338334f1d418d +size 384 diff --git a/margin_logs/step_0000394.npy b/margin_logs/step_0000394.npy new file mode 100644 index 0000000..09fd52f --- /dev/null +++ b/margin_logs/step_0000394.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0012a2816ac332c87efc489ab70d29ff775cf22385a097bff8807894d104ccaf +size 384 diff --git a/margin_logs/step_0000395.npy b/margin_logs/step_0000395.npy new file mode 100644 index 0000000..e5e4e86 --- /dev/null +++ b/margin_logs/step_0000395.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e4d59d69df412d7fa228e6750a484ca422040f08f8f3cc158b2d46d03ca2550 +size 384 diff --git a/margin_logs/step_0000396.npy b/margin_logs/step_0000396.npy new file mode 100644 index 0000000..67e482f --- /dev/null +++ b/margin_logs/step_0000396.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aad238703d1fafb7ccaa58d65012734002bc0b5f3ec36e352e1ec39d77cdc0cd +size 384 diff --git a/margin_logs/step_0000397.npy b/margin_logs/step_0000397.npy new file mode 100644 index 0000000..d490138 --- /dev/null +++ b/margin_logs/step_0000397.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa890d68ec1acc2ad675f48b99bb887389385933bc0af2c77a9a69b08f88d902 +size 384 diff --git a/margin_logs/step_0000398.npy b/margin_logs/step_0000398.npy new file mode 100644 index 0000000..ce37527 --- /dev/null +++ b/margin_logs/step_0000398.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02ba0d5339d9a629ea5f5174092b8703b31da31466bc49e077886e4391617eb6 +size 384 diff --git a/margin_logs/step_0000399.npy b/margin_logs/step_0000399.npy new file mode 100644 index 0000000..654cf91 --- /dev/null +++ b/margin_logs/step_0000399.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2025262f264ce0f31f8be0551bcec9f94b9512cfd2b2d0de9c3b7f42a1011d51 +size 384 diff --git a/margin_logs/step_0000400.npy b/margin_logs/step_0000400.npy new file mode 100644 index 0000000..1286872 --- /dev/null +++ b/margin_logs/step_0000400.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2f58083694834cfd25a8bf5826619aec54834ecba48c2909eadf463ce35cc91 +size 384 diff --git a/margin_logs/step_0000401.npy b/margin_logs/step_0000401.npy new file mode 100644 index 0000000..c7b05f2 --- /dev/null +++ b/margin_logs/step_0000401.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c55d8a1d569b7e320a7da7b710fe6ebfbd7e655f8fb5eba6c5bf352058e43b +size 384 diff --git a/margin_logs/step_0000402.npy b/margin_logs/step_0000402.npy new file mode 100644 index 0000000..fc6369f --- /dev/null +++ b/margin_logs/step_0000402.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:295ea35849857af86d997562a9414c7a8bb0a6c3f543a7747ce20abb76b47a18 +size 384 diff --git a/margin_logs/step_0000403.npy b/margin_logs/step_0000403.npy new file mode 100644 index 0000000..403b3d6 --- /dev/null +++ b/margin_logs/step_0000403.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce30a6e03862cd181a83098e86b60fd15deb6761dfd169127a6fbbfeae0dd87c +size 384 diff --git a/margin_logs/step_0000404.npy b/margin_logs/step_0000404.npy new file mode 100644 index 0000000..31d1206 --- /dev/null +++ b/margin_logs/step_0000404.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2809452668cc4ba1036cb3fa6cde3da20bb7b215c2fe1b1bf56bbf8817e2e1b8 +size 384 diff --git a/margin_logs/step_0000405.npy b/margin_logs/step_0000405.npy new file mode 100644 index 0000000..156a8a7 --- /dev/null +++ b/margin_logs/step_0000405.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fde9d3e2377213a5021d65a1cfefb0dd9715d0a96bae954f660b261b95782681 +size 384 diff --git a/margin_logs/step_0000406.npy b/margin_logs/step_0000406.npy new file mode 100644 index 0000000..afacac8 --- /dev/null +++ b/margin_logs/step_0000406.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b56362c25da42ed332a7260b3ae71e785a41e35208f2b18ebf460f5feeb49d8 +size 384 diff --git a/margin_logs/step_0000407.npy b/margin_logs/step_0000407.npy new file mode 100644 index 0000000..cfb5273 --- /dev/null +++ b/margin_logs/step_0000407.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11d39586e53a974b7085c5a84fd7065de4062f6d4d9c6aade3fbd74697cfe2be +size 384 diff --git a/margin_logs/step_0000408.npy b/margin_logs/step_0000408.npy new file mode 100644 index 0000000..3c6a753 --- /dev/null +++ b/margin_logs/step_0000408.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b825d006bd8d371a23aec1451c13993d340c94b80698e96dc68c3217490c2d8e +size 384 diff --git a/margin_logs/step_0000409.npy b/margin_logs/step_0000409.npy new file mode 100644 index 0000000..14e1a00 --- /dev/null +++ b/margin_logs/step_0000409.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb871e3959f820fefd91e965284ed538c254f316a7ca0609a4ebbead0b4820b0 +size 384 diff --git a/margin_logs/step_0000410.npy b/margin_logs/step_0000410.npy new file mode 100644 index 0000000..3097212 --- /dev/null +++ b/margin_logs/step_0000410.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1820cdf0022266347147a6ed73ebd4a24e1c9f8025a94fde14b5850ba6632aa +size 384 diff --git a/margin_logs/step_0000411.npy b/margin_logs/step_0000411.npy new file mode 100644 index 0000000..9325cb3 --- /dev/null +++ b/margin_logs/step_0000411.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1438d80e7c1fdc982f17ca0bdaa0c0873170c087e586f6d69734f959c0ae49fd +size 384 diff --git a/margin_logs/step_0000412.npy b/margin_logs/step_0000412.npy new file mode 100644 index 0000000..87d3416 --- /dev/null +++ b/margin_logs/step_0000412.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4817131363654d7d54d4f7487c4a38abf1ff756724ad5259bca7c31eb201e8c5 +size 384 diff --git a/margin_logs/step_0000413.npy b/margin_logs/step_0000413.npy new file mode 100644 index 0000000..53e907d --- /dev/null +++ b/margin_logs/step_0000413.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41d3f9ebf3636b0812bf7f460c8288dcca4afd19540d0faac05af5ea08cfcc5b +size 384 diff --git a/margin_logs/step_0000414.npy b/margin_logs/step_0000414.npy new file mode 100644 index 0000000..5a44b47 --- /dev/null +++ b/margin_logs/step_0000414.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee837cf41f2d7b5b63b1e7d632643c87e65f57196bfbc4212192563552dd972d +size 384 diff --git a/margin_logs/step_0000415.npy b/margin_logs/step_0000415.npy new file mode 100644 index 0000000..7ac4d9f --- /dev/null +++ b/margin_logs/step_0000415.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e3b9dcdafabcbe514c047e483598fdb32531e1187acb7c2d587b2dc9a5afcc2 +size 384 diff --git a/margin_logs/step_0000416.npy b/margin_logs/step_0000416.npy new file mode 100644 index 0000000..ae2ff93 --- /dev/null +++ b/margin_logs/step_0000416.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24cc5ad8c1204e8bb325b71a5c6490f60d1cc6aad0526ec19315876a4c161599 +size 384 diff --git a/margin_logs/step_0000417.npy b/margin_logs/step_0000417.npy new file mode 100644 index 0000000..b2113fb --- /dev/null +++ b/margin_logs/step_0000417.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:451f03da08575f158071cb0ab2b4fe147abf6783f31ccf6dbff34ea36bfb850d +size 384 diff --git a/margin_logs/step_0000418.npy b/margin_logs/step_0000418.npy new file mode 100644 index 0000000..e233e2d --- /dev/null +++ b/margin_logs/step_0000418.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f522b954e29498e8dc0bd00f093ed42e7d6ec75850b2914066cf4f4799712ac9 +size 384 diff --git a/margin_logs/step_0000419.npy b/margin_logs/step_0000419.npy new file mode 100644 index 0000000..e84df1d --- /dev/null +++ b/margin_logs/step_0000419.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86cdad4dc8c6c90a56dc796deffd9cb3ade72c8f7a27e4b1459c55f4cd2c93c0 +size 384 diff --git a/margin_logs/step_0000420.npy b/margin_logs/step_0000420.npy new file mode 100644 index 0000000..461f20a --- /dev/null +++ b/margin_logs/step_0000420.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c671e435437782d3541258a7f6ba377d1043dc721f708ba1f7ea5f1b9e5245ab +size 384 diff --git a/margin_logs/step_0000421.npy b/margin_logs/step_0000421.npy new file mode 100644 index 0000000..822ab08 --- /dev/null +++ b/margin_logs/step_0000421.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e42f1fbe1856f980e9ba2dac469d7bfd0ffcdf1d7fefac91e3332ffe70cf1564 +size 384 diff --git a/margin_logs/step_0000422.npy b/margin_logs/step_0000422.npy new file mode 100644 index 0000000..ccddf3a --- /dev/null +++ b/margin_logs/step_0000422.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:058b8ed7e52fd035c71cde7c47bd86fde6f2648ab813c96dfd1a6c8df9a3ccb8 +size 384 diff --git a/margin_logs/step_0000423.npy b/margin_logs/step_0000423.npy new file mode 100644 index 0000000..b1607b9 --- /dev/null +++ b/margin_logs/step_0000423.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86d23056b1a7f37cd9fad901fa915dcdd9b188bef7ecde3ecb12934b4142d12d +size 384 diff --git a/margin_logs/step_0000424.npy b/margin_logs/step_0000424.npy new file mode 100644 index 0000000..d320820 --- /dev/null +++ b/margin_logs/step_0000424.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26b1e335ce757bab543f253cd4aaaca55408be0e7a36775e6847b260b80dbc5a +size 384 diff --git a/margin_logs/step_0000425.npy b/margin_logs/step_0000425.npy new file mode 100644 index 0000000..8d21ecc --- /dev/null +++ b/margin_logs/step_0000425.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e1c89b361e45ab291edab18cc37af27325af7c65f576087f80fffe330ceb375 +size 384 diff --git a/margin_logs/step_0000426.npy b/margin_logs/step_0000426.npy new file mode 100644 index 0000000..d7709eb --- /dev/null +++ b/margin_logs/step_0000426.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d3908655457f234a2a8770e15ad3f0eea5389671b2fb4a21e524634a822cc2f +size 384 diff --git a/margin_logs/step_0000427.npy b/margin_logs/step_0000427.npy new file mode 100644 index 0000000..5f73a72 --- /dev/null +++ b/margin_logs/step_0000427.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dfb0b31c65af06cd2e86fa8136db9d687544c166cc1d6fea471a110428ed81c +size 384 diff --git a/margin_logs/step_0000428.npy b/margin_logs/step_0000428.npy new file mode 100644 index 0000000..8c6e757 --- /dev/null +++ b/margin_logs/step_0000428.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14becaad60085bb09c7a08ed01830f0d718152ab8424fc892559735f19c4e95d +size 384 diff --git a/margin_logs/step_0000429.npy b/margin_logs/step_0000429.npy new file mode 100644 index 0000000..3099cc3 --- /dev/null +++ b/margin_logs/step_0000429.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10e7f5f10676d4f8802260ca8d6e767f2e748ee7a5c3c043a3271fc625d0bbf7 +size 384 diff --git a/margin_logs/step_0000430.npy b/margin_logs/step_0000430.npy new file mode 100644 index 0000000..5c12809 --- /dev/null +++ b/margin_logs/step_0000430.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8137e9d643843bd146567202ac148996f6c35cd67a71d22a7d740b003b495d4d +size 384 diff --git a/margin_logs/step_0000431.npy b/margin_logs/step_0000431.npy new file mode 100644 index 0000000..a0f4cf2 --- /dev/null +++ b/margin_logs/step_0000431.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b520a27a436220bfdba840398ef18bd9cde55c9cfdbcd99e003a921ce7220390 +size 384 diff --git a/margin_logs/step_0000432.npy b/margin_logs/step_0000432.npy new file mode 100644 index 0000000..258a494 --- /dev/null +++ b/margin_logs/step_0000432.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e8dae98fa389f1973e3fac5da01c231d017713507c9c6d7b224f6b2d20c06d +size 384 diff --git a/margin_logs/step_0000433.npy b/margin_logs/step_0000433.npy new file mode 100644 index 0000000..17a3acf --- /dev/null +++ b/margin_logs/step_0000433.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55f56573f285561e2a48e6ee2529472b5f67c9eac747700c892b30d17006ec98 +size 384 diff --git a/margin_logs/step_0000434.npy b/margin_logs/step_0000434.npy new file mode 100644 index 0000000..50b127a --- /dev/null +++ b/margin_logs/step_0000434.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26a7c70e5e3551a2d6c8d1e399f29a91b68b1a57c601c713624427ee26f7216b +size 384 diff --git a/margin_logs/step_0000435.npy b/margin_logs/step_0000435.npy new file mode 100644 index 0000000..cbf327d --- /dev/null +++ b/margin_logs/step_0000435.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f6012dd2e16bb492c61922804b3ec99ab4c4daea3f0c6bf4f7895efdb4e682f +size 384 diff --git a/margin_logs/step_0000436.npy b/margin_logs/step_0000436.npy new file mode 100644 index 0000000..816f81f --- /dev/null +++ b/margin_logs/step_0000436.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fde382ff29a315242f5f3c72fde84ae0048cfc2c68b8d91116360d6b651adac +size 384 diff --git a/margin_logs/step_0000437.npy b/margin_logs/step_0000437.npy new file mode 100644 index 0000000..af69811 --- /dev/null +++ b/margin_logs/step_0000437.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d555ae4612c82ce9e23a78fc45088f311ae145de0bae59235683fa8c00621424 +size 384 diff --git a/margin_logs/step_0000438.npy b/margin_logs/step_0000438.npy new file mode 100644 index 0000000..778415f --- /dev/null +++ b/margin_logs/step_0000438.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3015cdc677a089aa19562eda3baa7fbdcc99d7a58d290c2906243ee34982247f +size 384 diff --git a/margin_logs/step_0000439.npy b/margin_logs/step_0000439.npy new file mode 100644 index 0000000..419f91d --- /dev/null +++ b/margin_logs/step_0000439.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68e08395ed2512930e4b6cd2b8b183ba916d2994c6e399dd14fb3882fbad194f +size 384 diff --git a/margin_logs/step_0000440.npy b/margin_logs/step_0000440.npy new file mode 100644 index 0000000..3e2e5b4 --- /dev/null +++ b/margin_logs/step_0000440.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3c231551a3acc5a783e56f8be76dad9c5d8e3ee416cb39a38e291076e1b79de +size 384 diff --git a/margin_logs/step_0000441.npy b/margin_logs/step_0000441.npy new file mode 100644 index 0000000..8cff887 --- /dev/null +++ b/margin_logs/step_0000441.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:795674bc1c6670e52e377343fe298dd77294ead3b12feadf6e40104126298c28 +size 384 diff --git a/margin_logs/step_0000442.npy b/margin_logs/step_0000442.npy new file mode 100644 index 0000000..f026c0b --- /dev/null +++ b/margin_logs/step_0000442.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a264436fb2d3fd390c4900bbebfca83b80dd784663244e7a682ed9293531d372 +size 384 diff --git a/margin_logs/step_0000443.npy b/margin_logs/step_0000443.npy new file mode 100644 index 0000000..4c40f87 --- /dev/null +++ b/margin_logs/step_0000443.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:393a14765f832e9b79cae26920ab185c33905f7116f9b83e65cbc5bf81044e11 +size 384 diff --git a/margin_logs/step_0000444.npy b/margin_logs/step_0000444.npy new file mode 100644 index 0000000..c330926 --- /dev/null +++ b/margin_logs/step_0000444.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:436a546e264040529fc6aa57ee30c540e73a664422196ced78799b19fa0ed9a3 +size 384 diff --git a/margin_logs/step_0000445.npy b/margin_logs/step_0000445.npy new file mode 100644 index 0000000..a62803f --- /dev/null +++ b/margin_logs/step_0000445.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:817a9a9fe0b580cfbf7764cd5ee9840c75043c101a0c5ed58b3abb8c5212e3ac +size 384 diff --git a/margin_logs/step_0000446.npy b/margin_logs/step_0000446.npy new file mode 100644 index 0000000..bcc4561 --- /dev/null +++ b/margin_logs/step_0000446.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7909151949724533ff7db09c54515e8eb1ad749b347f6c7b38e62b294a68626a +size 384 diff --git a/margin_logs/step_0000447.npy b/margin_logs/step_0000447.npy new file mode 100644 index 0000000..ea1ecf2 --- /dev/null +++ b/margin_logs/step_0000447.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e28fddc6ec302c33eab59926956b72cfff8ebf177df5395ad3c4ef3f11ab0c7 +size 384 diff --git a/margin_logs/step_0000448.npy b/margin_logs/step_0000448.npy new file mode 100644 index 0000000..dcafbc7 --- /dev/null +++ b/margin_logs/step_0000448.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b405246045b1eebd473c8495cc5ce69c8f0048946eaabba1f4e2a868b346fd77 +size 384 diff --git a/margin_logs/step_0000449.npy b/margin_logs/step_0000449.npy new file mode 100644 index 0000000..0751d94 --- /dev/null +++ b/margin_logs/step_0000449.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8f43301380befec888276b191cfd05f7f8d64581f64df1df6a8f65c31d7a5df +size 384 diff --git a/margin_logs/step_0000450.npy b/margin_logs/step_0000450.npy new file mode 100644 index 0000000..2cfa377 --- /dev/null +++ b/margin_logs/step_0000450.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8c0221a4e40d072894c7f469fd5393f8aa31d65fbc75f8093625853dc8028c2 +size 384 diff --git a/margin_logs/step_0000451.npy b/margin_logs/step_0000451.npy new file mode 100644 index 0000000..b13e5c4 --- /dev/null +++ b/margin_logs/step_0000451.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0848eb6ccafd62f439de226cf8265be9aa4f248c6dec08e0ccef79f525de5198 +size 384 diff --git a/margin_logs/step_0000452.npy b/margin_logs/step_0000452.npy new file mode 100644 index 0000000..6615e6f --- /dev/null +++ b/margin_logs/step_0000452.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbadd38bf9d921f4ad9ee142078451e2b372cc4c85445a7320d981a1ad34a6fc +size 384 diff --git a/margin_logs/step_0000453.npy b/margin_logs/step_0000453.npy new file mode 100644 index 0000000..b1f48d9 --- /dev/null +++ b/margin_logs/step_0000453.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c6bbc2b0de7205e74e6d3839f8793ac3f8316e6891d6629563f3f632b59fb9a +size 384 diff --git a/margin_logs/step_0000454.npy b/margin_logs/step_0000454.npy new file mode 100644 index 0000000..84148d3 --- /dev/null +++ b/margin_logs/step_0000454.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1ac3c036671173c98953c168b1dd06c0d57c2ae56d705632f64a56185892218 +size 384 diff --git a/margin_logs/step_0000455.npy b/margin_logs/step_0000455.npy new file mode 100644 index 0000000..643055f --- /dev/null +++ b/margin_logs/step_0000455.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75081fa6cd267d8ad81a83b3616363c1a6934f5e759561f4b759df042e38fa84 +size 384 diff --git a/margin_logs/step_0000456.npy b/margin_logs/step_0000456.npy new file mode 100644 index 0000000..6b2a0f5 --- /dev/null +++ b/margin_logs/step_0000456.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d4887b95f5d64313fc7325b2fdc3e0011fcedf8a1aa8aa35c86bdf3a4b2a89a +size 384 diff --git a/margin_logs/step_0000457.npy b/margin_logs/step_0000457.npy new file mode 100644 index 0000000..aab03e2 --- /dev/null +++ b/margin_logs/step_0000457.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:179d6ceb341f0d7e017cca095b859c45629da22cd022d3c68d011633a72ccb19 +size 384 diff --git a/margin_logs/step_0000458.npy b/margin_logs/step_0000458.npy new file mode 100644 index 0000000..eb0a252 --- /dev/null +++ b/margin_logs/step_0000458.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:965d700641541a6de8084fc86ef279863d96ad7f54362de362ad2088633bd339 +size 384 diff --git a/margin_logs/step_0000459.npy b/margin_logs/step_0000459.npy new file mode 100644 index 0000000..9604ef5 --- /dev/null +++ b/margin_logs/step_0000459.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e768321ba416188e815989bcee1e595953e1d02643842eee9d35f3dc90f17ac +size 384 diff --git a/margin_logs/step_0000460.npy b/margin_logs/step_0000460.npy new file mode 100644 index 0000000..806c839 --- /dev/null +++ b/margin_logs/step_0000460.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5924963a9166500fe887bdc7019d1dbf580c1495f0761fa26c427cd7b779ab76 +size 384 diff --git a/margin_logs/step_0000461.npy b/margin_logs/step_0000461.npy new file mode 100644 index 0000000..671c4e0 --- /dev/null +++ b/margin_logs/step_0000461.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a05148cdbdeea26fb42e1d67c05c3f85dd515e6bee51e6e60fbf496908281c1d +size 384 diff --git a/margin_logs/step_0000462.npy b/margin_logs/step_0000462.npy new file mode 100644 index 0000000..625977c --- /dev/null +++ b/margin_logs/step_0000462.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22943a65d50f6e5544b26dc728174c3efcf15125005bb3a5611a6637d2add3af +size 384 diff --git a/margin_logs/step_0000463.npy b/margin_logs/step_0000463.npy new file mode 100644 index 0000000..0673a39 --- /dev/null +++ b/margin_logs/step_0000463.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e36ae92b0c281aae893974af3336f0d51c4332bf36838e2a9fcca407f24400a4 +size 384 diff --git a/margin_logs/step_0000464.npy b/margin_logs/step_0000464.npy new file mode 100644 index 0000000..f0a6d97 --- /dev/null +++ b/margin_logs/step_0000464.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57552e119bf316153f3d5b804514f278a400710c321a93c6e6aceb857ecf0174 +size 384 diff --git a/margin_logs/step_0000465.npy b/margin_logs/step_0000465.npy new file mode 100644 index 0000000..2ec8fce --- /dev/null +++ b/margin_logs/step_0000465.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:389f807825a8c730eccd338b94666839b35fcb992c31b77d23050f8a62a8a030 +size 384 diff --git a/margin_logs/step_0000466.npy b/margin_logs/step_0000466.npy new file mode 100644 index 0000000..a41c861 --- /dev/null +++ b/margin_logs/step_0000466.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8722c84efba55cc682eb6edc49162b5dbc3992c56d7df2af0de7f96eee31e531 +size 384 diff --git a/margin_logs/step_0000467.npy b/margin_logs/step_0000467.npy new file mode 100644 index 0000000..761f3fb --- /dev/null +++ b/margin_logs/step_0000467.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6e0e4247167888238e185a1154fc5770e55db68cab9491e25a0bc8477b953e0 +size 384 diff --git a/margin_logs/step_0000468.npy b/margin_logs/step_0000468.npy new file mode 100644 index 0000000..56ea4d8 --- /dev/null +++ b/margin_logs/step_0000468.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43801f284098188f67bea8af9b8b4165b61d951bf4574f6b01fa6365ed657310 +size 384 diff --git a/margin_logs/step_0000469.npy b/margin_logs/step_0000469.npy new file mode 100644 index 0000000..24c4740 --- /dev/null +++ b/margin_logs/step_0000469.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c5dd0b91dfb14bda161cf730e027d2431d68b6060409b25c15ffcbc769deefc +size 384 diff --git a/margin_logs/step_0000470.npy b/margin_logs/step_0000470.npy new file mode 100644 index 0000000..281fde8 --- /dev/null +++ b/margin_logs/step_0000470.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75511103250ea555e2d76c5dfd852771033057bfae3f79860c0ada6633336751 +size 384 diff --git a/margin_logs/step_0000471.npy b/margin_logs/step_0000471.npy new file mode 100644 index 0000000..2be5e8c --- /dev/null +++ b/margin_logs/step_0000471.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12b4769674ac2480f0c4765cb7d8b8a7461eca94eea25cd05f103dbaaa4bcca8 +size 384 diff --git a/margin_logs/step_0000472.npy b/margin_logs/step_0000472.npy new file mode 100644 index 0000000..d3cdba1 --- /dev/null +++ b/margin_logs/step_0000472.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f8f22f4e7128d0936c49b7f5fcea2ac15b53161ad85e6ccc53a0cb38cbb46de +size 384 diff --git a/margin_logs/step_0000473.npy b/margin_logs/step_0000473.npy new file mode 100644 index 0000000..a58aed5 --- /dev/null +++ b/margin_logs/step_0000473.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a73768f342d615f1072e9287143b41c7777521087c427ae492b674bf1977342 +size 384 diff --git a/margin_logs/step_0000474.npy b/margin_logs/step_0000474.npy new file mode 100644 index 0000000..97d4837 --- /dev/null +++ b/margin_logs/step_0000474.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8cc1fd5c0557f7ee206f66261fbbb830394f3551312c88529d9d17e9b31b3f7 +size 384 diff --git a/margin_logs/step_0000475.npy b/margin_logs/step_0000475.npy new file mode 100644 index 0000000..a88297a --- /dev/null +++ b/margin_logs/step_0000475.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3795c8064268b83e31eab00bfe237c382a800cf6dbd4e324480e0b53e7c88ca +size 384 diff --git a/margin_logs/step_0000476.npy b/margin_logs/step_0000476.npy new file mode 100644 index 0000000..7a0db29 --- /dev/null +++ b/margin_logs/step_0000476.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99221e07e7abac352999e5d996bbf7cc4d5112c83f9f10a2573785157f35ef8c +size 384 diff --git a/margin_logs/step_0000477.npy b/margin_logs/step_0000477.npy new file mode 100644 index 0000000..58cdf77 --- /dev/null +++ b/margin_logs/step_0000477.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b23a7c33a5588426da3405dc9faca1a3b5155a0d7e08e045c2107984d042e6f0 +size 384 diff --git a/margin_logs/step_0000478.npy b/margin_logs/step_0000478.npy new file mode 100644 index 0000000..7a7c73b --- /dev/null +++ b/margin_logs/step_0000478.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc2fa00c59bf74b80676ab5925b65a6893fcf7411ae06bda3b4f451f788633ed +size 384 diff --git a/margin_logs/step_0000479.npy b/margin_logs/step_0000479.npy new file mode 100644 index 0000000..04986f3 --- /dev/null +++ b/margin_logs/step_0000479.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89ef71e44d8a5c4eff35aa70650c55700277d3510c52c9f3f25ca61310064f67 +size 384 diff --git a/margin_logs/step_0000480.npy b/margin_logs/step_0000480.npy new file mode 100644 index 0000000..5f2c1a6 --- /dev/null +++ b/margin_logs/step_0000480.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bb77bb81941055e4c188a328e4e05418cb103f079bd050968e5922e6a7e0e16 +size 384 diff --git a/margin_logs/step_0000481.npy b/margin_logs/step_0000481.npy new file mode 100644 index 0000000..8f5b4ae --- /dev/null +++ b/margin_logs/step_0000481.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1296aee820942d849d35f3f6e38e3af3bc4b3fb440d719ce99f3aaae3729b3b0 +size 384 diff --git a/margin_logs/step_0000482.npy b/margin_logs/step_0000482.npy new file mode 100644 index 0000000..6fe90e6 --- /dev/null +++ b/margin_logs/step_0000482.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0371e3a2256ec07c0869e5ebbee7798a3f4d4c9e8025fe81c8c465e94b25bf5b +size 384 diff --git a/margin_logs/step_0000483.npy b/margin_logs/step_0000483.npy new file mode 100644 index 0000000..4a8c6e1 --- /dev/null +++ b/margin_logs/step_0000483.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:865548b4f07c92ce3e13717664b0bfef94b7f5bd54c30114d96402841c63fcea +size 384 diff --git a/margin_logs/step_0000484.npy b/margin_logs/step_0000484.npy new file mode 100644 index 0000000..d35fb8b --- /dev/null +++ b/margin_logs/step_0000484.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:608cedaf0994455644234d0624bfac6af31d45df6de5caf3937fc300304aeb3a +size 384 diff --git a/margin_logs/step_0000485.npy b/margin_logs/step_0000485.npy new file mode 100644 index 0000000..b8d7d3f --- /dev/null +++ b/margin_logs/step_0000485.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:655948414fcee3fdd0c8720ed74ff1a26d8f90db00a21c21217a6c7f22e39980 +size 384 diff --git a/margin_logs/step_0000486.npy b/margin_logs/step_0000486.npy new file mode 100644 index 0000000..5924fc6 --- /dev/null +++ b/margin_logs/step_0000486.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb13b7de8f65afec29a6963e1f6448cc27b6b2f6c9f7d988abf1df3fc7c6ff95 +size 384 diff --git a/margin_logs/step_0000487.npy b/margin_logs/step_0000487.npy new file mode 100644 index 0000000..d0cc8b0 --- /dev/null +++ b/margin_logs/step_0000487.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3323cd1772e91411d74abbd6403cc2d365a05f9c995b41590d7a263e9de47bc2 +size 384 diff --git a/margin_logs/step_0000488.npy b/margin_logs/step_0000488.npy new file mode 100644 index 0000000..a2858e0 --- /dev/null +++ b/margin_logs/step_0000488.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7aec1e843fca91656e31009a80d2509f98ab8bfb51b50224811718d08185660 +size 384 diff --git a/margin_logs/step_0000489.npy b/margin_logs/step_0000489.npy new file mode 100644 index 0000000..f8283a0 --- /dev/null +++ b/margin_logs/step_0000489.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791a9355c92ecb85e1ab9c940627c529d1d0f44e5b4b843b54aa7be6fa976099 +size 384 diff --git a/margin_logs/step_0000490.npy b/margin_logs/step_0000490.npy new file mode 100644 index 0000000..b754570 --- /dev/null +++ b/margin_logs/step_0000490.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca5f227882ea6d2d53692eb62b1473545759c469622cbfc68aa9f3f645c0ab67 +size 384 diff --git a/margin_logs/step_0000491.npy b/margin_logs/step_0000491.npy new file mode 100644 index 0000000..f4f6248 --- /dev/null +++ b/margin_logs/step_0000491.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b335fa8f32f345e21a217e944589faf8e3d7fecc3dd0890b1389dd3a88e7a81f +size 384 diff --git a/margin_logs/step_0000492.npy b/margin_logs/step_0000492.npy new file mode 100644 index 0000000..a8b107f --- /dev/null +++ b/margin_logs/step_0000492.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1870e447fe0433497f8d00612ab1c1f097119761e6f5129bacc13578d097d27c +size 384 diff --git a/margin_logs/step_0000493.npy b/margin_logs/step_0000493.npy new file mode 100644 index 0000000..4660910 --- /dev/null +++ b/margin_logs/step_0000493.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a2835538b7d12ff33788bfa15b1382e2e0c7f79feb8ff82e61dc44e1ffa72d3 +size 384 diff --git a/margin_logs/step_0000494.npy b/margin_logs/step_0000494.npy new file mode 100644 index 0000000..571712c --- /dev/null +++ b/margin_logs/step_0000494.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f19364bd368a8dbf43bf604ecea94b6ae003623cd1726c22fcbde5549695140 +size 384 diff --git a/margin_logs/step_0000495.npy b/margin_logs/step_0000495.npy new file mode 100644 index 0000000..6f53a42 --- /dev/null +++ b/margin_logs/step_0000495.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1bea63b9b4629017a5cd6e0c28bcaed13832c94ad3206b764a8ddd07090574d +size 384 diff --git a/margin_logs/step_0000496.npy b/margin_logs/step_0000496.npy new file mode 100644 index 0000000..9078d0f --- /dev/null +++ b/margin_logs/step_0000496.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:634b05d3a0b116b789daabffd6375bab886e0b7a3c42d612eba9d0f67b76e386 +size 384 diff --git a/margin_logs/step_0000497.npy b/margin_logs/step_0000497.npy new file mode 100644 index 0000000..13518f9 --- /dev/null +++ b/margin_logs/step_0000497.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e620b440de55491eb9e86442a378cad8ce5f4bbf7b86ab7e25aaf6a40b314b79 +size 384 diff --git a/margin_logs/step_0000498.npy b/margin_logs/step_0000498.npy new file mode 100644 index 0000000..a57b69c --- /dev/null +++ b/margin_logs/step_0000498.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd859a69818b62f91be5f6a73ed58f3d86820756e357eb41329a09e30878c0df +size 384 diff --git a/margin_logs/step_0000499.npy b/margin_logs/step_0000499.npy new file mode 100644 index 0000000..028c532 --- /dev/null +++ b/margin_logs/step_0000499.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbe8f30d2eacecf93a6465768095dd64fb463a440e810cc56b8f265c84955d82 +size 384 diff --git a/margin_logs/step_0000500.npy b/margin_logs/step_0000500.npy new file mode 100644 index 0000000..83acab3 --- /dev/null +++ b/margin_logs/step_0000500.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b5bbadc0677cd4efca0b50dfcf45d1c2a998ac9659ff0a42c2c353a39ac572c +size 384 diff --git a/margin_logs/step_0000501.npy b/margin_logs/step_0000501.npy new file mode 100644 index 0000000..793f9bb --- /dev/null +++ b/margin_logs/step_0000501.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:789598051fc354bd53086ed35cfaec6985a14be740f7983ee46e2f1f89ab50e8 +size 384 diff --git a/margin_logs/step_0000502.npy b/margin_logs/step_0000502.npy new file mode 100644 index 0000000..e816718 --- /dev/null +++ b/margin_logs/step_0000502.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95e2b0e91e4d0afe6b7074274405bb536dca072a223130aeb5b83b5867c5a61e +size 384 diff --git a/margin_logs/step_0000503.npy b/margin_logs/step_0000503.npy new file mode 100644 index 0000000..5a857c8 --- /dev/null +++ b/margin_logs/step_0000503.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3c70ec296878b86efc1ad00ce8f39e35527415635dc6d8ef4fc8517a165d800 +size 384 diff --git a/margin_logs/step_0000504.npy b/margin_logs/step_0000504.npy new file mode 100644 index 0000000..1cb97bd --- /dev/null +++ b/margin_logs/step_0000504.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f22f07106549c914dc9af8039652e3aab9ee62430641d0ee295ec4b35d380d0 +size 384 diff --git a/margin_logs/step_0000505.npy b/margin_logs/step_0000505.npy new file mode 100644 index 0000000..7530830 --- /dev/null +++ b/margin_logs/step_0000505.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f162b65155a1d65e29dfe860deb75e04d79c430c50a2f5ca78a9fb5506e4a237 +size 384 diff --git a/margin_logs/step_0000506.npy b/margin_logs/step_0000506.npy new file mode 100644 index 0000000..92e5111 --- /dev/null +++ b/margin_logs/step_0000506.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48e1fd82e4078ea6f0939e65820a7aa19b9b123249b4ad977b27d9f0daff3b19 +size 384 diff --git a/margin_logs/step_0000507.npy b/margin_logs/step_0000507.npy new file mode 100644 index 0000000..843d144 --- /dev/null +++ b/margin_logs/step_0000507.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e5c660f6e9d40e8997c1157750d367009b9313d6026d00ef2e956930ee43170 +size 384 diff --git a/margin_logs/step_0000508.npy b/margin_logs/step_0000508.npy new file mode 100644 index 0000000..f125b11 --- /dev/null +++ b/margin_logs/step_0000508.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36f8f7c763f094d9abeb533a9e10d3e917ececae415df6a9a1f08ec1fe4a0e0 +size 384 diff --git a/margin_logs/step_0000509.npy b/margin_logs/step_0000509.npy new file mode 100644 index 0000000..12d8fbe --- /dev/null +++ b/margin_logs/step_0000509.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64f30294b1b4133cd8bdf6c5ac34df466b708cd842213a439d23341d80e0e821 +size 384 diff --git a/margin_logs/step_0000510.npy b/margin_logs/step_0000510.npy new file mode 100644 index 0000000..1b49908 --- /dev/null +++ b/margin_logs/step_0000510.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d00b4604c3187226f17ce82fa1a33cd39418502ee50945500b48a24dab8fb1d +size 384 diff --git a/margin_logs/step_0000511.npy b/margin_logs/step_0000511.npy new file mode 100644 index 0000000..158dd0d --- /dev/null +++ b/margin_logs/step_0000511.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaeace306410793affd0ba7db639b3fb97dd5a093be6a32d661a4a822c5cfef8 +size 384 diff --git a/margin_logs/step_0000512.npy b/margin_logs/step_0000512.npy new file mode 100644 index 0000000..13af4aa --- /dev/null +++ b/margin_logs/step_0000512.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7610a237afd99b7a2bc089aebfe03e97fb76b8413bed4a0fd89f2bfc3ec76b3e +size 384 diff --git a/margin_logs/step_0000513.npy b/margin_logs/step_0000513.npy new file mode 100644 index 0000000..d95251d --- /dev/null +++ b/margin_logs/step_0000513.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd7aa315cccbaaa2e7c95717e8f9af43bc962a58bd0fa32bd1d922614b69e662 +size 384 diff --git a/margin_logs/step_0000514.npy b/margin_logs/step_0000514.npy new file mode 100644 index 0000000..4185ee5 --- /dev/null +++ b/margin_logs/step_0000514.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a515c394a2a3487f50677a618841c8a24e49efb3858c6cd70959da55e9c4d712 +size 384 diff --git a/margin_logs/step_0000515.npy b/margin_logs/step_0000515.npy new file mode 100644 index 0000000..1f676fb --- /dev/null +++ b/margin_logs/step_0000515.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfd9b6c61765f9ceaa22ef1dbef9872af85432008a978779c30323363db9adf5 +size 384 diff --git a/margin_logs/step_0000516.npy b/margin_logs/step_0000516.npy new file mode 100644 index 0000000..033e315 --- /dev/null +++ b/margin_logs/step_0000516.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3086330814ac95cd082c09703c329f62e7092c70f70f80c3fde615f0ee9c3d0 +size 384 diff --git a/margin_logs/step_0000517.npy b/margin_logs/step_0000517.npy new file mode 100644 index 0000000..7be2ea5 --- /dev/null +++ b/margin_logs/step_0000517.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e53cdbc0e5a80c1f8cab2eeaa8fc1f5f6fb4f5accd81099d25240292a4d20b8 +size 384 diff --git a/margin_logs/step_0000518.npy b/margin_logs/step_0000518.npy new file mode 100644 index 0000000..9fcd72e --- /dev/null +++ b/margin_logs/step_0000518.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01e3113d56b710282ec012267caa8e0417546beb677dd9a641030e5613d40baa +size 384 diff --git a/margin_logs/step_0000519.npy b/margin_logs/step_0000519.npy new file mode 100644 index 0000000..b2f6b38 --- /dev/null +++ b/margin_logs/step_0000519.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00abf896cf18cdbd6acefc12a899e556f77334c65bc5180850233d944744142c +size 384 diff --git a/margin_logs/step_0000520.npy b/margin_logs/step_0000520.npy new file mode 100644 index 0000000..42c77db --- /dev/null +++ b/margin_logs/step_0000520.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f41be5e174847b4c0db98929edc0c6c957201c47bb6434cedcb7c4aed4bcb59 +size 384 diff --git a/margin_logs/step_0000521.npy b/margin_logs/step_0000521.npy new file mode 100644 index 0000000..938ba2a --- /dev/null +++ b/margin_logs/step_0000521.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fc746783491652fb6864155c3dba0005dec7f8942895fe4e7af24cddd355bde +size 384 diff --git a/margin_logs/step_0000522.npy b/margin_logs/step_0000522.npy new file mode 100644 index 0000000..d335e93 --- /dev/null +++ b/margin_logs/step_0000522.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c51335fc4a247fcb0fb64842e50f359c067838da7804bd3d0e886fb16e42e58 +size 384 diff --git a/margin_logs/step_0000523.npy b/margin_logs/step_0000523.npy new file mode 100644 index 0000000..b4ace7c --- /dev/null +++ b/margin_logs/step_0000523.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42185f026a030a884b1a9e9bc667235dbe5cd221ebaf3e1cc47e190c7e1cea98 +size 384 diff --git a/margin_logs/step_0000524.npy b/margin_logs/step_0000524.npy new file mode 100644 index 0000000..8141714 --- /dev/null +++ b/margin_logs/step_0000524.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503eddce47799e451a4abb55193eb060b1ffc014693cf76d6ed6dc79a7e98634 +size 384 diff --git a/margin_logs/step_0000525.npy b/margin_logs/step_0000525.npy new file mode 100644 index 0000000..b42116e --- /dev/null +++ b/margin_logs/step_0000525.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b8599876de2802a647272f45793218193670e009d5926ad0169acbbb4dc87ad +size 384 diff --git a/margin_logs/step_0000526.npy b/margin_logs/step_0000526.npy new file mode 100644 index 0000000..8009131 --- /dev/null +++ b/margin_logs/step_0000526.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e2f4912aa5974ce3d0b56317f6d4bc2541b30b29e76b8b83718cddcf322234a +size 384 diff --git a/margin_logs/step_0000527.npy b/margin_logs/step_0000527.npy new file mode 100644 index 0000000..fa97ac3 --- /dev/null +++ b/margin_logs/step_0000527.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:085a473049755d959af1984e9c02289c8021c4eacf6a73adeab1d9a3acd050ff +size 384 diff --git a/margin_logs/step_0000528.npy b/margin_logs/step_0000528.npy new file mode 100644 index 0000000..b2a7137 --- /dev/null +++ b/margin_logs/step_0000528.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba369734cc8637a4c08ce387ab5728763be02fba78006208b3671a965d53fd8c +size 384 diff --git a/margin_logs/step_0000529.npy b/margin_logs/step_0000529.npy new file mode 100644 index 0000000..677f399 --- /dev/null +++ b/margin_logs/step_0000529.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07a51c8485d190d08938c50e25e36abe2428a2f06576a218b0aa948ebbddb652 +size 384 diff --git a/margin_logs/step_0000530.npy b/margin_logs/step_0000530.npy new file mode 100644 index 0000000..fb2f8e3 --- /dev/null +++ b/margin_logs/step_0000530.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe69f4078556832a0d99fde02f619724667014fc9e2118fe9413622c37896502 +size 384 diff --git a/margin_logs/step_0000531.npy b/margin_logs/step_0000531.npy new file mode 100644 index 0000000..880e05f --- /dev/null +++ b/margin_logs/step_0000531.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47349f1c94e9f37e34c6cca5dce7d708fdc1eb7b634753fd7455c817075a5846 +size 384 diff --git a/margin_logs/step_0000532.npy b/margin_logs/step_0000532.npy new file mode 100644 index 0000000..a3cbbb4 --- /dev/null +++ b/margin_logs/step_0000532.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ddcb0fd548b40968033679970c323c258ec0c937cd1fb305ef68f6d8a11f7a0 +size 384 diff --git a/margin_logs/step_0000533.npy b/margin_logs/step_0000533.npy new file mode 100644 index 0000000..34e778e --- /dev/null +++ b/margin_logs/step_0000533.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de649a8edb8599da06d1677bb0673ac59466c7f8f958752e00cce2b28413f8e7 +size 384 diff --git a/margin_logs/step_0000534.npy b/margin_logs/step_0000534.npy new file mode 100644 index 0000000..4a2a44f --- /dev/null +++ b/margin_logs/step_0000534.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83c4310c773a2e9b3422b28a18f4f8d10facf6e118f828226c8ca192487291cc +size 384 diff --git a/margin_logs/step_0000535.npy b/margin_logs/step_0000535.npy new file mode 100644 index 0000000..4a4354c --- /dev/null +++ b/margin_logs/step_0000535.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:515a40882821affb015047210d9a880c0d4301ecad0e3bbd48b8623f5b68be17 +size 384 diff --git a/margin_logs/step_0000536.npy b/margin_logs/step_0000536.npy new file mode 100644 index 0000000..4b3304a --- /dev/null +++ b/margin_logs/step_0000536.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dded7d0e19a9c248cc68ce012398df248805c810bc00df6519f3873c1172fdff +size 384 diff --git a/margin_logs/step_0000537.npy b/margin_logs/step_0000537.npy new file mode 100644 index 0000000..5b66984 --- /dev/null +++ b/margin_logs/step_0000537.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eaa5801181c583b5989c166f5db268530b698b5932c23ad4143f6ee98d4eeea +size 384 diff --git a/margin_logs/step_0000538.npy b/margin_logs/step_0000538.npy new file mode 100644 index 0000000..fba62ef --- /dev/null +++ b/margin_logs/step_0000538.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6359792300105b2eb6605cafbc95144e6bfc8eef071d9afae1bb850791b4a32 +size 384 diff --git a/margin_logs/step_0000539.npy b/margin_logs/step_0000539.npy new file mode 100644 index 0000000..64ae768 --- /dev/null +++ b/margin_logs/step_0000539.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7194377aa41e8daa4bed5e6dd7a5276caa34b2a21a4ba921872949ac0c0b9da1 +size 384 diff --git a/margin_logs/step_0000540.npy b/margin_logs/step_0000540.npy new file mode 100644 index 0000000..8d88a39 --- /dev/null +++ b/margin_logs/step_0000540.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e8943f5b0d088eb4d58ba4b570b8474eb473de3dd45ef8808291e5c2192d24e +size 384 diff --git a/margin_logs/step_0000541.npy b/margin_logs/step_0000541.npy new file mode 100644 index 0000000..53af093 --- /dev/null +++ b/margin_logs/step_0000541.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afdfb510115f6a4bc2c1de725db0e1858fe34242f5ccff512541aac05075241e +size 384 diff --git a/margin_logs/step_0000542.npy b/margin_logs/step_0000542.npy new file mode 100644 index 0000000..2b2ad1c --- /dev/null +++ b/margin_logs/step_0000542.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f33c7660469861aa431cc5b6b31fa39b95e8c01ca1e8c7e8a445d647e2c1ee0 +size 384 diff --git a/margin_logs/step_0000543.npy b/margin_logs/step_0000543.npy new file mode 100644 index 0000000..8e3d6a7 --- /dev/null +++ b/margin_logs/step_0000543.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e78e771e9fe9bc1a028ca5f76887283b01ff561606259b039c3baf5dabaec7ba +size 384 diff --git a/margin_logs/step_0000544.npy b/margin_logs/step_0000544.npy new file mode 100644 index 0000000..1d1090b --- /dev/null +++ b/margin_logs/step_0000544.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccb366c1ddefd2d38a7bb53d41bacc688eaf09c11f4be4c00286ff165f695380 +size 384 diff --git a/margin_logs/step_0000545.npy b/margin_logs/step_0000545.npy new file mode 100644 index 0000000..ceba5a9 --- /dev/null +++ b/margin_logs/step_0000545.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcf43c020155251a0787f63cf8aed47cd3ed35dced81a46753c33f336579a38d +size 384 diff --git a/margin_logs/step_0000546.npy b/margin_logs/step_0000546.npy new file mode 100644 index 0000000..7f5d1ce --- /dev/null +++ b/margin_logs/step_0000546.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83ee7c51877a9988500cdb6a7170bba1bbcd7a6ca00d01eabb29c86d257ca463 +size 384 diff --git a/margin_logs/step_0000547.npy b/margin_logs/step_0000547.npy new file mode 100644 index 0000000..08a92d3 --- /dev/null +++ b/margin_logs/step_0000547.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c712a46117d5c3e7c24b841efaf317f130ee2cc6275aea6bef4be6a35d8d259 +size 384 diff --git a/margin_logs/step_0000548.npy b/margin_logs/step_0000548.npy new file mode 100644 index 0000000..a16e99c --- /dev/null +++ b/margin_logs/step_0000548.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a68a9b78130ccb1057a9295fffedf15d432b2f841c0dc1f18bf244e1ad732f8 +size 384 diff --git a/margin_logs/step_0000549.npy b/margin_logs/step_0000549.npy new file mode 100644 index 0000000..ca45f9c --- /dev/null +++ b/margin_logs/step_0000549.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36a1cb1b9bb24a5261813c5c501394fa6813a68d25b090e6a69a35295e73edb4 +size 384 diff --git a/margin_logs/step_0000550.npy b/margin_logs/step_0000550.npy new file mode 100644 index 0000000..0a9e154 --- /dev/null +++ b/margin_logs/step_0000550.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae6f7a1beb9cf076988bebc6d3b53b1b48e61a24c91357e5aaf49bcf35e32ce0 +size 384 diff --git a/margin_logs/step_0000551.npy b/margin_logs/step_0000551.npy new file mode 100644 index 0000000..d8e7055 --- /dev/null +++ b/margin_logs/step_0000551.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:739b53d73008477b22926fc82039cbf8292893c18079f419bc51373ad760f8db +size 384 diff --git a/margin_logs/step_0000552.npy b/margin_logs/step_0000552.npy new file mode 100644 index 0000000..405d250 --- /dev/null +++ b/margin_logs/step_0000552.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70000fdd357f4d00acb0e6e687cddce5e702cad2dd67cd03435b3200826f4ab5 +size 384 diff --git a/margin_logs/step_0000553.npy b/margin_logs/step_0000553.npy new file mode 100644 index 0000000..2bd9a56 --- /dev/null +++ b/margin_logs/step_0000553.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d75a646644a22c9db6e6116203cbcabbb0bd46a4371706aad40ada13a2416f9e +size 384 diff --git a/margin_logs/step_0000554.npy b/margin_logs/step_0000554.npy new file mode 100644 index 0000000..3fd97d9 --- /dev/null +++ b/margin_logs/step_0000554.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b78507841cc403ac56b9979d4b64dadf5dc09cb77c396f19d20259cff67f6cc +size 384 diff --git a/margin_logs/step_0000555.npy b/margin_logs/step_0000555.npy new file mode 100644 index 0000000..8a57aa9 --- /dev/null +++ b/margin_logs/step_0000555.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a7967897d0740968c81136534a53966067a02295260d96908d0b02a3e888721 +size 384 diff --git a/margin_logs/step_0000556.npy b/margin_logs/step_0000556.npy new file mode 100644 index 0000000..211eb1a --- /dev/null +++ b/margin_logs/step_0000556.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:979f708b9dfdaad502acd089948a3c056047df72c998b83873c3b42d3166801c +size 384 diff --git a/margin_logs/step_0000557.npy b/margin_logs/step_0000557.npy new file mode 100644 index 0000000..04f84cf --- /dev/null +++ b/margin_logs/step_0000557.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f57321c6d4a9a1ba8b1b5b2bca9840c98e0cf235ba3391c2dd2cb664c99f3a1 +size 384 diff --git a/margin_logs/step_0000558.npy b/margin_logs/step_0000558.npy new file mode 100644 index 0000000..4578913 --- /dev/null +++ b/margin_logs/step_0000558.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d59cb761301080212ccbeb0cc08e1e585e9a40c1a4e42c21006b40cdcd2f24b6 +size 384 diff --git a/margin_logs/step_0000559.npy b/margin_logs/step_0000559.npy new file mode 100644 index 0000000..eef7924 --- /dev/null +++ b/margin_logs/step_0000559.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:216ff4a91151ae6b430378c4426be660d8827bdf00d958ee5417a8b9f2ba08cb +size 384 diff --git a/margin_logs/step_0000560.npy b/margin_logs/step_0000560.npy new file mode 100644 index 0000000..4425031 --- /dev/null +++ b/margin_logs/step_0000560.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adefae2b8200bd96ac01f025d36d41af35fe5e4361b9fa5282e9d36216351770 +size 384 diff --git a/margin_logs/step_0000561.npy b/margin_logs/step_0000561.npy new file mode 100644 index 0000000..574026a --- /dev/null +++ b/margin_logs/step_0000561.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6b7a3b1f0b699b34b06c749e6f63abf52f2d576b38af72fdf9d9a2d8cf3c0ac +size 384 diff --git a/margin_logs/step_0000562.npy b/margin_logs/step_0000562.npy new file mode 100644 index 0000000..a2c9276 --- /dev/null +++ b/margin_logs/step_0000562.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d13dec37a48730be79256226456feabda61aa94844f6ea0ff47a8ae668d36e3 +size 384 diff --git a/margin_logs/step_0000563.npy b/margin_logs/step_0000563.npy new file mode 100644 index 0000000..d864d1e --- /dev/null +++ b/margin_logs/step_0000563.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:959eb4ea074afcf3f0b9622db76f80a9ee3fe198396b76f4a9107d52ec0aa388 +size 384 diff --git a/margin_logs/step_0000564.npy b/margin_logs/step_0000564.npy new file mode 100644 index 0000000..456de00 --- /dev/null +++ b/margin_logs/step_0000564.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fceb4586a5f1551ed34de8dd2eb95397a494bf764663e880180e6752aad0645e +size 384 diff --git a/margin_logs/step_0000565.npy b/margin_logs/step_0000565.npy new file mode 100644 index 0000000..2b10453 --- /dev/null +++ b/margin_logs/step_0000565.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a6d23746a8a00b293ae7a38268a8cfa24650940aa8649222d28155aa49bcf7f +size 384 diff --git a/margin_logs/step_0000566.npy b/margin_logs/step_0000566.npy new file mode 100644 index 0000000..67f56e9 --- /dev/null +++ b/margin_logs/step_0000566.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7a876786b5c30327bb4ce6935390485b4335f428e24fb9510d66a4789a4303f +size 384 diff --git a/margin_logs/step_0000567.npy b/margin_logs/step_0000567.npy new file mode 100644 index 0000000..73d8f14 --- /dev/null +++ b/margin_logs/step_0000567.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:869d1fc4c1d69ab24cb5be8ec9ef547cb240b9c3c5d139d9a7702981231bee35 +size 384 diff --git a/margin_logs/step_0000568.npy b/margin_logs/step_0000568.npy new file mode 100644 index 0000000..3c19c11 --- /dev/null +++ b/margin_logs/step_0000568.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b25f82a5472b052699a87a6b4ddd43c877f39726525dbc973497bd260837befd +size 384 diff --git a/margin_logs/step_0000569.npy b/margin_logs/step_0000569.npy new file mode 100644 index 0000000..55515f7 --- /dev/null +++ b/margin_logs/step_0000569.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3162ea046ae13fe377ee567a69291b7a07a7a663302e9a518936688130272f8 +size 384 diff --git a/margin_logs/step_0000570.npy b/margin_logs/step_0000570.npy new file mode 100644 index 0000000..48abd11 --- /dev/null +++ b/margin_logs/step_0000570.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:451464d057108bd4f3a507dade90c48cf76eaa1e107642a93fe7dded7caae2e2 +size 384 diff --git a/margin_logs/step_0000571.npy b/margin_logs/step_0000571.npy new file mode 100644 index 0000000..859cd43 --- /dev/null +++ b/margin_logs/step_0000571.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04e7c421e05f0003cb0e0c9fadd827ede71837aeb7b2eee1b66a68b7ae385236 +size 384 diff --git a/margin_logs/step_0000572.npy b/margin_logs/step_0000572.npy new file mode 100644 index 0000000..f65051c --- /dev/null +++ b/margin_logs/step_0000572.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c313232c168fd8c507a204e5cbc3a59c2f12b2f17a3567382cd43031f8a4085 +size 384 diff --git a/margin_logs/step_0000573.npy b/margin_logs/step_0000573.npy new file mode 100644 index 0000000..8e246e5 --- /dev/null +++ b/margin_logs/step_0000573.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53dc2b33cc24182e6a4d8735f2c63356989b38e3413da20547846d7f148f40f2 +size 384 diff --git a/margin_logs/step_0000574.npy b/margin_logs/step_0000574.npy new file mode 100644 index 0000000..1dbe71d --- /dev/null +++ b/margin_logs/step_0000574.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed8ba1eb81b0f40ac7889245487b27971b37775a88d0381a310401042dee572 +size 384 diff --git a/margin_logs/step_0000575.npy b/margin_logs/step_0000575.npy new file mode 100644 index 0000000..4d329c1 --- /dev/null +++ b/margin_logs/step_0000575.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bcab7067459d43bc689a53058dffd409ec81a7f65477bb4899a5bf81eb946ae +size 384 diff --git a/margin_logs/step_0000576.npy b/margin_logs/step_0000576.npy new file mode 100644 index 0000000..f7f10d4 --- /dev/null +++ b/margin_logs/step_0000576.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5933c6104ac88368c27310012fc83f34eaa91a7b276a2a539ddf4d197f241bc6 +size 384 diff --git a/margin_logs/step_0000577.npy b/margin_logs/step_0000577.npy new file mode 100644 index 0000000..847562c --- /dev/null +++ b/margin_logs/step_0000577.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d789f644defcee8202a150501c8c38bc299639fc7e4248d90f044fb8d533aea +size 384 diff --git a/margin_logs/step_0000578.npy b/margin_logs/step_0000578.npy new file mode 100644 index 0000000..d15adfa --- /dev/null +++ b/margin_logs/step_0000578.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d6cc22e5e42c2873cf314319bbb2494943d89c6dc32b662a99011438d183b3 +size 384 diff --git a/margin_logs/step_0000579.npy b/margin_logs/step_0000579.npy new file mode 100644 index 0000000..127a290 --- /dev/null +++ b/margin_logs/step_0000579.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77a7780296f1ffb7c82e45c2e6d93d9c03b245409118b893f0ea7afe431bf690 +size 384 diff --git a/margin_logs/step_0000580.npy b/margin_logs/step_0000580.npy new file mode 100644 index 0000000..8d6377f --- /dev/null +++ b/margin_logs/step_0000580.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b62afc4e409053a67ab4cc3dd9770911844c617128c7dd45a0707eacb8451737 +size 384 diff --git a/margin_logs/step_0000581.npy b/margin_logs/step_0000581.npy new file mode 100644 index 0000000..63bfe1e --- /dev/null +++ b/margin_logs/step_0000581.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d5dd333052bae89c2c218007c1b3e8e3ff223a092424f6d312d4bd1e6c9e10 +size 384 diff --git a/margin_logs/step_0000582.npy b/margin_logs/step_0000582.npy new file mode 100644 index 0000000..d1a5289 --- /dev/null +++ b/margin_logs/step_0000582.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fd8670aae9f809150265345f6218e665d5ec7d6cf55fa49634bb06678341dee +size 384 diff --git a/margin_logs/step_0000583.npy b/margin_logs/step_0000583.npy new file mode 100644 index 0000000..2c26aa0 --- /dev/null +++ b/margin_logs/step_0000583.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a755d5597e172d8f8e9fc80f6a8b969908bca920bcbb14cd786f76e7a8b18da +size 384 diff --git a/margin_logs/step_0000584.npy b/margin_logs/step_0000584.npy new file mode 100644 index 0000000..faf5c56 --- /dev/null +++ b/margin_logs/step_0000584.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28508b67ff9f83134f9f8530639ddaaa5c30c2b0cf8be8468291d3347baac7b8 +size 384 diff --git a/margin_logs/step_0000585.npy b/margin_logs/step_0000585.npy new file mode 100644 index 0000000..c559890 --- /dev/null +++ b/margin_logs/step_0000585.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bab19527ab5b126309aebce5c3aa4627f4796c0b043d649a6da99b9715f747eb +size 384 diff --git a/margin_logs/step_0000586.npy b/margin_logs/step_0000586.npy new file mode 100644 index 0000000..0ae663e --- /dev/null +++ b/margin_logs/step_0000586.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02c652dbb94d766fab7bf3161f70037ba5b99639dbcdfa5dccd975b5fbe31843 +size 384 diff --git a/margin_logs/step_0000587.npy b/margin_logs/step_0000587.npy new file mode 100644 index 0000000..d5d2b82 --- /dev/null +++ b/margin_logs/step_0000587.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dab37fd175c4b273e030e426b5ca0a0d78a9014a96a22f489cb84aa2e302004f +size 384 diff --git a/margin_logs/step_0000588.npy b/margin_logs/step_0000588.npy new file mode 100644 index 0000000..9a0f9a6 --- /dev/null +++ b/margin_logs/step_0000588.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67eb5fd1084df4f76907e1a8012c0a93ec1f56f20ae015786bb1d47c292e76b7 +size 384 diff --git a/margin_logs/step_0000589.npy b/margin_logs/step_0000589.npy new file mode 100644 index 0000000..b79fbe5 --- /dev/null +++ b/margin_logs/step_0000589.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06b7183ea42bb6565ce1ae382f053c2984fecb032ffc400019c86a76dfae44f6 +size 384 diff --git a/margin_logs/step_0000590.npy b/margin_logs/step_0000590.npy new file mode 100644 index 0000000..49dd5c5 --- /dev/null +++ b/margin_logs/step_0000590.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49b176a44000d49b6602046d1d789ea3b75d262047aa21c63baeaab1558ff711 +size 384 diff --git a/margin_logs/step_0000591.npy b/margin_logs/step_0000591.npy new file mode 100644 index 0000000..16b2cd6 --- /dev/null +++ b/margin_logs/step_0000591.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f377df63756df53c3f2b59cc202e2141198e5aac1f3d3605374c35eaeccd92e +size 384 diff --git a/margin_logs/step_0000592.npy b/margin_logs/step_0000592.npy new file mode 100644 index 0000000..d8d440d --- /dev/null +++ b/margin_logs/step_0000592.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7099964e05faa60328732d910c7484da55f2382f0513a74d9f2a1390be96ebb5 +size 384 diff --git a/margin_logs/step_0000593.npy b/margin_logs/step_0000593.npy new file mode 100644 index 0000000..19c9c3a --- /dev/null +++ b/margin_logs/step_0000593.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58130faed05d70e7bbc29d6e049cbcbeb67dc50dea95b04ef1a4eb6ddaece216 +size 384 diff --git a/margin_logs/step_0000594.npy b/margin_logs/step_0000594.npy new file mode 100644 index 0000000..d04822d --- /dev/null +++ b/margin_logs/step_0000594.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:196413221b0befab7446486428fba6bdc5a9ebf10ad5a7c25e88816f0b8b40cc +size 384 diff --git a/margin_logs/step_0000595.npy b/margin_logs/step_0000595.npy new file mode 100644 index 0000000..5757662 --- /dev/null +++ b/margin_logs/step_0000595.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2f58f45bb56cd253b60e8afbda37a199ce6db2e6cbbcdd85e90f9a7691f6a50 +size 384 diff --git a/margin_logs/step_0000596.npy b/margin_logs/step_0000596.npy new file mode 100644 index 0000000..90803de --- /dev/null +++ b/margin_logs/step_0000596.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6004971756395b14e0ff2350c3161e561c79da4fb3bcfc7d516f189203c193c +size 384 diff --git a/margin_logs/step_0000597.npy b/margin_logs/step_0000597.npy new file mode 100644 index 0000000..69548e4 --- /dev/null +++ b/margin_logs/step_0000597.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12fdabdfac4d9cc9d3c34f385ebb3c99795d8d9f0b21fa7d0cf1c41c02da0001 +size 384 diff --git a/margin_logs/step_0000598.npy b/margin_logs/step_0000598.npy new file mode 100644 index 0000000..3e5495b --- /dev/null +++ b/margin_logs/step_0000598.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afdf67de7c7fdfa352046b7c9ed34df7c666d212943bdc8bd566a4786dcbd485 +size 384 diff --git a/margin_logs/step_0000599.npy b/margin_logs/step_0000599.npy new file mode 100644 index 0000000..bd3e486 --- /dev/null +++ b/margin_logs/step_0000599.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1036e03661affed2428080aa3f79123dc2c0f20314797b171bbab1335e43f1a2 +size 384 diff --git a/margin_logs/step_0000600.npy b/margin_logs/step_0000600.npy new file mode 100644 index 0000000..f40bca8 --- /dev/null +++ b/margin_logs/step_0000600.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:489fd9b9c0c65a7581a19dc5c1871ae4eb292ea7d50e03883cd0b69e6df6716d +size 384 diff --git a/margin_logs/step_0000601.npy b/margin_logs/step_0000601.npy new file mode 100644 index 0000000..4c2ce96 --- /dev/null +++ b/margin_logs/step_0000601.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c19f1e514fb3165fe8f7644cd1474e8baea1477513d50418df9e657ba38b877a +size 384 diff --git a/margin_logs/step_0000602.npy b/margin_logs/step_0000602.npy new file mode 100644 index 0000000..5187b4b --- /dev/null +++ b/margin_logs/step_0000602.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:643bd2e3f6343323a4dfccb5fb9eafc89fb538a3bb03297d5dcfe405ddc4acc7 +size 384 diff --git a/margin_logs/step_0000603.npy b/margin_logs/step_0000603.npy new file mode 100644 index 0000000..7259e35 --- /dev/null +++ b/margin_logs/step_0000603.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8fa3b939cd80158a54607c46b2997a734470ef0af6527218aa094665d3ca5c2 +size 384 diff --git a/margin_logs/step_0000604.npy b/margin_logs/step_0000604.npy new file mode 100644 index 0000000..5e6f409 --- /dev/null +++ b/margin_logs/step_0000604.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d0bdd5e27d076185457e3737ce95c28f0f399c8b2ec9fa4ebd0d4f090013938 +size 384 diff --git a/margin_logs/step_0000605.npy b/margin_logs/step_0000605.npy new file mode 100644 index 0000000..3745b6d --- /dev/null +++ b/margin_logs/step_0000605.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cfbb7365b7108468d3b5b27daf3c6a56d0395bde1e406d9923277aa97e39600 +size 384 diff --git a/margin_logs/step_0000606.npy b/margin_logs/step_0000606.npy new file mode 100644 index 0000000..636b064 --- /dev/null +++ b/margin_logs/step_0000606.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:111358a4546a2e04b128e501c9a8274edcde5f1d74c9b9fce8837c09bea97bef +size 384 diff --git a/margin_logs/step_0000607.npy b/margin_logs/step_0000607.npy new file mode 100644 index 0000000..41ef1f2 --- /dev/null +++ b/margin_logs/step_0000607.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d85269e212d82b3974206cce92fe1eb5f5edf10d41e3d57b29c89331d393f1 +size 384 diff --git a/margin_logs/step_0000608.npy b/margin_logs/step_0000608.npy new file mode 100644 index 0000000..1782bf3 --- /dev/null +++ b/margin_logs/step_0000608.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a458d58f730388093b104bcbcae3ec1da4b8365cfd4e2e3e2534955e9474952 +size 384 diff --git a/margin_logs/step_0000609.npy b/margin_logs/step_0000609.npy new file mode 100644 index 0000000..10ce509 --- /dev/null +++ b/margin_logs/step_0000609.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a23135ed79e79a5fa382a5178afc33752f6594395539e3c7bbf1bed2ac532dde +size 384 diff --git a/margin_logs/step_0000610.npy b/margin_logs/step_0000610.npy new file mode 100644 index 0000000..95a22ad --- /dev/null +++ b/margin_logs/step_0000610.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc20d6a1590ebbfe0365f3a4d45319aa017cc511910489a52540799ace9f6ecc +size 384 diff --git a/margin_logs/step_0000611.npy b/margin_logs/step_0000611.npy new file mode 100644 index 0000000..fef1553 --- /dev/null +++ b/margin_logs/step_0000611.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b73800ea39de7ed2fe8aaa3b3e503b77de47d2d1e198e9cd7b68f7ca4ae47c3 +size 384 diff --git a/margin_logs/step_0000612.npy b/margin_logs/step_0000612.npy new file mode 100644 index 0000000..db50445 --- /dev/null +++ b/margin_logs/step_0000612.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b192e9016328329d1e14672166b5f7be064e9688250518ebdd8d45a9fd5b684d +size 384 diff --git a/margin_logs/step_0000613.npy b/margin_logs/step_0000613.npy new file mode 100644 index 0000000..e29de02 --- /dev/null +++ b/margin_logs/step_0000613.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:455546620188437c524190c3bae5208d159f1b17272854918b66f3ff514d45b4 +size 384 diff --git a/margin_logs/step_0000614.npy b/margin_logs/step_0000614.npy new file mode 100644 index 0000000..5b58f71 --- /dev/null +++ b/margin_logs/step_0000614.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef9671169ada8d2b1d13e784ee4ffb7ad7c123cb0a6ade8e10a3c93443a329e7 +size 384 diff --git a/margin_logs/step_0000615.npy b/margin_logs/step_0000615.npy new file mode 100644 index 0000000..3efa40a --- /dev/null +++ b/margin_logs/step_0000615.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72e61dde597b10de16a20c9a77684136242864c247fe2524580421742f5959ad +size 384 diff --git a/margin_logs/step_0000616.npy b/margin_logs/step_0000616.npy new file mode 100644 index 0000000..7df56a2 --- /dev/null +++ b/margin_logs/step_0000616.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae416e4e0c3186ae793c6971fd59fdcd8d806009ddc46b5f47d707cc12717975 +size 384 diff --git a/margin_logs/step_0000617.npy b/margin_logs/step_0000617.npy new file mode 100644 index 0000000..daec7b0 --- /dev/null +++ b/margin_logs/step_0000617.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2477cfa3ddc7e6e47829905c83f4c8c973bb26f29c913250bf713705f223c85a +size 384 diff --git a/margin_logs/step_0000618.npy b/margin_logs/step_0000618.npy new file mode 100644 index 0000000..88263da --- /dev/null +++ b/margin_logs/step_0000618.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c30992c763dff77d007122d4c146d98a2b21f61bcedf185b28ee0d9c5470bfc +size 384 diff --git a/margin_logs/step_0000619.npy b/margin_logs/step_0000619.npy new file mode 100644 index 0000000..c4ae343 --- /dev/null +++ b/margin_logs/step_0000619.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7752f6e6eafb79813b0fa0313c95db8450cb7b7ce92d2a79d5517bdf4077e7f6 +size 384 diff --git a/margin_logs/step_0000620.npy b/margin_logs/step_0000620.npy new file mode 100644 index 0000000..750b025 --- /dev/null +++ b/margin_logs/step_0000620.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34cd407059cce11e83f1c83e34673f5abb50a38af36b4498b7d272b69f25de8d +size 384 diff --git a/margin_logs/step_0000621.npy b/margin_logs/step_0000621.npy new file mode 100644 index 0000000..416c28e --- /dev/null +++ b/margin_logs/step_0000621.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0c4fc2c8c635f78977c559ff2bcce709a49490412852fc43a9783b65ad7036a +size 384 diff --git a/margin_logs/step_0000622.npy b/margin_logs/step_0000622.npy new file mode 100644 index 0000000..9f1f24f --- /dev/null +++ b/margin_logs/step_0000622.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e461911a6e5dac2bfd20ec158e2f2478bf162c872b58667d125b80f206b1fdc0 +size 384 diff --git a/margin_logs/step_0000623.npy b/margin_logs/step_0000623.npy new file mode 100644 index 0000000..b81c120 --- /dev/null +++ b/margin_logs/step_0000623.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a03d7b01be31eb62e77865d1f66237defeda6fc8236a8582b632c30c3d4dc494 +size 384 diff --git a/margin_logs/step_0000624.npy b/margin_logs/step_0000624.npy new file mode 100644 index 0000000..e0b3f38 --- /dev/null +++ b/margin_logs/step_0000624.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e35fcfe6fd5d9a6f521c934ad72ce8f83f3adf9a339d9519aa6ea84b8180ba93 +size 384 diff --git a/margin_logs/step_0000625.npy b/margin_logs/step_0000625.npy new file mode 100644 index 0000000..45e49a6 --- /dev/null +++ b/margin_logs/step_0000625.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df7bed5a6f48cfb2e9acddde8f07521658ee8f1ab6d00ba201320adaa9b024e7 +size 384 diff --git a/margin_logs/step_0000626.npy b/margin_logs/step_0000626.npy new file mode 100644 index 0000000..cd54cb1 --- /dev/null +++ b/margin_logs/step_0000626.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4bb2dadbb4c2ddf63c79bc461ad65dc317d8d5e0ed645e784b35a4d2a234bc6 +size 384 diff --git a/margin_logs/step_0000627.npy b/margin_logs/step_0000627.npy new file mode 100644 index 0000000..b4ede5d --- /dev/null +++ b/margin_logs/step_0000627.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:412bc2c0f378935978cdc8914f28a5f8ea49d73ad1a05532fb95407c72e35bbe +size 384 diff --git a/margin_logs/step_0000628.npy b/margin_logs/step_0000628.npy new file mode 100644 index 0000000..4cf14b1 --- /dev/null +++ b/margin_logs/step_0000628.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37b9018e0bfe80ff5c566b4b79a5130ecec1a27d70b808c676bab5433cff7062 +size 384 diff --git a/margin_logs/step_0000629.npy b/margin_logs/step_0000629.npy new file mode 100644 index 0000000..6d2790e --- /dev/null +++ b/margin_logs/step_0000629.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a56c938ee6008746825030bef3f4f67ed5b9a0838c4d568979d4eac6e5e0e067 +size 384 diff --git a/margin_logs/step_0000630.npy b/margin_logs/step_0000630.npy new file mode 100644 index 0000000..dd00c49 --- /dev/null +++ b/margin_logs/step_0000630.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecf7d6c4f2cbe45b5351ce6a60a8dd86bfa5aa7e415cbfe1f01a79512aa70d76 +size 384 diff --git a/margin_logs/step_0000631.npy b/margin_logs/step_0000631.npy new file mode 100644 index 0000000..47ad663 --- /dev/null +++ b/margin_logs/step_0000631.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c18887551ed1d867c80390ba49033c1f7dbbfefc818f28455b7c632e8b37c89e +size 384 diff --git a/margin_logs/step_0000632.npy b/margin_logs/step_0000632.npy new file mode 100644 index 0000000..381cd85 --- /dev/null +++ b/margin_logs/step_0000632.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f94eeef4caeae0aec8e163889fe34ed6e1c1e5724aef98f903429d5d6fd0b47 +size 384 diff --git a/margin_logs/step_0000633.npy b/margin_logs/step_0000633.npy new file mode 100644 index 0000000..a73b46a --- /dev/null +++ b/margin_logs/step_0000633.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c4c486dbcbdf20d0e9ef895ff2cc1133bc9fd607ab60c50e33c678801bedac4 +size 384 diff --git a/margin_logs/step_0000634.npy b/margin_logs/step_0000634.npy new file mode 100644 index 0000000..e05d38d --- /dev/null +++ b/margin_logs/step_0000634.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0502087bbc320bc1e951379dfd253ea59cf2d87e8ef7b23de4aa739d91214da0 +size 384 diff --git a/margin_logs/step_0000635.npy b/margin_logs/step_0000635.npy new file mode 100644 index 0000000..877f328 --- /dev/null +++ b/margin_logs/step_0000635.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c1a66e2ed53bdd74f2412b5b1509d649f35c278f77c5199de8ec7c70d423029 +size 384 diff --git a/margin_logs/step_0000636.npy b/margin_logs/step_0000636.npy new file mode 100644 index 0000000..8d19599 --- /dev/null +++ b/margin_logs/step_0000636.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62f2e8015132e0ccfcf0c626ba1f6cb3b9fa977234412ca3652e272b03fb8479 +size 384 diff --git a/margin_logs/step_0000637.npy b/margin_logs/step_0000637.npy new file mode 100644 index 0000000..bc0f6bd --- /dev/null +++ b/margin_logs/step_0000637.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c821224a3c9d2df35129e153fd22ddde8aef11e92312ab4cdbff46852f384ec7 +size 384 diff --git a/margin_logs/step_0000638.npy b/margin_logs/step_0000638.npy new file mode 100644 index 0000000..1d5ae1e --- /dev/null +++ b/margin_logs/step_0000638.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d3c13cd7fadb1fe5887484359b449c79c972945313b910f2de096fa428ac63e +size 384 diff --git a/margin_logs/step_0000639.npy b/margin_logs/step_0000639.npy new file mode 100644 index 0000000..da6904b --- /dev/null +++ b/margin_logs/step_0000639.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9842c352a87f9f4b9b2cbec93d4d9860043864cca69649240954bb0becaea262 +size 384 diff --git a/margin_logs/step_0000640.npy b/margin_logs/step_0000640.npy new file mode 100644 index 0000000..c7dda15 --- /dev/null +++ b/margin_logs/step_0000640.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d4b189b01b29b95750bae0cf489f7bcc145e2d80e85dfcb8d4fd607d38faa11 +size 384 diff --git a/margin_logs/step_0000641.npy b/margin_logs/step_0000641.npy new file mode 100644 index 0000000..cd86e17 --- /dev/null +++ b/margin_logs/step_0000641.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:004c6d1080b775b231b9bffb3293f3985a9a903ec4950630d80bdbc07ca62228 +size 384 diff --git a/margin_logs/step_0000642.npy b/margin_logs/step_0000642.npy new file mode 100644 index 0000000..00f7fa5 --- /dev/null +++ b/margin_logs/step_0000642.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9002e4106ab1966079d9671e7b1a19cfd5ec284472cab6511df552f3d5664b3e +size 384 diff --git a/margin_logs/step_0000643.npy b/margin_logs/step_0000643.npy new file mode 100644 index 0000000..58bc328 --- /dev/null +++ b/margin_logs/step_0000643.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e8c62cdf45535f74efea4dd6634acfeeacaecdec612b3723c9c71a9cdd894fb +size 384 diff --git a/margin_logs/step_0000644.npy b/margin_logs/step_0000644.npy new file mode 100644 index 0000000..79f14c2 --- /dev/null +++ b/margin_logs/step_0000644.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ac57807c302dd237b1249ef4962a439a5782e6aeb37d31f0d52774262bef15b +size 384 diff --git a/margin_logs/step_0000645.npy b/margin_logs/step_0000645.npy new file mode 100644 index 0000000..80f69f9 --- /dev/null +++ b/margin_logs/step_0000645.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a84a771833dbf7ca2e053ed8767c1720d8c99d2766381dd305021bdc0dc8a3b +size 384 diff --git a/margin_logs/step_0000646.npy b/margin_logs/step_0000646.npy new file mode 100644 index 0000000..2528799 --- /dev/null +++ b/margin_logs/step_0000646.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8523b7906586ffc68cd0bfc7ef918bafa825a10387db9a66e232d85b42a78116 +size 384 diff --git a/margin_logs/step_0000647.npy b/margin_logs/step_0000647.npy new file mode 100644 index 0000000..94a4d93 --- /dev/null +++ b/margin_logs/step_0000647.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd96df4c3398e2e54df8aeeb30ddd7482222adef8bd5e02048d384ef19334c0 +size 384 diff --git a/margin_logs/step_0000648.npy b/margin_logs/step_0000648.npy new file mode 100644 index 0000000..5ae7edf --- /dev/null +++ b/margin_logs/step_0000648.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bf214f85905757425e9b4a8d8f8f97a187b7c9374482e3e8d03841e7c2ca015 +size 384 diff --git a/margin_logs/step_0000649.npy b/margin_logs/step_0000649.npy new file mode 100644 index 0000000..39768d3 --- /dev/null +++ b/margin_logs/step_0000649.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d4a6b71658fca07d94403a9f1a65bddddfb3de04fb77f1fede1b292c854c8ec +size 384 diff --git a/margin_logs/step_0000650.npy b/margin_logs/step_0000650.npy new file mode 100644 index 0000000..9b6d293 --- /dev/null +++ b/margin_logs/step_0000650.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03a71c550f36a3788554d4406f4f2ac7b644b71082c80343fb1d88b151409d43 +size 384 diff --git a/margin_logs/step_0000651.npy b/margin_logs/step_0000651.npy new file mode 100644 index 0000000..9ff0d36 --- /dev/null +++ b/margin_logs/step_0000651.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89d36bd0eeb68486182a4032feaaec121b7d3794f52d1fe0048290b2b856bf8e +size 384 diff --git a/margin_logs/step_0000652.npy b/margin_logs/step_0000652.npy new file mode 100644 index 0000000..cd125d8 --- /dev/null +++ b/margin_logs/step_0000652.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e17fd61330b9c939bd92c3ee9616df262d66e7b2cc38db1bd80faf805db4c59 +size 384 diff --git a/margin_logs/step_0000653.npy b/margin_logs/step_0000653.npy new file mode 100644 index 0000000..72f1c2a --- /dev/null +++ b/margin_logs/step_0000653.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45d1be4da74a614eb3b8ef039ccab44cecb4e9cf54967bd870e86bbfe158e730 +size 384 diff --git a/margin_logs/step_0000654.npy b/margin_logs/step_0000654.npy new file mode 100644 index 0000000..18221c6 --- /dev/null +++ b/margin_logs/step_0000654.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23ec26562cd846f6e2b96d72c2b3217efa4a2ad4ec45fb5b10a22ae6cd95c465 +size 384 diff --git a/margin_logs/step_0000655.npy b/margin_logs/step_0000655.npy new file mode 100644 index 0000000..563749a --- /dev/null +++ b/margin_logs/step_0000655.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7b7351771d567a7ffe24b1701785646c47b9fef5aedd09cf51c6c57db143166 +size 384 diff --git a/margin_logs/step_0000656.npy b/margin_logs/step_0000656.npy new file mode 100644 index 0000000..76c9056 --- /dev/null +++ b/margin_logs/step_0000656.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87fa30a5d1a30172ccf003540b1e700ce6e972248b4959a4482374f9955b091b +size 384 diff --git a/margin_logs/step_0000657.npy b/margin_logs/step_0000657.npy new file mode 100644 index 0000000..e21eac1 --- /dev/null +++ b/margin_logs/step_0000657.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be87a89d9ec3c3f460e1a3fd2b7c15660520f4d9dda0a6af6238a08043ed273 +size 384 diff --git a/margin_logs/step_0000658.npy b/margin_logs/step_0000658.npy new file mode 100644 index 0000000..3a02080 --- /dev/null +++ b/margin_logs/step_0000658.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d34ebd5d0b4be0310d60e2fdd1074d6e09d2651947f511c44c4dde3f3252582 +size 384 diff --git a/margin_logs/step_0000659.npy b/margin_logs/step_0000659.npy new file mode 100644 index 0000000..e8a57ef --- /dev/null +++ b/margin_logs/step_0000659.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c861b7b7256f0155395bd271ce9926eb28caa4a3f3c81904c8666b27aa329ca9 +size 384 diff --git a/margin_logs/step_0000660.npy b/margin_logs/step_0000660.npy new file mode 100644 index 0000000..bf315d6 --- /dev/null +++ b/margin_logs/step_0000660.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0e64851bb94a4a20691910f19099030556e43c4edfed5f412f749edbab03db2 +size 384 diff --git a/margin_logs/step_0000661.npy b/margin_logs/step_0000661.npy new file mode 100644 index 0000000..964fce8 --- /dev/null +++ b/margin_logs/step_0000661.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:530e6378a959f076c8a2b74b95d270909b278d07fa12d78bdbfcaa018d31a21a +size 384 diff --git a/margin_logs/step_0000662.npy b/margin_logs/step_0000662.npy new file mode 100644 index 0000000..c0d7635 --- /dev/null +++ b/margin_logs/step_0000662.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e299c7aa1fc5004d5d5f495bfa95349499387c0b6be3ab0b9f5d100a8aaa012 +size 384 diff --git a/margin_logs/step_0000663.npy b/margin_logs/step_0000663.npy new file mode 100644 index 0000000..ce56c48 --- /dev/null +++ b/margin_logs/step_0000663.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64e8ab0e4f040dd54b4ab17b9bc76ce5493d5b72153353fcabec04690526a03a +size 384 diff --git a/margin_logs/step_0000664.npy b/margin_logs/step_0000664.npy new file mode 100644 index 0000000..bbc15d5 --- /dev/null +++ b/margin_logs/step_0000664.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d6a9fceb266a078a651357bf7b3d05c704e350c454b185de070ca664789859 +size 384 diff --git a/margin_logs/step_0000665.npy b/margin_logs/step_0000665.npy new file mode 100644 index 0000000..9c66cf3 --- /dev/null +++ b/margin_logs/step_0000665.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6f411468a6396c8eed759b1504607f504b27eb53063444265f947575c0abe28 +size 384 diff --git a/margin_logs/step_0000666.npy b/margin_logs/step_0000666.npy new file mode 100644 index 0000000..300b7b6 --- /dev/null +++ b/margin_logs/step_0000666.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f977c3956e3f4e9a3929cf12f22f424169fbe4415b293c90de070fdc593a65ac +size 384 diff --git a/margin_logs/step_0000667.npy b/margin_logs/step_0000667.npy new file mode 100644 index 0000000..fa42885 --- /dev/null +++ b/margin_logs/step_0000667.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ae33e35277e7497e273f44ecf9863c00323a72dfe125ba849c0dcce7c4df432 +size 384 diff --git a/margin_logs/step_0000668.npy b/margin_logs/step_0000668.npy new file mode 100644 index 0000000..1f612f3 --- /dev/null +++ b/margin_logs/step_0000668.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9d68e85fb2df2a2ac2fcbc6d4c12a6b70a898d88bd8d632244de2b1a51c34db +size 384 diff --git a/margin_logs/step_0000669.npy b/margin_logs/step_0000669.npy new file mode 100644 index 0000000..24bf3f9 --- /dev/null +++ b/margin_logs/step_0000669.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4b60e874b4af25b6cb034634a2ac864e399765bf7c7a3c474650b02e956d291 +size 384 diff --git a/margin_logs/step_0000670.npy b/margin_logs/step_0000670.npy new file mode 100644 index 0000000..38cecef --- /dev/null +++ b/margin_logs/step_0000670.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43f00765f1f3e28ef8b3f37ee42fe793eb22afa19d8d6bc410408739fbf95d49 +size 384 diff --git a/margin_logs/step_0000671.npy b/margin_logs/step_0000671.npy new file mode 100644 index 0000000..35ad579 --- /dev/null +++ b/margin_logs/step_0000671.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad803eae92299f974c50ad9e84f0c15092b6e9f5a1d9840207b7dcb69064f8f8 +size 384 diff --git a/margin_logs/step_0000672.npy b/margin_logs/step_0000672.npy new file mode 100644 index 0000000..19cb4c8 --- /dev/null +++ b/margin_logs/step_0000672.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:923b512d7db8656fcccc1ad6bbe141a3d8667383cd70ccd1fe48df33236bd641 +size 384 diff --git a/margin_logs/step_0000673.npy b/margin_logs/step_0000673.npy new file mode 100644 index 0000000..a90d2cb --- /dev/null +++ b/margin_logs/step_0000673.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0df520af62d5e628f2d1e2c247074c0b7648e9dbcae1ff6fc5375e9c50c7ac09 +size 384 diff --git a/margin_logs/step_0000674.npy b/margin_logs/step_0000674.npy new file mode 100644 index 0000000..727217a --- /dev/null +++ b/margin_logs/step_0000674.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94fba58dce2db7dcbe40196ee966c30bf84d67c6eef0da4cfebb7556a7cf9f26 +size 384 diff --git a/margin_logs/step_0000675.npy b/margin_logs/step_0000675.npy new file mode 100644 index 0000000..2045cc3 --- /dev/null +++ b/margin_logs/step_0000675.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c0743f264059fe7aa79b20167c956abe3bd243708584e2d6e68f2da54589c10 +size 384 diff --git a/margin_logs/step_0000676.npy b/margin_logs/step_0000676.npy new file mode 100644 index 0000000..d27a980 --- /dev/null +++ b/margin_logs/step_0000676.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00fae2c703efdcc137985ae0acedc0dc076eda961380ed00a8151ab1b6a4f62a +size 384 diff --git a/margin_logs/step_0000677.npy b/margin_logs/step_0000677.npy new file mode 100644 index 0000000..263945c --- /dev/null +++ b/margin_logs/step_0000677.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fce091c8ff2dffa743a8560326d04b3a986e21556dd6b1a922babb2ef8f1881a +size 384 diff --git a/margin_logs/step_0000678.npy b/margin_logs/step_0000678.npy new file mode 100644 index 0000000..93ac607 --- /dev/null +++ b/margin_logs/step_0000678.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:565f4ab620a8b43807f50718a8520a1dd3a84b037febe905d46e0e3c18b56d39 +size 384 diff --git a/margin_logs/step_0000679.npy b/margin_logs/step_0000679.npy new file mode 100644 index 0000000..fea415e --- /dev/null +++ b/margin_logs/step_0000679.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2431c39cc5046a61b8d8d637ec1dd9441272ad4bfc8bf0fee7501855e925c406 +size 384 diff --git a/margin_logs/step_0000680.npy b/margin_logs/step_0000680.npy new file mode 100644 index 0000000..27a29e5 --- /dev/null +++ b/margin_logs/step_0000680.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c97b35fad695a71eb1e9b74472fc2f137a458887db09de670ddacba18585a6 +size 384 diff --git a/margin_logs/step_0000681.npy b/margin_logs/step_0000681.npy new file mode 100644 index 0000000..be671db --- /dev/null +++ b/margin_logs/step_0000681.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22fb3cf34f660066b175eb7affa05c63beab75e22d5a91ea5340e4cbdd90882d +size 384 diff --git a/model-00001-of-00007.safetensors b/model-00001-of-00007.safetensors new file mode 100644 index 0000000..f01b6bb --- /dev/null +++ b/model-00001-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7fa8b130190df32470961824e586144df42d0e21f5289094d50036508051baf +size 4886466168 diff --git a/model-00002-of-00007.safetensors b/model-00002-of-00007.safetensors new file mode 100644 index 0000000..66fb894 --- /dev/null +++ b/model-00002-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff3919a258d2f8ee3e12f1d70746ac07d4d003a7bd5c4aee2481e1206cb94e5b +size 4832007448 diff --git a/model-00003-of-00007.safetensors b/model-00003-of-00007.safetensors new file mode 100644 index 0000000..03d5adf --- /dev/null +++ b/model-00003-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f2c198749dc71644fd62c5aa91c8f503bd08a9014572a3eca4307930d7d51d8 +size 4999813112 diff --git a/model-00004-of-00007.safetensors b/model-00004-of-00007.safetensors new file mode 100644 index 0000000..8fc26a3 --- /dev/null +++ b/model-00004-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67cbb2a1a8bcb0e0f97374003df983cac26fd592be2789e463d0bf192957869b +size 4999813128 diff --git a/model-00005-of-00007.safetensors b/model-00005-of-00007.safetensors new file mode 100644 index 0000000..d1fa55d --- /dev/null +++ b/model-00005-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c98262ca27e0cbf4e68587f389083a8a29b493d2bc6ed87707bf13489919dc8 +size 4832007496 diff --git a/model-00006-of-00007.safetensors b/model-00006-of-00007.safetensors new file mode 100644 index 0000000..c665bea --- /dev/null +++ b/model-00006-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6def3c81f374680c374063f1c6fc9c66ad65da9582372af8ad1cfd8a919e3b64 +size 4999813120 diff --git a/model-00007-of-00007.safetensors b/model-00007-of-00007.safetensors new file mode 100644 index 0000000..8ca1f9c --- /dev/null +++ b/model-00007-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05c09c49800e66be88b88402b54072d4c6137648a4b0d5694cec54b9975db3d8 +size 2571158184 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..0985084 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 32121044992 + }, + "weight_map": { + "lm_head.weight": "model-00007-of-00007.safetensors", + "model.embed_tokens.weight": "model-00001-of-00007.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.30.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.norm.weight": "model-00007-of-00007.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..8c6916a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train.log b/train.log new file mode 100644 index 0000000..41ce57d --- /dev/null +++ b/train.log @@ -0,0 +1,1160 @@ +2026-04-29 16:44:26 - INFO - __main__ - Model parameters ModelArguments(base_model_revision=None, model_name_or_path='/workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200', model_revision='main', model_code_revision=None, torch_dtype='bfloat16', tokenizer_name_or_path=None, trust_remote_code=False, attn_implementation='flash_attention_2', use_peft=False, lora_r=16, lora_alpha=32, lora_dropout=0.05, lora_target_modules=None, lora_modules_to_save=None, load_in_8bit=False, load_in_4bit=False, bnb_4bit_quant_type='nf4', use_bnb_nested_quant=False, bnb_4bit_quant_storage='uint8') +2026-04-29 16:44:26 - INFO - __main__ - Data parameters DataArguments(chat_template=None, dataset_mixer={'Anthropic/hh-rlhf': 1.0}, text_column='text', dataset_splits=['train'], dataset_configs=['helpful-base'], dataset_dir=None, preprocessing_num_workers=12, use_persistent_hf_cache=True, hf_cache_dir='/workspace/dynamic-dpo-v4/hf/datasets', truncation_side=None, auto_insert_empty_system_msg=True, disable_thinking=False, preprocessing_log_samples=0, preprocessing_log_dir=None) +2026-04-29 16:44:26 - INFO - __main__ - Training/evaluation parameters NewDPOConfig( +_n_gpu=1, +accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False}, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +average_tokens_across_devices=False, +batch_eval_metrics=False, +beta=0.5, +bf16=True, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=True, +dataloader_num_workers=0, +dataloader_persistent_workers=False, +dataloader_pin_memory=True, +dataloader_prefetch_factor=None, +dataset_num_proc=12, +ddp_backend=None, +ddp_broadcast_buffers=None, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +ddp_timeout=1800, +debug=[], +deepspeed=None, +disable_dropout=True, +disable_tqdm=False, +do_eval=False, +do_predict=False, +do_train=False, +eta=0.1, +eval_accumulation_steps=None, +eval_delay=0, +eval_do_concat_batches=True, +eval_on_start=False, +eval_steps=200, +eval_strategy=IntervalStrategy.NO, +eval_use_gather_object=False, +f_alpha_divergence_coef=1.0, +f_divergence_type=reverse_kl, +force_use_ref_model=False, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +generate_during_eval=False, +gradient_accumulation_steps=2, +gradient_checkpointing=True, +gradient_checkpointing_kwargs={'use_reentrant': False}, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_always_push=False, +hub_margin_dataset_id=None, +hub_model_id=W-61/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449, +hub_model_revision=main, +hub_private_repo=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +include_for_metrics=[], +include_inputs_for_metrics=False, +include_num_input_tokens_seen=False, +include_tokens_per_second=False, +is_encoder_decoder=None, +jit_mode_eval=False, +label_names=None, +label_pad_token_id=-100, +label_smoothing=0.0, +label_smoothing_factor=0.0, +learning_rate=5e-07, +length_column_name=length, +load_best_model_at_end=False, +local_rank=0, +log_level=info, +log_level_replica=warning, +log_on_each_node=True, +logging_dir=/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/runs/Apr29_16-44-26_bc4ce3cd7c4e, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=1, +logging_strategy=IntervalStrategy.STEPS, +loss_type=sigmoid, +lr_scheduler_kwargs={}, +lr_scheduler_type=SchedulerType.COSINE, +margin_dataset_private=None, +margin_dataset_split=train, +margin_log_path=/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs, +margin_log_steps=1, +margin_save_full=True, +max_grad_norm=1.0, +max_length=512, +max_prompt_length=256, +max_steps=-1, +max_target_length=None, +metric_for_best_model=None, +model_adapter_name=None, +model_init_kwargs=None, +mp_parameters=, +neftune_noise_alpha=None, +no_cuda=False, +non_finite_logits_handling=error, +num_train_epochs=1, +optim=OptimizerNames.ADAMW_TORCH, +optim_args=None, +optim_target_modules=None, +output_dir=/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449, +overwrite_output_dir=False, +padding_value=None, +past_index=-1, +per_device_eval_batch_size=8, +per_device_train_batch_size=8, +post_tokenization_log_dir=None, +post_tokenization_log_samples=0, +precompute_ref_batch_size=None, +precompute_ref_eval_batch_size=None, +precompute_ref_log_probs=False, +prediction_loss_only=False, +push_margin_dataset=False, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +q_target=0.45, +ray_scope=last, +ref_adapter_name=None, +ref_model_init_kwargs=None, +ref_model_mixup_alpha=0.9, +ref_model_sync_steps=64, +reference_free=False, +remove_unused_columns=False, +report_to=['wandb'], +require_explicit_ref_model=True, +restore_callback_states_from_checkpoint=False, +resume_from_checkpoint=None, +reuse_tokenized_dataset=True, +rpo_alpha=None, +run_name=llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449, +s_star=0.4, +save_hf_model_artifacts=True, +save_on_each_node=False, +save_only_model=False, +save_safetensors=True, +save_steps=50, +save_strategy=SaveStrategy.NO, +save_total_limit=2, +seed=42, +sft_weight=0.0, +skip_memory_metrics=True, +sync_ref_model=False, +tf32=None, +tokenization_batch_size=128, +tokenization_mode=online, +tokenized_dataset_cache_dir=/workspace/dynamic-dpo-v4/tokenized_preferences, +torch_compile=False, +torch_compile_backend=None, +torch_compile_mode=None, +torch_empty_cache_steps=None, +torchdynamo=None, +tp_size=0, +tpu_metrics_debug=False, +tpu_num_cores=None, +trainer_type=new_dpo, +truncation_mode=keep_end, +use_cpu=False, +use_ipex=False, +use_legacy_prediction_loop=False, +use_liger_kernel=False, +use_mps_device=False, +wandb_project=llama3-hh-new-dpo-multi-beta-sweep, +warmup_ratio=0.1, +warmup_steps=0, +weight_decay=0.0, +) +2026-04-29 16:44:26 - INFO - __main__ - Using W&B project from training args: llama3-hh-new-dpo-multi-beta-sweep +wandb: Currently logged in as: can-not-fand (can-not-fand-northeastern-university). Use `wandb login --relogin` to force relogin +wandb: wandb version 0.26.1 is available! To upgrade, please run: +wandb: $ pip install wandb --upgrade +wandb: Tracking run with wandb version 0.17.5 +wandb: Run data is saved locally in /workspace/dynamic-dpo-v4/wandb/wandb/run-20260429_164429-ypz8eup0 +wandb: Run `wandb offline` to turn off syncing. +wandb: Syncing run llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449 +wandb: ⭐️ View project at https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep +wandb: 🚀 View run at https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep/runs/ypz8eup0 +2026-04-29 16:44:30 - INFO - __main__ - New-DPO parameters: beta=0.5, q_target=0.45, s_star=0.4, eta=0.1 +2026-04-29 16:44:30 - INFO - __main__ - Using persistent HF datasets cache at /workspace/dynamic-dpo-v4/hf/datasets + Normalizing raw HH preferences (train): 0%| | 0/43598 [00:00> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Normalizing raw HH preferences (train): 96%|█████████▌| 41655/43598 [00:03<00:00, 11388.52 examples/s] Normalizing raw HH preferences (train): 18%|█▊ | 8000/43598 [00:00<00:03, 9212.77 examples/s] Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. + Normalizing raw HH preferences (train): 98%|█████████▊| 42808/43598 [00:03<00:00, 11425.06 examples/s] Normalizing raw HH preferences (train): 21%|██ | 9136/43598 [00:00<00:03, 9807.15 examples/s] Normalizing raw HH preferences (train): 24%|██▎ | 10293/43598 [00:01<00:03, 10305.92 examples/s] Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10679.29 examples/s] + Normalizing raw HH preferences (train): 26%|██▌ | 11437/43598 [00:01<00:03, 10632.34 examples/s] Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10623.72 examples/s] + Normalizing raw HH preferences (train): 29%|██▉ | 12649/43598 [00:01<00:02, 10936.72 examples/s] Normalizing raw HH preferences (train): 32%|███▏ | 13810/43598 [00:01<00:02, 11131.05 examples/s] Normalizing raw HH preferences (train): 34%|███▍ | 14965/43598 [00:01<00:02, 11252.16 examples/s] Normalizing raw HH preferences (train): 38%|███▊ | 16664/43598 [00:01<00:02, 11277.60 examples/s]/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( +[WARNING|logging.py:328] 2026-04-29 16:44:37,094 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( + Loading checkpoint shards: 0%| | 0/7 [00:00> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 696.40it/s] + Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. + Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. + Normalizing raw HH preferences (train): 45%|████▍ | 19405/43598 [00:01<00:02, 11147.44 examples/s] Normalizing raw HH preferences (train): 47%|████▋ | 20640/43598 [00:01<00:02, 11287.82 examples/s] Normalizing raw HH preferences (train): 50%|████▉ | 21782/43598 [00:02<00:01, 11320.13 examples/s] Normalizing raw HH preferences (train): 53%|█████▎ | 22943/43598 [00:02<00:01, 11396.75 examples/s] Normalizing raw HH preferences (train): 57%|█████▋ | 24643/43598 [00:02<00:01, 11346.63 examples/s] Normalizing raw HH preferences (train): 59%|█████▉ | 25793/43598 [00:02<00:01, 11383.46 examples/s] Normalizing raw HH preferences (train): 63%|██████▎ | 27449/43598 [00:02<00:01, 11257.61 examples/s] Normalizing raw HH preferences (train): 66%|██████▌ | 28654/43598 [00:02<00:01, 11368.47 examples/s] Normalizing raw HH preferences (train): 68%|██████▊ | 29838/43598 [00:02<00:01, 11491.01 examples/s] Normalizing raw HH preferences (train): 71%|███████ | 31000/43598 [00:02<00:01, 11273.29 examples/s] Normalizing raw HH preferences (train): 74%|███████▍ | 32171/43598 [00:02<00:01, 11392.00 examples/s] Normalizing raw HH preferences (train): 76%|███████▋ | 33323/43598 [00:03<00:00, 11425.39 examples/s] Normalizing raw HH preferences (train): 79%|███████▉ | 34477/43598 [00:03<00:00, 11456.63 examples/s] Normalizing raw HH preferences (train): 82%|████████▏ | 35642/43598 [00:03<00:00, 11498.14 examples/s] Normalizing raw HH preferences (train): 84%|████████▍ | 36796/43598 [00:03<00:00, 11508.20 examples/s] Normalizing raw HH preferences (train): 88%|████████▊ | 38451/43598 [00:03<00:00, 11321.16 examples/s] Normalizing raw HH preferences (train): 91%|█████████ | 39649/43598 [00:03<00:00, 11425.26 examples/s] Normalizing raw HH preferences (train): 94%|█████████▎| 40803/43598 [00:03<00:00, 11456.16 examples/s] Normalizing raw HH preferences (train): 96%|█████████▋| 41965/43598 [00:03<00:00, 11501.01 examples/s] Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:03<00:00, 10957.74 examples/s] Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:03<00:00, 10956.90 examples/s] +2026-04-29 16:44:39 - INFO - __main__ - Training on the following splits: ['train : 43598'] +[INFO|tokenization_utils_base.py:2058] 2026-04-29 16:44:39,433 >> loading file tokenizer.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 16:44:39,433 >> loading file tokenizer.model +[INFO|tokenization_utils_base.py:2058] 2026-04-29 16:44:39,433 >> loading file added_tokens.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 16:44:39,433 >> loading file special_tokens_map.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 16:44:39,433 >> loading file tokenizer_config.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 16:44:39,433 >> loading file chat_template.jinja +[INFO|tokenization_utils_base.py:2323] 2026-04-29 16:44:39,824 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. +2026-04-29 16:44:39 - INFO - __main__ - Processed train sample 41905: + +Prompt: +<|begin_of_text|><|start_header_id|>user<|end_header_id|> + +What is the best way to clean my refrigerator?<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +Well, I have a few ideas, but the best way to know what works best is to run some tests and then analyze the data to figure out which technique works best.<|eot_id|><|start_header_id|>user<|end_header_id|> + +What are your ideas?<|eot_id|> + +Chosen: +<|start_header_id|>assistant<|end_header_id|> + +Some of my ideas include wiping down the interior with a rag, using bleach and water to wipe down the door, emptying the contents of the fridge to scrub the shelves with a clean cloth, cleaning the interior of the fridge with vinegar and water, and using a vacuum cleaner to clean the interior.<|eot_id|> + +Rejected: +<|start_header_id|>assistant<|end_header_id|> + +I was thinking that you could try different types of soap, which could be a variation of “soap,” like antibacterial soap. I was also thinking that you could try using different tools, like a toothbrush, to clean the inside. Or I was thinking that you could try different ways to get the gunk out, like using a squeegee, or using a paper towel. I was also thinking you could try other types of cleaning, like vacuuming, but I think that could have the opposite of the desired effect.<|eot_id|> +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( +[INFO|configuration_utils.py:691] 2026-04-29 16:44:39,939 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/config.json +[INFO|configuration_utils.py:765] 2026-04-29 16:44:39,940 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": false, + "vocab_size": 128256 +} + +[INFO|modeling_utils.py:1121] 2026-04-29 16:44:39,951 >> loading weights file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/model.safetensors.index.json +[INFO|modeling_utils.py:2167] 2026-04-29 16:44:39,952 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. +[WARNING|logging.py:328] 2026-04-29 16:44:39,955 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[INFO|configuration_utils.py:1142] 2026-04-29 16:44:39,956 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "eos_token_id": 128001, + "use_cache": false +} + + Loading checkpoint shards: 0%| | 0/7 [00:00> All model checkpoint weights were used when initializing LlamaForCausalLM. + +[INFO|modeling_utils.py:4934] 2026-04-29 16:44:51,592 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200. +If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. +[INFO|configuration_utils.py:1095] 2026-04-29 16:44:51,594 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/generation_config.json +[INFO|configuration_utils.py:1142] 2026-04-29 16:44:51,595 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9 +} + +[INFO|configuration_utils.py:691] 2026-04-29 16:44:51,597 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/config.json +[INFO|configuration_utils.py:765] 2026-04-29 16:44:51,597 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": false, + "vocab_size": 128256 +} + +[INFO|modeling_utils.py:1121] 2026-04-29 16:44:51,598 >> loading weights file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/model.safetensors.index.json +[INFO|modeling_utils.py:2167] 2026-04-29 16:44:51,599 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. +[INFO|configuration_utils.py:1142] 2026-04-29 16:44:51,603 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "eos_token_id": 128001, + "use_cache": false +} + + Loading checkpoint shards: 0%| | 0/7 [00:00> All model checkpoint weights were used when initializing LlamaForCausalLM. + +[INFO|modeling_utils.py:4934] 2026-04-29 16:45:02,951 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200. +If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. +[INFO|configuration_utils.py:1095] 2026-04-29 16:45:02,954 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/generation_config.json +[INFO|configuration_utils.py:1142] 2026-04-29 16:45:02,955 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9 +} + +[WARNING|trainer.py:821] 2026-04-29 16:45:02,956 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. +[WARNING|trainer.py:816] 2026-04-29 16:45:02,956 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 16:45:02,968 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-29 16:45:04,398 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 16:45:04,398 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 16:45:04,398 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 16:45:04,413 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-29 16:45:04,417 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-29 16:45:04,423 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[INFO|trainer.py:748] 2026-04-29 16:45:04,658 >> Using auto half precision backend +/workspace/dynamic-dpo-v4/.venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaForCausalLM because mixed precision turned on in FSDP. Affects: model.embed_tokens.weight, model.norm.weight, lm_head.weight. + warnings.warn( +/workspace/dynamic-dpo-v4/.venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaDecoderLayer because mixed precision turned on in FSDP. Affects: self_attn.q_proj.weight, self_attn.k_proj.weight, self_attn.v_proj.weight, self_attn.o_proj.weight, mlp.gate_proj.weight, mlp.up_proj.weight, mlp.down_proj.weight, input_layernorm.weight, post_attention_layernorm.weight. + warnings.warn( +/workspace/dynamic-dpo-v4/.venv/lib/python3.11/site-packages/accelerate/accelerator.py:1563: UserWarning: FSDP upcast of low precision parameters may affect the precision of model checkpoints. + warnings.warn( +[INFO|trainer.py:2414] 2026-04-29 16:45:12,880 >> ***** Running training ***** +[INFO|trainer.py:2415] 2026-04-29 16:45:12,881 >> Num examples = 43,598 +[INFO|trainer.py:2416] 2026-04-29 16:45:12,881 >> Num Epochs = 1 +[INFO|trainer.py:2417] 2026-04-29 16:45:12,881 >> Instantaneous batch size per device = 8 +[INFO|trainer.py:2420] 2026-04-29 16:45:12,881 >> Total train batch size (w. parallel, distributed & accumulation) = 64 +[INFO|trainer.py:2421] 2026-04-29 16:45:12,881 >> Gradient Accumulation steps = 2 +[INFO|trainer.py:2422] 2026-04-29 16:45:12,881 >> Total optimization steps = 681 +[INFO|trainer.py:2423] 2026-04-29 16:45:12,882 >> Number of trainable parameters = 2,007,565,312 +[INFO|integration_utils.py:831] 2026-04-29 16:45:12,883 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/681 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-29 16:45:14,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-29 16:45:14,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-29 16:45:14,493 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 1/681 [00:02<31:08, 2.75s/it] {'loss': 1.4087, 'grad_norm': 420.2432861328125, 'learning_rate': 0.0, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.5027250051498413, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.02287006378173828, 'margin_dpo/margin_mean': -0.02287048101425171, 'margin_dpo/margin_std': 0.41920793056488037, 'logps/chosen': -50.1435661315918, 'logps/rejected': -74.09991455078125, 'logps/ref_chosen': -50.14883804321289, 'logps/ref_rejected': -74.1280517578125, 'KL/chosen_KL_mean': 0.00527191162109375, 'KL/rejected_KL_mean': 0.028141021728515625, 'KL/mean': 0.016706019639968872, 'KL/std': 0.272699236869812, 'logits/chosen': -0.4974287748336792, 'logits/rejected': -0.43299180269241333, 'epoch': 0.0} + 0%| | 1/681 [00:02<31:08, 2.75s/it] 0%| | 2/681 [00:05<29:36, 2.62s/it] {'loss': 1.4271, 'grad_norm': 364.62652587890625, 'learning_rate': 7.246376811594203e-09, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.5081548094749451, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.06572261452674866, 'margin_dpo/margin_mean': -0.06572240591049194, 'margin_dpo/margin_std': 0.35048407316207886, 'logps/chosen': -52.65568923950195, 'logps/rejected': -75.27340698242188, 'logps/ref_chosen': -52.620704650878906, 'logps/ref_rejected': -75.30413818359375, 'KL/chosen_KL_mean': -0.03498649597167969, 'KL/rejected_KL_mean': 0.030735015869140625, 'KL/mean': -0.00212840735912323, 'KL/std': 0.24797174334526062, 'logits/chosen': -0.49536412954330444, 'logits/rejected': -0.4594460427761078, 'epoch': 0.0} + 0%| | 2/681 [00:05<29:36, 2.62s/it] 0%| | 3/681 [00:07<29:21, 2.60s/it] {'loss': 1.362, 'grad_norm': 347.2252197265625, 'learning_rate': 1.4492753623188406e-08, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.4915676712989807, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.06905469298362732, 'margin_dpo/margin_mean': 0.06905469298362732, 'margin_dpo/margin_std': 0.3988131284713745, 'logps/chosen': -60.929290771484375, 'logps/rejected': -68.6893539428711, 'logps/ref_chosen': -60.981597900390625, 'logps/ref_rejected': -68.67259216308594, 'KL/chosen_KL_mean': 0.052303314208984375, 'KL/rejected_KL_mean': -0.016756057739257812, 'KL/mean': 0.017774119973182678, 'KL/std': 0.28824305534362793, 'logits/chosen': -0.4816562235355377, 'logits/rejected': -0.44209641218185425, 'epoch': 0.0} + 0%| | 3/681 [00:07<29:21, 2.60s/it] 1%| | 4/681 [00:10<29:32, 2.62s/it] {'loss': 1.4044, 'grad_norm': 359.3165588378906, 'learning_rate': 2.1739130434782606e-08, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.5027137994766235, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.021249920129776, 'margin_dpo/margin_mean': -0.02125033736228943, 'margin_dpo/margin_std': 0.33959275484085083, 'logps/chosen': -56.789520263671875, 'logps/rejected': -86.64767456054688, 'logps/ref_chosen': -56.7677116394043, 'logps/ref_rejected': -86.64710998535156, 'KL/chosen_KL_mean': -0.021808624267578125, 'KL/rejected_KL_mean': -0.000560760498046875, 'KL/mean': -0.011183008551597595, 'KL/std': 0.250108003616333, 'logits/chosen': -0.4682066738605499, 'logits/rejected': -0.44051969051361084, 'epoch': 0.01} + 1%| | 4/681 [00:10<29:32, 2.62s/it] 1%| | 5/681 [00:13<29:24, 2.61s/it] {'loss': 1.3765, 'grad_norm': 448.5081481933594, 'learning_rate': 2.898550724637681e-08, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.4954211413860321, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.036554425954818726, 'margin_dpo/margin_mean': 0.03655460476875305, 'margin_dpo/margin_std': 0.3572620153427124, 'logps/chosen': -53.81924057006836, 'logps/rejected': -84.14559936523438, 'logps/ref_chosen': -53.859375, 'logps/ref_rejected': -84.14918518066406, 'KL/chosen_KL_mean': 0.040134429931640625, 'KL/rejected_KL_mean': 0.003582000732421875, 'KL/mean': 0.021857306361198425, 'KL/std': 0.26523804664611816, 'logits/chosen': -0.49668556451797485, 'logits/rejected': -0.45167264342308044, 'epoch': 0.01} + 1%| | 5/681 [00:13<29:24, 2.61s/it] 1%| | 6/681 [00:15<27:49, 2.47s/it] {'loss': 1.4172, 'grad_norm': 474.76165771484375, 'learning_rate': 3.6231884057971014e-08, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.5049124956130981, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.04002267122268677, 'margin_dpo/margin_mean': -0.04002311825752258, 'margin_dpo/margin_std': 0.41552552580833435, 'logps/chosen': -63.018836975097656, 'logps/rejected': -92.61666870117188, 'logps/ref_chosen': -63.007484436035156, 'logps/ref_rejected': -92.64534759521484, 'KL/chosen_KL_mean': -0.011350631713867188, 'KL/rejected_KL_mean': 0.028675079345703125, 'KL/mean': 0.008662402629852295, 'KL/std': 0.28275883197784424, 'logits/chosen': -0.5011003613471985, 'logits/rejected': -0.4586023688316345, 'epoch': 0.01} + 1%| | 6/681 [00:15<27:49, 2.47s/it] 1%| | 7/681 [00:17<27:13, 2.42s/it] {'loss': 1.3777, 'grad_norm': 406.9675598144531, 'learning_rate': 4.347826086956521e-08, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.49542200565338135, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.0368523895740509, 'margin_dpo/margin_mean': 0.03685298562049866, 'margin_dpo/margin_std': 0.3953211307525635, 'logps/chosen': -57.75729751586914, 'logps/rejected': -103.93992614746094, 'logps/ref_chosen': -57.774818420410156, 'logps/ref_rejected': -103.92059326171875, 'KL/chosen_KL_mean': 0.017522811889648438, 'KL/rejected_KL_mean': -0.0193328857421875, 'KL/mean': -0.0009044557809829712, 'KL/std': 0.27743956446647644, 'logits/chosen': -0.5030827522277832, 'logits/rejected': -0.4692496657371521, 'epoch': 0.01} + 1%| | 7/681 [00:17<27:13, 2.42s/it] 1%| | 8/681 [00:19<26:55, 2.40s/it] {'loss': 1.4225, 'grad_norm': 401.7236328125, 'learning_rate': 5.0724637681159424e-08, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.5070033073425293, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.05733850598335266, 'margin_dpo/margin_mean': -0.05733811855316162, 'margin_dpo/margin_std': 0.3359847962856293, 'logps/chosen': -58.7501220703125, 'logps/rejected': -79.28817749023438, 'logps/ref_chosen': -58.716033935546875, 'logps/ref_rejected': -79.3114242553711, 'KL/chosen_KL_mean': -0.03408622741699219, 'KL/rejected_KL_mean': 0.023250579833984375, 'KL/mean': -0.005419567227363586, 'KL/std': 0.2861067056655884, 'logits/chosen': -0.5170855522155762, 'logits/rejected': -0.4922248125076294, 'epoch': 0.01} + 1%| | 8/681 [00:20<26:55, 2.40s/it] 1%|▏ | 9/681 [00:22<27:27, 2.45s/it] {'loss': 1.3925, 'grad_norm': 423.5147705078125, 'learning_rate': 5.797101449275362e-08, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.49866122007369995, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.010491371154785156, 'margin_dpo/margin_mean': 0.010491013526916504, 'margin_dpo/margin_std': 0.42117273807525635, 'logps/chosen': -69.88443756103516, 'logps/rejected': -99.63075256347656, 'logps/ref_chosen': -69.8668441772461, 'logps/ref_rejected': -99.6026611328125, 'KL/chosen_KL_mean': -0.017595291137695312, 'KL/rejected_KL_mean': -0.02808380126953125, 'KL/mean': -0.022840231657028198, 'KL/std': 0.28952154517173767, 'logits/chosen': -0.5013039708137512, 'logits/rejected': -0.45518267154693604, 'epoch': 0.01} + 1%|▏ | 9/681 [00:22<27:27, 2.45s/it] 1%|▏ | 10/681 [00:25<27:34, 2.47s/it] {'loss': 1.4024, 'grad_norm': 353.2280578613281, 'learning_rate': 6.521739130434782e-08, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.5018855333328247, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.015163615345954895, 'margin_dpo/margin_mean': -0.015163183212280273, 'margin_dpo/margin_std': 0.35796934366226196, 'logps/chosen': -48.340641021728516, 'logps/rejected': -80.33985137939453, 'logps/ref_chosen': -48.35768508911133, 'logps/ref_rejected': -80.37206268310547, 'KL/chosen_KL_mean': 0.0170440673828125, 'KL/rejected_KL_mean': 0.0322113037109375, 'KL/mean': 0.02462557703256607, 'KL/std': 0.2662718594074249, 'logits/chosen': -0.4687877297401428, 'logits/rejected': -0.42438995838165283, 'epoch': 0.01} + 1%|▏ | 10/681 [00:25<27:34, 2.47s/it] 2%|▏ | 11/681 [00:27<28:18, 2.53s/it] {'loss': 1.3821, 'grad_norm': 344.31915283203125, 'learning_rate': 7.246376811594203e-08, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.49689868092536926, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.024578213691711426, 'margin_dpo/margin_mean': 0.024578243494033813, 'margin_dpo/margin_std': 0.3401423990726471, 'logps/chosen': -52.995601654052734, 'logps/rejected': -87.78370666503906, 'logps/ref_chosen': -53.01685333251953, 'logps/ref_rejected': -87.78038024902344, 'KL/chosen_KL_mean': 0.021253585815429688, 'KL/rejected_KL_mean': -0.003330230712890625, 'KL/mean': 0.008961886167526245, 'KL/std': 0.24493196606636047, 'logits/chosen': -0.4556809365749359, 'logits/rejected': -0.43051183223724365, 'epoch': 0.02} + 2%|▏ | 11/681 [00:27<28:18, 2.53s/it] 2%|▏ | 12/681 [00:30<28:31, 2.56s/it] {'loss': 1.395, 'grad_norm': 445.4076843261719, 'learning_rate': 7.971014492753623e-08, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.4991750717163086, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.0059018731117248535, 'margin_dpo/margin_mean': 0.005901157855987549, 'margin_dpo/margin_std': 0.43184518814086914, 'logps/chosen': -61.876739501953125, 'logps/rejected': -104.93547058105469, 'logps/ref_chosen': -61.80543518066406, 'logps/ref_rejected': -104.8582763671875, 'KL/chosen_KL_mean': -0.0713043212890625, 'KL/rejected_KL_mean': -0.07719802856445312, 'KL/mean': -0.07425594329833984, 'KL/std': 0.2839137315750122, 'logits/chosen': -0.5402117967605591, 'logits/rejected': -0.5041322708129883, 'epoch': 0.02} + 2%|▏ | 12/681 [00:30<28:31, 2.56s/it] 2%|▏ | 13/681 [00:33<28:54, 2.60s/it] {'loss': 1.3929, 'grad_norm': 399.35504150390625, 'learning_rate': 8.695652173913042e-08, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.4988465905189514, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.009646743535995483, 'margin_dpo/margin_mean': 0.009646564722061157, 'margin_dpo/margin_std': 0.4087739586830139, 'logps/chosen': -64.24199676513672, 'logps/rejected': -87.19436645507812, 'logps/ref_chosen': -64.2603530883789, 'logps/ref_rejected': -87.20307922363281, 'KL/chosen_KL_mean': 0.018360137939453125, 'KL/rejected_KL_mean': 0.00872039794921875, 'KL/mean': 0.013540104031562805, 'KL/std': 0.2952546775341034, 'logits/chosen': -0.49472951889038086, 'logits/rejected': -0.46776068210601807, 'epoch': 0.02} + 2%|▏ | 13/681 [00:33<28:54, 2.60s/it] 2%|▏ | 14/681 [00:35<28:34, 2.57s/it] {'loss': 1.3972, 'grad_norm': 423.43255615234375, 'learning_rate': 9.420289855072464e-08, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.49986132979393005, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.0010128617286682129, 'margin_dpo/margin_mean': 0.0010128915309906006, 'margin_dpo/margin_std': 0.4278063476085663, 'logps/chosen': -58.12610626220703, 'logps/rejected': -104.06399536132812, 'logps/ref_chosen': -58.11021041870117, 'logps/ref_rejected': -104.04708099365234, 'KL/chosen_KL_mean': -0.015897750854492188, 'KL/rejected_KL_mean': -0.016910552978515625, 'KL/mean': -0.016403615474700928, 'KL/std': 0.29813089966773987, 'logits/chosen': -0.4713672995567322, 'logits/rejected': -0.4317617416381836, 'epoch': 0.02} + 2%|▏ | 14/681 [00:35<28:34, 2.57s/it] 2%|▏ | 15/681 [00:38<28:31, 2.57s/it] {'loss': 1.3803, 'grad_norm': 320.57830810546875, 'learning_rate': 1.0144927536231885e-07, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.4963420033454895, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.029320567846298218, 'margin_dpo/margin_mean': 0.029320329427719116, 'margin_dpo/margin_std': 0.3670857548713684, 'logps/chosen': -56.99608612060547, 'logps/rejected': -80.86714172363281, 'logps/ref_chosen': -56.96691131591797, 'logps/ref_rejected': -80.80863952636719, 'KL/chosen_KL_mean': -0.029178619384765625, 'KL/rejected_KL_mean': -0.058498382568359375, 'KL/mean': -0.043839290738105774, 'KL/std': 0.21881349384784698, 'logits/chosen': -0.4776439368724823, 'logits/rejected': -0.45821863412857056, 'epoch': 0.02} + 2%|▏ | 15/681 [00:38<28:31, 2.57s/it] 2%|▏ | 16/681 [00:40<28:09, 2.54s/it] {'loss': 1.3621, 'grad_norm': 413.9620056152344, 'learning_rate': 1.0869565217391303e-07, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.49249696731567383, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.06042605638504028, 'margin_dpo/margin_mean': 0.06042572855949402, 'margin_dpo/margin_std': 0.28903117775917053, 'logps/chosen': -61.70491027832031, 'logps/rejected': -84.3949203491211, 'logps/ref_chosen': -61.739891052246094, 'logps/ref_rejected': -84.36947631835938, 'KL/chosen_KL_mean': 0.03498077392578125, 'KL/rejected_KL_mean': -0.025447845458984375, 'KL/mean': 0.0047643184661865234, 'KL/std': 0.22115886211395264, 'logits/chosen': -0.5402108430862427, 'logits/rejected': -0.5008047223091125, 'epoch': 0.02} + 2%|▏ | 16/681 [00:40<28:09, 2.54s/it] 2%|▏ | 17/681 [00:43<27:58, 2.53s/it] {'loss': 1.3408, 'grad_norm': 378.045166015625, 'learning_rate': 1.1594202898550725e-07, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.4866830110549927, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.10754328966140747, 'margin_dpo/margin_mean': 0.10754308104515076, 'margin_dpo/margin_std': 0.33711522817611694, 'logps/chosen': -67.67829895019531, 'logps/rejected': -85.45416259765625, 'logps/ref_chosen': -67.71033477783203, 'logps/ref_rejected': -85.37865447998047, 'KL/chosen_KL_mean': 0.032032012939453125, 'KL/rejected_KL_mean': -0.07551193237304688, 'KL/mean': -0.021739423274993896, 'KL/std': 0.28327155113220215, 'logits/chosen': -0.5082837343215942, 'logits/rejected': -0.4719049036502838, 'epoch': 0.02} + 2%|▏ | 17/681 [00:43<27:58, 2.53s/it] 3%|▎ | 18/681 [00:45<27:49, 2.52s/it] {'loss': 1.3788, 'grad_norm': 400.0282897949219, 'learning_rate': 1.2318840579710146e-07, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.49600082635879517, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.03179961442947388, 'margin_dpo/margin_mean': 0.0317995548248291, 'margin_dpo/margin_std': 0.3525484800338745, 'logps/chosen': -47.74420166015625, 'logps/rejected': -75.50880432128906, 'logps/ref_chosen': -47.7394905090332, 'logps/ref_rejected': -75.4722900390625, 'KL/chosen_KL_mean': -0.004711151123046875, 'KL/rejected_KL_mean': -0.0365142822265625, 'KL/mean': -0.020610541105270386, 'KL/std': 0.24805116653442383, 'logits/chosen': -0.4709518253803253, 'logits/rejected': -0.41293156147003174, 'epoch': 0.03} + 3%|▎ | 18/681 [00:45<27:49, 2.52s/it] 3%|▎ | 19/681 [00:48<27:59, 2.54s/it] {'loss': 1.3391, 'grad_norm': 357.18487548828125, 'learning_rate': 1.3043478260869563e-07, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.48626208305358887, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.11113607883453369, 'margin_dpo/margin_mean': 0.11113619804382324, 'margin_dpo/margin_std': 0.3337096571922302, 'logps/chosen': -70.16448974609375, 'logps/rejected': -89.82785034179688, 'logps/ref_chosen': -70.20536041259766, 'logps/ref_rejected': -89.7575912475586, 'KL/chosen_KL_mean': 0.040874481201171875, 'KL/rejected_KL_mean': -0.07026290893554688, 'KL/mean': -0.014696747064590454, 'KL/std': 0.2666897773742676, 'logits/chosen': -0.485554575920105, 'logits/rejected': -0.4352126121520996, 'epoch': 0.03} + 3%|▎ | 19/681 [00:48<27:59, 2.54s/it] 3%|▎ | 20/681 [00:50<28:02, 2.54s/it] {'loss': 1.3552, 'grad_norm': 360.1549377441406, 'learning_rate': 1.3768115942028986e-07, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.4906235337257385, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.0754203200340271, 'margin_dpo/margin_mean': 0.07542020082473755, 'margin_dpo/margin_std': 0.3174728751182556, 'logps/chosen': -50.8135986328125, 'logps/rejected': -78.90911865234375, 'logps/ref_chosen': -50.80324172973633, 'logps/ref_rejected': -78.82334899902344, 'KL/chosen_KL_mean': -0.010358810424804688, 'KL/rejected_KL_mean': -0.08577346801757812, 'KL/mean': -0.048069894313812256, 'KL/std': 0.24265292286872864, 'logits/chosen': -0.5490742325782776, 'logits/rejected': -0.4924872815608978, 'epoch': 0.03} + 3%|▎ | 20/681 [00:50<28:02, 2.54s/it] 3%|▎ | 21/681 [00:53<27:45, 2.52s/it] {'loss': 1.3604, 'grad_norm': 378.02789306640625, 'learning_rate': 1.4492753623188405e-07, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.4920843839645386, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.06414835155010223, 'margin_dpo/margin_mean': 0.06414888799190521, 'margin_dpo/margin_std': 0.30178433656692505, 'logps/chosen': -50.06586837768555, 'logps/rejected': -77.935791015625, 'logps/ref_chosen': -50.063018798828125, 'logps/ref_rejected': -77.86878967285156, 'KL/chosen_KL_mean': -0.0028514862060546875, 'KL/rejected_KL_mean': -0.0670013427734375, 'KL/mean': -0.034926123917102814, 'KL/std': 0.2600030303001404, 'logits/chosen': -0.5145970582962036, 'logits/rejected': -0.4921773076057434, 'epoch': 0.03} + 3%|▎ | 21/681 [00:53<27:45, 2.52s/it] 3%|▎ | 22/681 [00:55<27:46, 2.53s/it] {'loss': 1.2978, 'grad_norm': 388.7802429199219, 'learning_rate': 1.5217391304347825e-07, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.4745423197746277, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.20674064755439758, 'margin_dpo/margin_mean': 0.206741064786911, 'margin_dpo/margin_std': 0.43350815773010254, 'logps/chosen': -59.02252197265625, 'logps/rejected': -97.6762924194336, 'logps/ref_chosen': -59.05763626098633, 'logps/ref_rejected': -97.50466918945312, 'KL/chosen_KL_mean': 0.03511619567871094, 'KL/rejected_KL_mean': -0.17161941528320312, 'KL/mean': -0.06825144588947296, 'KL/std': 0.30668485164642334, 'logits/chosen': -0.4853633642196655, 'logits/rejected': -0.4417203366756439, 'epoch': 0.03} + 3%|▎ | 22/681 [00:55<27:46, 2.53s/it] 3%|▎ | 23/681 [00:58<29:03, 2.65s/it] {'loss': 1.2662, 'grad_norm': 364.82421875, 'learning_rate': 1.5942028985507245e-07, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.4666573405265808, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.27054840326309204, 'margin_dpo/margin_mean': 0.2705477774143219, 'margin_dpo/margin_std': 0.40643441677093506, 'logps/chosen': -59.98466873168945, 'logps/rejected': -81.31707763671875, 'logps/ref_chosen': -60.07769775390625, 'logps/ref_rejected': -81.13955688476562, 'KL/chosen_KL_mean': 0.09302711486816406, 'KL/rejected_KL_mean': -0.17752456665039062, 'KL/mean': -0.042252302169799805, 'KL/std': 0.31162387132644653, 'logits/chosen': -0.5055208206176758, 'logits/rejected': -0.4839291274547577, 'epoch': 0.03} + 3%|▎ | 23/681 [00:58<29:03, 2.65s/it] 4%|▎ | 24/681 [01:01<29:05, 2.66s/it] {'loss': 1.2773, 'grad_norm': 391.4231872558594, 'learning_rate': 1.6666666666666665e-07, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.4699401259422302, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.24319219589233398, 'margin_dpo/margin_mean': 0.24319320917129517, 'margin_dpo/margin_std': 0.3720870018005371, 'logps/chosen': -44.24581527709961, 'logps/rejected': -99.32318878173828, 'logps/ref_chosen': -44.29103469848633, 'logps/ref_rejected': -99.12521362304688, 'KL/chosen_KL_mean': 0.04521942138671875, 'KL/rejected_KL_mean': -0.19797515869140625, 'KL/mean': -0.0763748288154602, 'KL/std': 0.28322041034698486, 'logits/chosen': -0.5012839436531067, 'logits/rejected': -0.4848848581314087, 'epoch': 0.04} + 4%|▎ | 24/681 [01:01<29:05, 2.66s/it] 4%|▎ | 25/681 [01:03<29:00, 2.65s/it] {'loss': 1.2852, 'grad_norm': 349.39923095703125, 'learning_rate': 1.7391304347826085e-07, 'fcm_dpo/beta': 0.5, 'fcm_dpo/q_t': 0.4704943895339966, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.2395843267440796, 'margin_dpo/margin_mean': 0.2395840287208557, 'margin_dpo/margin_std': 0.49307340383529663, 'logps/chosen': -52.482852935791016, 'logps/rejected': -89.52757263183594, 'logps/ref_chosen': -52.537052154541016, 'logps/ref_rejected': -89.34219360351562, 'KL/chosen_KL_mean': 0.05419921875, 'KL/rejected_KL_mean': -0.18538284301757812, 'KL/mean': -0.06559216976165771, 'KL/std': 0.3871203064918518, 'logits/chosen': -0.5076802968978882, 'logits/rejected': -0.47814005613327026, 'epoch': 0.04} + 4%|▎ | 25/681 [01:04<29:00, 2.65s/it] 4%|▍ | 26/681 [01:06<27:42, 2.54s/it] {'loss': 1.2223, 'grad_norm': 383.2384948730469, 'learning_rate': 1.8115942028985507e-07, 'fcm_dpo/beta': 0.5046226978302002, 'fcm_dpo/q_t': 0.4540257155895233, 'fcm_dpo/delta': 0.09160952270030975, 'fcm_dpo/margin': 0.37870925664901733, 'margin_dpo/margin_mean': 0.37870919704437256, 'margin_dpo/margin_std': 0.5341597199440002, 'logps/chosen': -53.858070373535156, 'logps/rejected': -103.67369079589844, 'logps/ref_chosen': -53.92280578613281, 'logps/ref_rejected': -103.35971069335938, 'KL/chosen_KL_mean': 0.06473541259765625, 'KL/rejected_KL_mean': -0.31397247314453125, 'KL/mean': -0.1246185302734375, 'KL/std': 0.4182741940021515, 'logits/chosen': -0.5243556499481201, 'logits/rejected': -0.4925326108932495, 'epoch': 0.04} + 4%|▍ | 26/681 [01:06<27:42, 2.54s/it] 4%|▍ | 27/681 [01:08<27:30, 2.52s/it] {'loss': 1.1512, 'grad_norm': 404.0160217285156, 'learning_rate': 1.8840579710144927e-07, 'fcm_dpo/beta': 0.5186325311660767, 'fcm_dpo/q_t': 0.435089111328125, 'fcm_dpo/delta': 0.13306188583374023, 'fcm_dpo/margin': 0.5219477415084839, 'margin_dpo/margin_mean': 0.5219472646713257, 'margin_dpo/margin_std': 0.5090110301971436, 'logps/chosen': -42.75566101074219, 'logps/rejected': -99.10327911376953, 'logps/ref_chosen': -42.898529052734375, 'logps/ref_rejected': -98.72419738769531, 'KL/chosen_KL_mean': 0.1428699493408203, 'KL/rejected_KL_mean': -0.3790779113769531, 'KL/mean': -0.11810372769832611, 'KL/std': 0.44679516553878784, 'logits/chosen': -0.5184708833694458, 'logits/rejected': -0.4820369780063629, 'epoch': 0.04} + 4%|▍ | 27/681 [01:08<27:30, 2.52s/it] 4%|▍ | 28/681 [01:11<27:37, 2.54s/it] {'loss': 1.2296, 'grad_norm': 340.0865173339844, 'learning_rate': 1.9565217391304347e-07, 'fcm_dpo/beta': 0.5273520350456238, 'fcm_dpo/q_t': 0.45420730113983154, 'fcm_dpo/delta': 0.08291557431221008, 'fcm_dpo/margin': 0.36012983322143555, 'margin_dpo/margin_mean': 0.36013028025627136, 'margin_dpo/margin_std': 0.5909340381622314, 'logps/chosen': -60.52132797241211, 'logps/rejected': -91.72607421875, 'logps/ref_chosen': -60.55650329589844, 'logps/ref_rejected': -91.40111541748047, 'KL/chosen_KL_mean': 0.03517341613769531, 'KL/rejected_KL_mean': -0.32495880126953125, 'KL/mean': -0.1448913812637329, 'KL/std': 0.41489964723587036, 'logits/chosen': -0.5174187421798706, 'logits/rejected': -0.4631088972091675, 'epoch': 0.04} + 4%|▍ | 28/681 [01:11<27:37, 2.54s/it] 4%|▍ | 29/681 [01:13<26:33, 2.44s/it] {'loss': 1.1301, 'grad_norm': 398.7171936035156, 'learning_rate': 2.028985507246377e-07, 'fcm_dpo/beta': 0.5401861667633057, 'fcm_dpo/q_t': 0.42827117443084717, 'fcm_dpo/delta': 0.10225643217563629, 'fcm_dpo/margin': 0.5567755699157715, 'margin_dpo/margin_mean': 0.5567758679389954, 'margin_dpo/margin_std': 0.5642556548118591, 'logps/chosen': -57.71403503417969, 'logps/rejected': -97.85737609863281, 'logps/ref_chosen': -57.80778503417969, 'logps/ref_rejected': -97.39434814453125, 'KL/chosen_KL_mean': 0.09375, 'KL/rejected_KL_mean': -0.4630241394042969, 'KL/mean': -0.18463768064975739, 'KL/std': 0.4841146767139435, 'logits/chosen': -0.5421187877655029, 'logits/rejected': -0.49508053064346313, 'epoch': 0.04} + 4%|▍ | 29/681 [01:13<26:33, 2.44s/it] 4%|▍ | 30/681 [01:16<27:09, 2.50s/it] {'loss': 1.0681, 'grad_norm': 362.9571533203125, 'learning_rate': 2.1014492753623187e-07, 'fcm_dpo/beta': 0.542646050453186, 'fcm_dpo/q_t': 0.40854281187057495, 'fcm_dpo/delta': 0.01639886572957039, 'fcm_dpo/margin': 0.7079055309295654, 'margin_dpo/margin_mean': 0.7079058289527893, 'margin_dpo/margin_std': 0.6428213119506836, 'logps/chosen': -52.417388916015625, 'logps/rejected': -99.03712463378906, 'logps/ref_chosen': -52.577369689941406, 'logps/ref_rejected': -98.48920440673828, 'KL/chosen_KL_mean': 0.15998458862304688, 'KL/rejected_KL_mean': -0.5479164123535156, 'KL/mean': -0.19396524131298065, 'KL/std': 0.6062048673629761, 'logits/chosen': -0.482383131980896, 'logits/rejected': -0.451177716255188, 'epoch': 0.04} + 4%|▍ | 30/681 [01:16<27:09, 2.50s/it] 5%|▍ | 31/681 [01:18<27:39, 2.55s/it] {'loss': 1.1624, 'grad_norm': 309.9827575683594, 'learning_rate': 2.1739130434782607e-07, 'fcm_dpo/beta': 0.5562627911567688, 'fcm_dpo/q_t': 0.43538039922714233, 'fcm_dpo/delta': 0.12924844026565552, 'fcm_dpo/margin': 0.4924760162830353, 'margin_dpo/margin_mean': 0.492476224899292, 'margin_dpo/margin_std': 0.6477472186088562, 'logps/chosen': -63.704071044921875, 'logps/rejected': -73.28363037109375, 'logps/ref_chosen': -63.806922912597656, 'logps/ref_rejected': -72.89400482177734, 'KL/chosen_KL_mean': 0.10284805297851562, 'KL/rejected_KL_mean': -0.3896293640136719, 'KL/mean': -0.1433902531862259, 'KL/std': 0.5450563430786133, 'logits/chosen': -0.48639383912086487, 'logits/rejected': -0.4393838047981262, 'epoch': 0.05} + 5%|▍ | 31/681 [01:18<27:39, 2.55s/it] 5%|▍ | 32/681 [01:21<28:12, 2.61s/it] {'loss': 1.0627, 'grad_norm': 343.0960998535156, 'learning_rate': 2.2463768115942027e-07, 'fcm_dpo/beta': 0.5558298230171204, 'fcm_dpo/q_t': 0.4026581645011902, 'fcm_dpo/delta': -0.025696825236082077, 'fcm_dpo/margin': 0.7637799978256226, 'margin_dpo/margin_mean': 0.7637800574302673, 'margin_dpo/margin_std': 0.9043101668357849, 'logps/chosen': -62.550331115722656, 'logps/rejected': -89.89208984375, 'logps/ref_chosen': -62.739524841308594, 'logps/ref_rejected': -89.3175048828125, 'KL/chosen_KL_mean': 0.1891956329345703, 'KL/rejected_KL_mean': -0.5745887756347656, 'KL/mean': -0.19269661605358124, 'KL/std': 0.7516759634017944, 'logits/chosen': -0.5173541307449341, 'logits/rejected': -0.4762566089630127, 'epoch': 0.05} + 5%|▍ | 32/681 [01:21<28:12, 2.61s/it] 5%|▍ | 33/681 [01:23<27:30, 2.55s/it] {'loss': 1.1085, 'grad_norm': 328.4997253417969, 'learning_rate': 2.318840579710145e-07, 'fcm_dpo/beta': 0.562440037727356, 'fcm_dpo/q_t': 0.4198671281337738, 'fcm_dpo/delta': 0.0659160241484642, 'fcm_dpo/margin': 0.5975915789604187, 'margin_dpo/margin_mean': 0.5975916385650635, 'margin_dpo/margin_std': 0.6252603530883789, 'logps/chosen': -53.1654052734375, 'logps/rejected': -88.38716125488281, 'logps/ref_chosen': -53.26097106933594, 'logps/ref_rejected': -87.8851318359375, 'KL/chosen_KL_mean': 0.09556961059570312, 'KL/rejected_KL_mean': -0.5020217895507812, 'KL/mean': -0.20322665572166443, 'KL/std': 0.5590921640396118, 'logits/chosen': -0.5058610439300537, 'logits/rejected': -0.48015594482421875, 'epoch': 0.05} + 5%|▍ | 33/681 [01:24<27:30, 2.55s/it] 5%|▍ | 34/681 [01:26<27:40, 2.57s/it] {'loss': 1.0336, 'grad_norm': 312.8501892089844, 'learning_rate': 2.391304347826087e-07, 'fcm_dpo/beta': 0.5575153827667236, 'fcm_dpo/q_t': 0.39417457580566406, 'fcm_dpo/delta': -0.057598959654569626, 'fcm_dpo/margin': 0.8158173561096191, 'margin_dpo/margin_mean': 0.8158169984817505, 'margin_dpo/margin_std': 0.8447773456573486, 'logps/chosen': -50.72174072265625, 'logps/rejected': -102.64208221435547, 'logps/ref_chosen': -50.81732940673828, 'logps/ref_rejected': -101.92184448242188, 'KL/chosen_KL_mean': 0.09558486938476562, 'KL/rejected_KL_mean': -0.7202377319335938, 'KL/mean': -0.3123227655887604, 'KL/std': 0.7326186895370483, 'logits/chosen': -0.4918326139450073, 'logits/rejected': -0.47446727752685547, 'epoch': 0.05} + 5%|▍ | 34/681 [01:26<27:40, 2.57s/it] 5%|▌ | 35/681 [01:29<27:58, 2.60s/it] {'loss': 0.9218, 'grad_norm': 279.2007751464844, 'learning_rate': 2.463768115942029e-07, 'fcm_dpo/beta': 0.5353924036026001, 'fcm_dpo/q_t': 0.35741329193115234, 'fcm_dpo/delta': -0.24643680453300476, 'fcm_dpo/margin': 1.1769663095474243, 'margin_dpo/margin_mean': 1.1769659519195557, 'margin_dpo/margin_std': 1.0974863767623901, 'logps/chosen': -50.89972686767578, 'logps/rejected': -107.87663269042969, 'logps/ref_chosen': -51.02449035644531, 'logps/ref_rejected': -106.82443237304688, 'KL/chosen_KL_mean': 0.12476348876953125, 'KL/rejected_KL_mean': -1.0522003173828125, 'KL/mean': -0.46371960639953613, 'KL/std': 0.9688708782196045, 'logits/chosen': -0.5165481567382812, 'logits/rejected': -0.47960567474365234, 'epoch': 0.05} + 5%|▌ | 35/681 [01:29<27:58, 2.60s/it] 5%|▌ | 36/681 [01:31<28:06, 2.61s/it] {'loss': 0.9947, 'grad_norm': 232.2285919189453, 'learning_rate': 2.536231884057971e-07, 'fcm_dpo/beta': 0.5205714702606201, 'fcm_dpo/q_t': 0.37265270948410034, 'fcm_dpo/delta': -0.1801259070634842, 'fcm_dpo/margin': 1.09473717212677, 'margin_dpo/margin_mean': 1.094736099243164, 'margin_dpo/margin_std': 1.23842453956604, 'logps/chosen': -51.936004638671875, 'logps/rejected': -87.07987976074219, 'logps/ref_chosen': -51.991493225097656, 'logps/ref_rejected': -86.0406265258789, 'KL/chosen_KL_mean': 0.055484771728515625, 'KL/rejected_KL_mean': -1.0392494201660156, 'KL/mean': -0.4918856918811798, 'KL/std': 1.049076795578003, 'logits/chosen': -0.5617629885673523, 'logits/rejected': -0.5254461765289307, 'epoch': 0.05} + 5%|▌ | 36/681 [01:31<28:06, 2.61s/it] 5%|▌ | 37/681 [01:34<28:05, 2.62s/it] {'loss': 1.0432, 'grad_norm': 219.2965850830078, 'learning_rate': 2.6086956521739126e-07, 'fcm_dpo/beta': 0.49882519245147705, 'fcm_dpo/q_t': 0.3891563415527344, 'fcm_dpo/delta': -0.11387760937213898, 'fcm_dpo/margin': 1.0146119594573975, 'margin_dpo/margin_mean': 1.0146114826202393, 'margin_dpo/margin_std': 1.3467109203338623, 'logps/chosen': -62.80524826049805, 'logps/rejected': -78.90782928466797, 'logps/ref_chosen': -62.807106018066406, 'logps/ref_rejected': -77.89507293701172, 'KL/chosen_KL_mean': 0.0018596649169921875, 'KL/rejected_KL_mean': -1.01275634765625, 'KL/mean': -0.5054476261138916, 'KL/std': 1.0474822521209717, 'logits/chosen': -0.4967535734176636, 'logits/rejected': -0.4519627094268799, 'epoch': 0.05} + 5%|▌ | 37/681 [01:34<28:05, 2.62s/it] 6%|▌ | 38/681 [01:36<26:47, 2.50s/it] {'loss': 0.9599, 'grad_norm': 220.9096221923828, 'learning_rate': 2.681159420289855e-07, 'fcm_dpo/beta': 0.4817589521408081, 'fcm_dpo/q_t': 0.3633432388305664, 'fcm_dpo/delta': -0.2642138600349426, 'fcm_dpo/margin': 1.3442010879516602, 'margin_dpo/margin_mean': 1.344200849533081, 'margin_dpo/margin_std': 1.6261742115020752, 'logps/chosen': -48.24705505371094, 'logps/rejected': -99.11317443847656, 'logps/ref_chosen': -48.39051818847656, 'logps/ref_rejected': -97.91244506835938, 'KL/chosen_KL_mean': 0.1434650421142578, 'KL/rejected_KL_mean': -1.2007369995117188, 'KL/mean': -0.5286348462104797, 'KL/std': 1.3252570629119873, 'logits/chosen': -0.5368313789367676, 'logits/rejected': -0.5042594075202942, 'epoch': 0.06} + 6%|▌ | 38/681 [01:36<26:47, 2.50s/it] 6%|▌ | 39/681 [01:39<26:46, 2.50s/it] {'loss': 0.8796, 'grad_norm': 206.50027465820312, 'learning_rate': 2.753623188405797e-07, 'fcm_dpo/beta': 0.44901108741760254, 'fcm_dpo/q_t': 0.3372858464717865, 'fcm_dpo/delta': -0.3407745361328125, 'fcm_dpo/margin': 1.592177152633667, 'margin_dpo/margin_mean': 1.5921775102615356, 'margin_dpo/margin_std': 1.383728265762329, 'logps/chosen': -50.679996490478516, 'logps/rejected': -80.09121704101562, 'logps/ref_chosen': -50.75047302246094, 'logps/ref_rejected': -78.56951141357422, 'KL/chosen_KL_mean': 0.07047653198242188, 'KL/rejected_KL_mean': -1.5217018127441406, 'KL/mean': -0.7256151437759399, 'KL/std': 1.2704432010650635, 'logits/chosen': -0.5508787631988525, 'logits/rejected': -0.5106680989265442, 'epoch': 0.06} + 6%|▌ | 39/681 [01:39<26:46, 2.50s/it] 6%|▌ | 40/681 [01:42<27:26, 2.57s/it] {'loss': 0.9362, 'grad_norm': 164.9803009033203, 'learning_rate': 2.8260869565217386e-07, 'fcm_dpo/beta': 0.4270463287830353, 'fcm_dpo/q_t': 0.3570956885814667, 'fcm_dpo/delta': -0.2727539539337158, 'fcm_dpo/margin': 1.5351669788360596, 'margin_dpo/margin_mean': 1.5351676940917969, 'margin_dpo/margin_std': 1.6188992261886597, 'logps/chosen': -57.79644012451172, 'logps/rejected': -75.6466064453125, 'logps/ref_chosen': -57.985069274902344, 'logps/ref_rejected': -74.3000717163086, 'KL/chosen_KL_mean': 0.18862533569335938, 'KL/rejected_KL_mean': -1.3465385437011719, 'KL/mean': -0.5789550542831421, 'KL/std': 1.350581407546997, 'logits/chosen': -0.4705943763256073, 'logits/rejected': -0.4372428059577942, 'epoch': 0.06} + 6%|▌ | 40/681 [01:42<27:26, 2.57s/it] 6%|▌ | 41/681 [01:44<27:21, 2.57s/it] {'loss': 0.9018, 'grad_norm': 177.38894653320312, 'learning_rate': 2.898550724637681e-07, 'fcm_dpo/beta': 0.3977815508842468, 'fcm_dpo/q_t': 0.3413015604019165, 'fcm_dpo/delta': -0.3646352291107178, 'fcm_dpo/margin': 1.8554785251617432, 'margin_dpo/margin_mean': 1.8554792404174805, 'margin_dpo/margin_std': 1.9582023620605469, 'logps/chosen': -62.69247055053711, 'logps/rejected': -98.87565612792969, 'logps/ref_chosen': -62.69581604003906, 'logps/ref_rejected': -97.02352905273438, 'KL/chosen_KL_mean': 0.003345489501953125, 'KL/rejected_KL_mean': -1.8521308898925781, 'KL/mean': -0.9243937730789185, 'KL/std': 1.8027801513671875, 'logits/chosen': -0.5352627038955688, 'logits/rejected': -0.4983564019203186, 'epoch': 0.06} + 6%|▌ | 41/681 [01:44<27:21, 2.57s/it] 6%|▌ | 42/681 [01:47<27:14, 2.56s/it] {'loss': 0.8128, 'grad_norm': 159.704833984375, 'learning_rate': 2.971014492753623e-07, 'fcm_dpo/beta': 0.3601230978965759, 'fcm_dpo/q_t': 0.3121350407600403, 'fcm_dpo/delta': -0.5366164445877075, 'fcm_dpo/margin': 2.467926502227783, 'margin_dpo/margin_mean': 2.467926502227783, 'margin_dpo/margin_std': 2.2979540824890137, 'logps/chosen': -58.745147705078125, 'logps/rejected': -112.15501403808594, 'logps/ref_chosen': -58.966426849365234, 'logps/ref_rejected': -109.90837097167969, 'KL/chosen_KL_mean': 0.2212810516357422, 'KL/rejected_KL_mean': -2.24664306640625, 'KL/mean': -1.0126826763153076, 'KL/std': 2.0452983379364014, 'logits/chosen': -0.5344812870025635, 'logits/rejected': -0.4876905083656311, 'epoch': 0.06} + 6%|▌ | 42/681 [01:47<27:14, 2.56s/it] 6%|▋ | 43/681 [01:49<27:18, 2.57s/it] {'loss': 0.8349, 'grad_norm': 152.13230895996094, 'learning_rate': 3.043478260869565e-07, 'fcm_dpo/beta': 0.32807010412216187, 'fcm_dpo/q_t': 0.325826495885849, 'fcm_dpo/delta': -0.4021656811237335, 'fcm_dpo/margin': 2.342538356781006, 'margin_dpo/margin_mean': 2.342538356781006, 'margin_dpo/margin_std': 1.8504887819290161, 'logps/chosen': -53.676002502441406, 'logps/rejected': -98.34274291992188, 'logps/ref_chosen': -54.15599822998047, 'logps/ref_rejected': -96.48019409179688, 'KL/chosen_KL_mean': 0.4799919128417969, 'KL/rejected_KL_mean': -1.862548828125, 'KL/mean': -0.6912780404090881, 'KL/std': 1.731245756149292, 'logits/chosen': -0.5518888235092163, 'logits/rejected': -0.5275447368621826, 'epoch': 0.06} + 6%|▋ | 43/681 [01:49<27:18, 2.57s/it] 6%|▋ | 44/681 [01:52<27:16, 2.57s/it] {'loss': 0.8114, 'grad_norm': 148.4615020751953, 'learning_rate': 3.115942028985507e-07, 'fcm_dpo/beta': 0.3006964921951294, 'fcm_dpo/q_t': 0.31683629751205444, 'fcm_dpo/delta': -0.4574472904205322, 'fcm_dpo/margin': 2.719846248626709, 'margin_dpo/margin_mean': 2.719846248626709, 'margin_dpo/margin_std': 2.149664878845215, 'logps/chosen': -49.82621765136719, 'logps/rejected': -111.25132751464844, 'logps/ref_chosen': -50.07849884033203, 'logps/ref_rejected': -108.78376007080078, 'KL/chosen_KL_mean': 0.25227928161621094, 'KL/rejected_KL_mean': -2.4675674438476562, 'KL/mean': -1.1076438426971436, 'KL/std': 2.1824231147766113, 'logits/chosen': -0.49776938557624817, 'logits/rejected': -0.4771164655685425, 'epoch': 0.06} + 6%|▋ | 44/681 [01:52<27:16, 2.57s/it] 7%|▋ | 45/681 [01:54<27:25, 2.59s/it] {'loss': 0.9552, 'grad_norm': 119.1253890991211, 'learning_rate': 3.188405797101449e-07, 'fcm_dpo/beta': 0.2850699722766876, 'fcm_dpo/q_t': 0.3639563322067261, 'fcm_dpo/delta': -0.2445305585861206, 'fcm_dpo/margin': 2.2085297107696533, 'margin_dpo/margin_mean': 2.208528518676758, 'margin_dpo/margin_std': 2.4762864112854004, 'logps/chosen': -48.28730010986328, 'logps/rejected': -80.01732635498047, 'logps/ref_chosen': -48.4149284362793, 'logps/ref_rejected': -77.93643188476562, 'KL/chosen_KL_mean': 0.12762832641601562, 'KL/rejected_KL_mean': -2.0808982849121094, 'KL/mean': -0.9766333103179932, 'KL/std': 1.9980565309524536, 'logits/chosen': -0.47408968210220337, 'logits/rejected': -0.46131014823913574, 'epoch': 0.07} + 7%|▋ | 45/681 [01:54<27:25, 2.59s/it] 7%|▋ | 46/681 [01:57<27:37, 2.61s/it] {'loss': 0.9049, 'grad_norm': 128.59988403320312, 'learning_rate': 3.260869565217391e-07, 'fcm_dpo/beta': 0.26706546545028687, 'fcm_dpo/q_t': 0.34314534068107605, 'fcm_dpo/delta': -0.361088365316391, 'fcm_dpo/margin': 2.753678798675537, 'margin_dpo/margin_mean': 2.753678798675537, 'margin_dpo/margin_std': 2.9481449127197266, 'logps/chosen': -55.783634185791016, 'logps/rejected': -98.19047546386719, 'logps/ref_chosen': -55.999427795410156, 'logps/ref_rejected': -95.652587890625, 'KL/chosen_KL_mean': 0.2157917022705078, 'KL/rejected_KL_mean': -2.5378875732421875, 'KL/mean': -1.1610474586486816, 'KL/std': 2.4480698108673096, 'logits/chosen': -0.5371255278587341, 'logits/rejected': -0.486606240272522, 'epoch': 0.07} + 7%|▋ | 46/681 [01:57<27:37, 2.61s/it] 7%|▋ | 47/681 [02:00<27:38, 2.62s/it] {'loss': 0.8917, 'grad_norm': 118.37008666992188, 'learning_rate': 3.333333333333333e-07, 'fcm_dpo/beta': 0.2504443824291229, 'fcm_dpo/q_t': 0.34311166405677795, 'fcm_dpo/delta': -0.3251284062862396, 'fcm_dpo/margin': 2.808493137359619, 'margin_dpo/margin_mean': 2.8084943294525146, 'margin_dpo/margin_std': 2.5350003242492676, 'logps/chosen': -57.50041198730469, 'logps/rejected': -97.0620346069336, 'logps/ref_chosen': -57.92607879638672, 'logps/ref_rejected': -94.67920684814453, 'KL/chosen_KL_mean': 0.42566680908203125, 'KL/rejected_KL_mean': -2.3828277587890625, 'KL/mean': -0.9785783290863037, 'KL/std': 2.440776824951172, 'logits/chosen': -0.5844066143035889, 'logits/rejected': -0.5324996709823608, 'epoch': 0.07} + 7%|▋ | 47/681 [02:00<27:38, 2.62s/it] 7%|▋ | 48/681 [02:02<27:57, 2.65s/it] {'loss': 0.9335, 'grad_norm': 125.24164581298828, 'learning_rate': 3.4057971014492755e-07, 'fcm_dpo/beta': 0.23410022258758545, 'fcm_dpo/q_t': 0.35484230518341064, 'fcm_dpo/delta': -0.2554876506328583, 'fcm_dpo/margin': 2.720108985900879, 'margin_dpo/margin_mean': 2.720109224319458, 'margin_dpo/margin_std': 2.4631295204162598, 'logps/chosen': -57.077545166015625, 'logps/rejected': -90.62617492675781, 'logps/ref_chosen': -57.188072204589844, 'logps/ref_rejected': -88.0166015625, 'KL/chosen_KL_mean': 0.11053085327148438, 'KL/rejected_KL_mean': -2.609577178955078, 'KL/mean': -1.249524712562561, 'KL/std': 2.3069586753845215, 'logits/chosen': -0.5820130109786987, 'logits/rejected': -0.5238351225852966, 'epoch': 0.07} + 7%|▋ | 48/681 [02:02<27:57, 2.65s/it] 7%|▋ | 49/681 [02:05<27:39, 2.63s/it] {'loss': 0.8999, 'grad_norm': 352.78228759765625, 'learning_rate': 3.478260869565217e-07, 'fcm_dpo/beta': 0.2220032513141632, 'fcm_dpo/q_t': 0.3432408273220062, 'fcm_dpo/delta': -0.34955620765686035, 'fcm_dpo/margin': 3.2672815322875977, 'margin_dpo/margin_mean': 3.2672815322875977, 'margin_dpo/margin_std': 3.3359837532043457, 'logps/chosen': -61.286048889160156, 'logps/rejected': -86.63554382324219, 'logps/ref_chosen': -61.685272216796875, 'logps/ref_rejected': -83.76747131347656, 'KL/chosen_KL_mean': 0.39922142028808594, 'KL/rejected_KL_mean': -2.8680648803710938, 'KL/mean': -1.2344255447387695, 'KL/std': 2.883481025695801, 'logits/chosen': -0.5358976125717163, 'logits/rejected': -0.47647011280059814, 'epoch': 0.07} + 7%|▋ | 49/681 [02:05<27:39, 2.63s/it] 7%|▋ | 50/681 [02:08<27:46, 2.64s/it] {'loss': 0.8875, 'grad_norm': 98.57312774658203, 'learning_rate': 3.5507246376811595e-07, 'fcm_dpo/beta': 0.20502600073814392, 'fcm_dpo/q_t': 0.33918917179107666, 'fcm_dpo/delta': -0.3554917573928833, 'fcm_dpo/margin': 3.5516459941864014, 'margin_dpo/margin_mean': 3.5516457557678223, 'margin_dpo/margin_std': 3.4122915267944336, 'logps/chosen': -58.757667541503906, 'logps/rejected': -99.94332122802734, 'logps/ref_chosen': -58.72413635253906, 'logps/ref_rejected': -96.35814666748047, 'KL/chosen_KL_mean': -0.03353118896484375, 'KL/rejected_KL_mean': -3.585174560546875, 'KL/mean': -1.8093570470809937, 'KL/std': 2.991940975189209, 'logits/chosen': -0.5251749157905579, 'logits/rejected': -0.4888863265514374, 'epoch': 0.07} + 7%|▋ | 50/681 [02:08<27:46, 2.64s/it] 7%|▋ | 51/681 [02:10<27:41, 2.64s/it] {'loss': 0.9556, 'grad_norm': 78.19511413574219, 'learning_rate': 3.6231884057971015e-07, 'fcm_dpo/beta': 0.19137313961982727, 'fcm_dpo/q_t': 0.35805124044418335, 'fcm_dpo/delta': -0.30843037366867065, 'fcm_dpo/margin': 3.572404384613037, 'margin_dpo/margin_mean': 3.5724036693573, 'margin_dpo/margin_std': 4.397710800170898, 'logps/chosen': -61.48182678222656, 'logps/rejected': -79.68255615234375, 'logps/ref_chosen': -61.3736686706543, 'logps/ref_rejected': -76.00199890136719, 'KL/chosen_KL_mean': -0.10815811157226562, 'KL/rejected_KL_mean': -3.6805572509765625, 'KL/mean': -1.8943548202514648, 'KL/std': 3.606013774871826, 'logits/chosen': -0.5243328809738159, 'logits/rejected': -0.491935670375824, 'epoch': 0.07} + 7%|▋ | 51/681 [02:10<27:41, 2.64s/it] 8%|▊ | 52/681 [02:13<27:04, 2.58s/it] {'loss': 0.7675, 'grad_norm': 78.32229614257812, 'learning_rate': 3.695652173913043e-07, 'fcm_dpo/beta': 0.17365267872810364, 'fcm_dpo/q_t': 0.29616397619247437, 'fcm_dpo/delta': -0.5967501401901245, 'fcm_dpo/margin': 5.411087989807129, 'margin_dpo/margin_mean': 5.411087989807129, 'margin_dpo/margin_std': 4.405357360839844, 'logps/chosen': -51.833335876464844, 'logps/rejected': -84.88097381591797, 'logps/ref_chosen': -52.33735656738281, 'logps/ref_rejected': -79.97391510009766, 'KL/chosen_KL_mean': 0.5040225982666016, 'KL/rejected_KL_mean': -4.9070587158203125, 'KL/mean': -2.2015185356140137, 'KL/std': 4.136686325073242, 'logits/chosen': -0.5358279943466187, 'logits/rejected': -0.4792342185974121, 'epoch': 0.08} + 8%|▊ | 52/681 [02:13<27:04, 2.58s/it] 8%|▊ | 53/681 [02:15<26:57, 2.58s/it] {'loss': 0.8413, 'grad_norm': 76.56503295898438, 'learning_rate': 3.7681159420289855e-07, 'fcm_dpo/beta': 0.1579442024230957, 'fcm_dpo/q_t': 0.32274216413497925, 'fcm_dpo/delta': -0.5074787139892578, 'fcm_dpo/margin': 5.484250068664551, 'margin_dpo/margin_mean': 5.484249114990234, 'margin_dpo/margin_std': 5.2846550941467285, 'logps/chosen': -53.35175323486328, 'logps/rejected': -97.30493927001953, 'logps/ref_chosen': -53.31465148925781, 'logps/ref_rejected': -91.78359985351562, 'KL/chosen_KL_mean': -0.03709983825683594, 'KL/rejected_KL_mean': -5.521343231201172, 'KL/mean': -2.7792229652404785, 'KL/std': 4.680900573730469, 'logits/chosen': -0.620309591293335, 'logits/rejected': -0.5990212559700012, 'epoch': 0.08} + 8%|▊ | 53/681 [02:15<26:57, 2.58s/it] 8%|▊ | 54/681 [02:18<26:21, 2.52s/it] {'loss': 0.8929, 'grad_norm': 68.20849609375, 'learning_rate': 3.8405797101449274e-07, 'fcm_dpo/beta': 0.14552612602710724, 'fcm_dpo/q_t': 0.34551307559013367, 'fcm_dpo/delta': -0.3178091049194336, 'fcm_dpo/margin': 4.783502578735352, 'margin_dpo/margin_mean': 4.783502578735352, 'margin_dpo/margin_std': 4.461567401885986, 'logps/chosen': -50.84632110595703, 'logps/rejected': -96.65655517578125, 'logps/ref_chosen': -50.68865966796875, 'logps/ref_rejected': -91.71539306640625, 'KL/chosen_KL_mean': -0.15765953063964844, 'KL/rejected_KL_mean': -4.941162109375, 'KL/mean': -2.5494110584259033, 'KL/std': 4.4056077003479, 'logits/chosen': -0.5885263085365295, 'logits/rejected': -0.5346698760986328, 'epoch': 0.08} + 8%|▊ | 54/681 [02:18<26:21, 2.52s/it] 8%|▊ | 55/681 [02:20<25:25, 2.44s/it] {'loss': 0.9046, 'grad_norm': 66.71011352539062, 'learning_rate': 3.9130434782608694e-07, 'fcm_dpo/beta': 0.13501086831092834, 'fcm_dpo/q_t': 0.3375312089920044, 'fcm_dpo/delta': -0.40534526109695435, 'fcm_dpo/margin': 5.739419460296631, 'margin_dpo/margin_mean': 5.739418983459473, 'margin_dpo/margin_std': 6.410279273986816, 'logps/chosen': -63.210758209228516, 'logps/rejected': -95.32845306396484, 'logps/ref_chosen': -62.615234375, 'logps/ref_rejected': -88.99349975585938, 'KL/chosen_KL_mean': -0.5955238342285156, 'KL/rejected_KL_mean': -6.334949493408203, 'KL/mean': -3.465237617492676, 'KL/std': 5.3761420249938965, 'logits/chosen': -0.653782844543457, 'logits/rejected': -0.5924779176712036, 'epoch': 0.08} + 8%|▊ | 55/681 [02:20<25:25, 2.44s/it] 8%|▊ | 56/681 [02:23<26:01, 2.50s/it] {'loss': 0.9462, 'grad_norm': 55.40151596069336, 'learning_rate': 3.9855072463768114e-07, 'fcm_dpo/beta': 0.12596547603607178, 'fcm_dpo/q_t': 0.3533214330673218, 'fcm_dpo/delta': -0.30908891558647156, 'fcm_dpo/margin': 5.461906433105469, 'margin_dpo/margin_mean': 5.461906433105469, 'margin_dpo/margin_std': 6.3516526222229, 'logps/chosen': -58.33158493041992, 'logps/rejected': -100.03520202636719, 'logps/ref_chosen': -57.9327278137207, 'logps/ref_rejected': -94.1744384765625, 'KL/chosen_KL_mean': -0.39885711669921875, 'KL/rejected_KL_mean': -5.860759735107422, 'KL/mean': -3.1298060417175293, 'KL/std': 5.217003345489502, 'logits/chosen': -0.5743478536605835, 'logits/rejected': -0.5299459099769592, 'epoch': 0.08} + 8%|▊ | 56/681 [02:23<26:01, 2.50s/it] 8%|▊ | 57/681 [02:25<26:03, 2.51s/it] {'loss': 0.8847, 'grad_norm': 61.338130950927734, 'learning_rate': 4.057971014492754e-07, 'fcm_dpo/beta': 0.11817534267902374, 'fcm_dpo/q_t': 0.3378201723098755, 'fcm_dpo/delta': -0.3478173613548279, 'fcm_dpo/margin': 6.123730659484863, 'margin_dpo/margin_mean': 6.123730182647705, 'margin_dpo/margin_std': 5.595479488372803, 'logps/chosen': -70.93359375, 'logps/rejected': -102.12748718261719, 'logps/ref_chosen': -70.49528503417969, 'logps/ref_rejected': -95.56546020507812, 'KL/chosen_KL_mean': -0.4383068084716797, 'KL/rejected_KL_mean': -6.562034606933594, 'KL/mean': -3.5001718997955322, 'KL/std': 5.148193359375, 'logits/chosen': -0.5510739088058472, 'logits/rejected': -0.5219501256942749, 'epoch': 0.08} + 8%|▊ | 57/681 [02:25<26:03, 2.51s/it] 9%|▊ | 58/681 [02:28<26:27, 2.55s/it] {'loss': 0.8919, 'grad_norm': 61.08738708496094, 'learning_rate': 4.1304347826086954e-07, 'fcm_dpo/beta': 0.10953576862812042, 'fcm_dpo/q_t': 0.3375468850135803, 'fcm_dpo/delta': -0.3910744786262512, 'fcm_dpo/margin': 6.96067476272583, 'margin_dpo/margin_mean': 6.960675239562988, 'margin_dpo/margin_std': 7.216721534729004, 'logps/chosen': -62.61653137207031, 'logps/rejected': -92.06156158447266, 'logps/ref_chosen': -62.13294219970703, 'logps/ref_rejected': -84.61729431152344, 'KL/chosen_KL_mean': -0.48359107971191406, 'KL/rejected_KL_mean': -7.444267272949219, 'KL/mean': -3.9639272689819336, 'KL/std': 5.993289470672607, 'logits/chosen': -0.5824143886566162, 'logits/rejected': -0.5054690837860107, 'epoch': 0.09} + 9%|▊ | 58/681 [02:28<26:27, 2.55s/it] 9%|▊ | 59/681 [02:30<26:28, 2.55s/it] {'loss': 0.8958, 'grad_norm': 56.65191650390625, 'learning_rate': 4.2028985507246374e-07, 'fcm_dpo/beta': 0.1001485139131546, 'fcm_dpo/q_t': 0.339927077293396, 'fcm_dpo/delta': -0.369930237531662, 'fcm_dpo/margin': 7.362071514129639, 'margin_dpo/margin_mean': 7.362071514129639, 'margin_dpo/margin_std': 7.449038505554199, 'logps/chosen': -52.93829345703125, 'logps/rejected': -97.25303649902344, 'logps/ref_chosen': -51.932525634765625, 'logps/ref_rejected': -88.88520050048828, 'KL/chosen_KL_mean': -1.005767822265625, 'KL/rejected_KL_mean': -8.367839813232422, 'KL/mean': -4.686802864074707, 'KL/std': 6.362232208251953, 'logits/chosen': -0.6022584438323975, 'logits/rejected': -0.5596363544464111, 'epoch': 0.09} + 9%|▊ | 59/681 [02:30<26:28, 2.55s/it] 9%|▉ | 60/681 [02:33<26:18, 2.54s/it] {'loss': 0.9865, 'grad_norm': 63.26620864868164, 'learning_rate': 4.2753623188405794e-07, 'fcm_dpo/beta': 0.09618770331144333, 'fcm_dpo/q_t': 0.36946025490760803, 'fcm_dpo/delta': -0.18251214921474457, 'fcm_dpo/margin': 5.937169075012207, 'margin_dpo/margin_mean': 5.937168598175049, 'margin_dpo/margin_std': 6.618038177490234, 'logps/chosen': -62.88031005859375, 'logps/rejected': -93.26869201660156, 'logps/ref_chosen': -60.94218826293945, 'logps/ref_rejected': -85.39340209960938, 'KL/chosen_KL_mean': -1.9381217956542969, 'KL/rejected_KL_mean': -7.875293731689453, 'KL/mean': -4.90670919418335, 'KL/std': 5.7876200675964355, 'logits/chosen': -0.622028112411499, 'logits/rejected': -0.5631489753723145, 'epoch': 0.09} + 9%|▉ | 60/681 [02:33<26:18, 2.54s/it] 9%|▉ | 61/681 [02:35<26:32, 2.57s/it] {'loss': 0.9727, 'grad_norm': 50.66496658325195, 'learning_rate': 4.3478260869565214e-07, 'fcm_dpo/beta': 0.09131693840026855, 'fcm_dpo/q_t': 0.3635759949684143, 'fcm_dpo/delta': -0.28324007987976074, 'fcm_dpo/margin': 7.258551597595215, 'margin_dpo/margin_mean': 7.258552074432373, 'margin_dpo/margin_std': 9.66031265258789, 'logps/chosen': -61.59498596191406, 'logps/rejected': -98.072509765625, 'logps/ref_chosen': -60.633522033691406, 'logps/ref_rejected': -89.85249328613281, 'KL/chosen_KL_mean': -0.9614639282226562, 'KL/rejected_KL_mean': -8.220016479492188, 'KL/mean': -4.590740203857422, 'KL/std': 7.815638542175293, 'logits/chosen': -0.615394115447998, 'logits/rejected': -0.5809042453765869, 'epoch': 0.09} + 9%|▉ | 61/681 [02:36<26:32, 2.57s/it] 9%|▉ | 62/681 [02:38<26:59, 2.62s/it] {'loss': 1.0378, 'grad_norm': 49.19923782348633, 'learning_rate': 4.420289855072464e-07, 'fcm_dpo/beta': 0.08945208787918091, 'fcm_dpo/q_t': 0.389636754989624, 'fcm_dpo/delta': -0.09905220568180084, 'fcm_dpo/margin': 5.52489709854126, 'margin_dpo/margin_mean': 5.524896621704102, 'margin_dpo/margin_std': 6.8345046043396, 'logps/chosen': -57.32429504394531, 'logps/rejected': -82.26461791992188, 'logps/ref_chosen': -56.15077209472656, 'logps/ref_rejected': -75.56619262695312, 'KL/chosen_KL_mean': -1.1735248565673828, 'KL/rejected_KL_mean': -6.69842529296875, 'KL/mean': -3.935976266860962, 'KL/std': 5.688698768615723, 'logits/chosen': -0.5917966365814209, 'logits/rejected': -0.5570877194404602, 'epoch': 0.09} + 9%|▉ | 62/681 [02:38<26:59, 2.62s/it] 9%|▉ | 63/681 [02:41<26:41, 2.59s/it] {'loss': 0.9454, 'grad_norm': 52.68979263305664, 'learning_rate': 4.4927536231884053e-07, 'fcm_dpo/beta': 0.08485674113035202, 'fcm_dpo/q_t': 0.3566383123397827, 'fcm_dpo/delta': -0.2533873915672302, 'fcm_dpo/margin': 7.485637664794922, 'margin_dpo/margin_mean': 7.485637664794922, 'margin_dpo/margin_std': 7.758219242095947, 'logps/chosen': -75.13002014160156, 'logps/rejected': -107.07832336425781, 'logps/ref_chosen': -73.14739227294922, 'logps/ref_rejected': -97.61006164550781, 'KL/chosen_KL_mean': -1.9826297760009766, 'KL/rejected_KL_mean': -9.468265533447266, 'KL/mean': -5.725447654724121, 'KL/std': 7.366238594055176, 'logits/chosen': -0.5868048667907715, 'logits/rejected': -0.5401818752288818, 'epoch': 0.09} + 9%|▉ | 63/681 [02:41<26:41, 2.59s/it] 9%|▉ | 64/681 [02:43<26:15, 2.55s/it] {'loss': 0.9296, 'grad_norm': 47.52156066894531, 'learning_rate': 4.5652173913043473e-07, 'fcm_dpo/beta': 0.0794319286942482, 'fcm_dpo/q_t': 0.34900087118148804, 'fcm_dpo/delta': -0.3237980306148529, 'fcm_dpo/margin': 8.758337020874023, 'margin_dpo/margin_mean': 8.758337020874023, 'margin_dpo/margin_std': 9.623977661132812, 'logps/chosen': -54.60005187988281, 'logps/rejected': -102.8899917602539, 'logps/ref_chosen': -53.998600006103516, 'logps/ref_rejected': -93.53019714355469, 'KL/chosen_KL_mean': -0.6014499664306641, 'KL/rejected_KL_mean': -9.359790802001953, 'KL/mean': -4.980618953704834, 'KL/std': 7.880523204803467, 'logits/chosen': -0.5694026947021484, 'logits/rejected': -0.5367158651351929, 'epoch': 0.09} + 9%|▉ | 64/681 [02:43<26:15, 2.55s/it] 10%|▉ | 65/681 [02:46<26:26, 2.58s/it] {'loss': 0.9428, 'grad_norm': 46.06278991699219, 'learning_rate': 4.63768115942029e-07, 'fcm_dpo/beta': 0.07528192549943924, 'fcm_dpo/q_t': 0.3515852093696594, 'fcm_dpo/delta': -0.2872685492038727, 'fcm_dpo/margin': 8.849407196044922, 'margin_dpo/margin_mean': 8.849407196044922, 'margin_dpo/margin_std': 9.624493598937988, 'logps/chosen': -67.2533187866211, 'logps/rejected': -121.21318054199219, 'logps/ref_chosen': -64.83599853515625, 'logps/ref_rejected': -109.94645690917969, 'KL/chosen_KL_mean': -2.417318344116211, 'KL/rejected_KL_mean': -11.2667236328125, 'KL/mean': -6.842019081115723, 'KL/std': 8.855112075805664, 'logits/chosen': -0.6717353463172913, 'logits/rejected': -0.6576972007751465, 'epoch': 0.1} + 10%|▉ | 65/681 [02:46<26:26, 2.58s/it] 10%|▉ | 66/681 [02:48<26:34, 2.59s/it] {'loss': 0.9802, 'grad_norm': 40.174861907958984, 'learning_rate': 4.7101449275362313e-07, 'fcm_dpo/beta': 0.07178394496440887, 'fcm_dpo/q_t': 0.3704487979412079, 'fcm_dpo/delta': -0.2061339020729065, 'fcm_dpo/margin': 8.239856719970703, 'margin_dpo/margin_mean': 8.239856719970703, 'margin_dpo/margin_std': 9.627714157104492, 'logps/chosen': -53.68299865722656, 'logps/rejected': -86.11563110351562, 'logps/ref_chosen': -51.44352722167969, 'logps/ref_rejected': -75.63629913330078, 'KL/chosen_KL_mean': -2.239471435546875, 'KL/rejected_KL_mean': -10.479331970214844, 'KL/mean': -6.359401702880859, 'KL/std': 8.035075187683105, 'logits/chosen': -0.6172059774398804, 'logits/rejected': -0.5826204419136047, 'epoch': 0.1} + 10%|▉ | 66/681 [02:48<26:34, 2.59s/it] 10%|▉ | 67/681 [02:51<25:32, 2.50s/it] {'loss': 0.9789, 'grad_norm': 41.403099060058594, 'learning_rate': 4.782608695652174e-07, 'fcm_dpo/beta': 0.06964662671089172, 'fcm_dpo/q_t': 0.3705397844314575, 'fcm_dpo/delta': -0.19918228685855865, 'fcm_dpo/margin': 8.440168380737305, 'margin_dpo/margin_mean': 8.440168380737305, 'margin_dpo/margin_std': 9.680936813354492, 'logps/chosen': -61.326568603515625, 'logps/rejected': -83.21321868896484, 'logps/ref_chosen': -59.34080505371094, 'logps/ref_rejected': -72.78728485107422, 'KL/chosen_KL_mean': -1.9857635498046875, 'KL/rejected_KL_mean': -10.425933837890625, 'KL/mean': -6.205845832824707, 'KL/std': 8.025278091430664, 'logits/chosen': -0.5965728759765625, 'logits/rejected': -0.5546629428863525, 'epoch': 0.1} + 10%|▉ | 67/681 [02:51<25:32, 2.50s/it] 10%|▉ | 68/681 [02:53<25:13, 2.47s/it] {'loss': 0.9771, 'grad_norm': 39.689701080322266, 'learning_rate': 4.855072463768116e-07, 'fcm_dpo/beta': 0.06731708347797394, 'fcm_dpo/q_t': 0.3742911219596863, 'fcm_dpo/delta': -0.15928582847118378, 'fcm_dpo/margin': 8.181710243225098, 'margin_dpo/margin_mean': 8.181710243225098, 'margin_dpo/margin_std': 8.135688781738281, 'logps/chosen': -67.4329833984375, 'logps/rejected': -87.6161117553711, 'logps/ref_chosen': -65.2058334350586, 'logps/ref_rejected': -77.20724487304688, 'KL/chosen_KL_mean': -2.2271480560302734, 'KL/rejected_KL_mean': -10.408863067626953, 'KL/mean': -6.318003177642822, 'KL/std': 7.332122325897217, 'logits/chosen': -0.6384579539299011, 'logits/rejected': -0.5809626579284668, 'epoch': 0.1} + 10%|▉ | 68/681 [02:53<25:13, 2.47s/it] 10%|█ | 69/681 [02:56<26:00, 2.55s/it] {'loss': 0.937, 'grad_norm': 42.04729080200195, 'learning_rate': 4.927536231884058e-07, 'fcm_dpo/beta': 0.06404094398021698, 'fcm_dpo/q_t': 0.360501229763031, 'fcm_dpo/delta': -0.23506096005439758, 'fcm_dpo/margin': 9.668986320495605, 'margin_dpo/margin_mean': 9.668987274169922, 'margin_dpo/margin_std': 9.378090858459473, 'logps/chosen': -62.53855514526367, 'logps/rejected': -115.77716064453125, 'logps/ref_chosen': -59.81924057006836, 'logps/ref_rejected': -103.38886260986328, 'KL/chosen_KL_mean': -2.7193145751953125, 'KL/rejected_KL_mean': -12.388301849365234, 'KL/mean': -7.553807258605957, 'KL/std': 8.551678657531738, 'logits/chosen': -0.6100502014160156, 'logits/rejected': -0.5856792330741882, 'epoch': 0.1} + 10%|█ | 69/681 [02:56<26:00, 2.55s/it] 10%|█ | 70/681 [02:58<25:40, 2.52s/it] {'loss': 0.9405, 'grad_norm': 42.36003875732422, 'learning_rate': 5e-07, 'fcm_dpo/beta': 0.061325304210186005, 'fcm_dpo/q_t': 0.3579747676849365, 'fcm_dpo/delta': -0.25857973098754883, 'fcm_dpo/margin': 10.476768493652344, 'margin_dpo/margin_mean': 10.476768493652344, 'margin_dpo/margin_std': 10.90849494934082, 'logps/chosen': -65.84473419189453, 'logps/rejected': -105.45164489746094, 'logps/ref_chosen': -61.930641174316406, 'logps/ref_rejected': -91.06078338623047, 'KL/chosen_KL_mean': -3.914093017578125, 'KL/rejected_KL_mean': -14.390865325927734, 'KL/mean': -9.15247917175293, 'KL/std': 10.303367614746094, 'logits/chosen': -0.6354060173034668, 'logits/rejected': -0.6008400917053223, 'epoch': 0.1} + 10%|█ | 70/681 [02:58<25:40, 2.52s/it] 10%|█ | 71/681 [03:01<25:42, 2.53s/it] {'loss': 0.9015, 'grad_norm': 38.4710807800293, 'learning_rate': 4.999967061337492e-07, 'fcm_dpo/beta': 0.057439714670181274, 'fcm_dpo/q_t': 0.34699490666389465, 'fcm_dpo/delta': -0.3188778758049011, 'fcm_dpo/margin': 12.117112159729004, 'margin_dpo/margin_mean': 12.117112159729004, 'margin_dpo/margin_std': 11.577495574951172, 'logps/chosen': -65.2446060180664, 'logps/rejected': -112.947998046875, 'logps/ref_chosen': -61.750335693359375, 'logps/ref_rejected': -97.33662414550781, 'KL/chosen_KL_mean': -3.4942684173583984, 'KL/rejected_KL_mean': -15.611381530761719, 'KL/mean': -9.552824020385742, 'KL/std': 10.604869842529297, 'logits/chosen': -0.6384104490280151, 'logits/rejected': -0.5962811708450317, 'epoch': 0.1} + 10%|█ | 71/681 [03:01<25:42, 2.53s/it] 11%|█ | 72/681 [03:04<26:01, 2.56s/it] {'loss': 0.9156, 'grad_norm': 38.119319915771484, 'learning_rate': 4.999868246217933e-07, 'fcm_dpo/beta': 0.05391976609826088, 'fcm_dpo/q_t': 0.3477667570114136, 'fcm_dpo/delta': -0.31035923957824707, 'fcm_dpo/margin': 12.761024475097656, 'margin_dpo/margin_mean': 12.761024475097656, 'margin_dpo/margin_std': 13.06039810180664, 'logps/chosen': -70.16230773925781, 'logps/rejected': -112.15690612792969, 'logps/ref_chosen': -66.05341339111328, 'logps/ref_rejected': -95.2869873046875, 'KL/chosen_KL_mean': -4.108892440795898, 'KL/rejected_KL_mean': -16.869918823242188, 'KL/mean': -10.48940658569336, 'KL/std': 11.388107299804688, 'logits/chosen': -0.6678510904312134, 'logits/rejected': -0.6335985660552979, 'epoch': 0.11} + 11%|█ | 72/681 [03:04<26:01, 2.56s/it] 11%|█ | 73/681 [03:06<26:26, 2.61s/it] {'loss': 1.0098, 'grad_norm': 37.15599822998047, 'learning_rate': 4.999703557245192e-07, 'fcm_dpo/beta': 0.05082736164331436, 'fcm_dpo/q_t': 0.36679285764694214, 'fcm_dpo/delta': -0.27768224477767944, 'fcm_dpo/margin': 12.9429292678833, 'margin_dpo/margin_mean': 12.942928314208984, 'margin_dpo/margin_std': 18.527137756347656, 'logps/chosen': -72.09184265136719, 'logps/rejected': -109.23463439941406, 'logps/ref_chosen': -66.25627136230469, 'logps/ref_rejected': -90.45613098144531, 'KL/chosen_KL_mean': -5.835565567016602, 'KL/rejected_KL_mean': -18.77849578857422, 'KL/mean': -12.30703067779541, 'KL/std': 14.999626159667969, 'logits/chosen': -0.6618713736534119, 'logits/rejected': -0.6186869144439697, 'epoch': 0.11} + 11%|█ | 73/681 [03:06<26:26, 2.61s/it] 11%|█ | 74/681 [03:09<26:09, 2.59s/it] {'loss': 0.9874, 'grad_norm': 38.12542724609375, 'learning_rate': 4.999472998758977e-07, 'fcm_dpo/beta': 0.048035770654678345, 'fcm_dpo/q_t': 0.3622450828552246, 'fcm_dpo/delta': -0.2947743535041809, 'fcm_dpo/margin': 14.029674530029297, 'margin_dpo/margin_mean': 14.029674530029297, 'margin_dpo/margin_std': 20.53775405883789, 'logps/chosen': -59.96833419799805, 'logps/rejected': -116.52006530761719, 'logps/ref_chosen': -53.42488098144531, 'logps/ref_rejected': -95.94693756103516, 'KL/chosen_KL_mean': -6.543451309204102, 'KL/rejected_KL_mean': -20.573123931884766, 'KL/mean': -13.558289527893066, 'KL/std': 16.691537857055664, 'logits/chosen': -0.6320587396621704, 'logits/rejected': -0.6205792427062988, 'epoch': 0.11} + 11%|█ | 74/681 [03:09<26:09, 2.59s/it] 11%|█ | 75/681 [03:11<26:19, 2.61s/it] {'loss': 0.833, 'grad_norm': 34.176265716552734, 'learning_rate': 4.999176576834721e-07, 'fcm_dpo/beta': 0.04416520893573761, 'fcm_dpo/q_t': 0.31639227271080017, 'fcm_dpo/delta': -0.5302780866622925, 'fcm_dpo/margin': 20.05126190185547, 'margin_dpo/margin_mean': 20.05126190185547, 'margin_dpo/margin_std': 19.639450073242188, 'logps/chosen': -58.314205169677734, 'logps/rejected': -137.75778198242188, 'logps/ref_chosen': -51.861663818359375, 'logps/ref_rejected': -111.25398254394531, 'KL/chosen_KL_mean': -6.452543258666992, 'KL/rejected_KL_mean': -26.503795623779297, 'KL/mean': -16.478172302246094, 'KL/std': 17.552452087402344, 'logits/chosen': -0.6823216676712036, 'logits/rejected': -0.6725906133651733, 'epoch': 0.11} + 11%|█ | 75/681 [03:11<26:19, 2.61s/it] 11%|█ | 76/681 [03:14<26:09, 2.60s/it] {'loss': 1.0048, 'grad_norm': 33.384490966796875, 'learning_rate': 4.998814299283415e-07, 'fcm_dpo/beta': 0.04169227182865143, 'fcm_dpo/q_t': 0.3749655485153198, 'fcm_dpo/delta': -0.15833759307861328, 'fcm_dpo/margin': 13.184097290039062, 'margin_dpo/margin_mean': 13.184097290039062, 'margin_dpo/margin_std': 15.775751113891602, 'logps/chosen': -61.39282989501953, 'logps/rejected': -99.52751159667969, 'logps/ref_chosen': -53.26603698730469, 'logps/ref_rejected': -78.21662902832031, 'KL/chosen_KL_mean': -8.126792907714844, 'KL/rejected_KL_mean': -21.31088638305664, 'KL/mean': -14.718840599060059, 'KL/std': 14.556184768676758, 'logits/chosen': -0.692663848400116, 'logits/rejected': -0.6493555307388306, 'epoch': 0.11} + 11%|█ | 76/681 [03:14<26:09, 2.60s/it] 11%|█▏ | 77/681 [03:16<25:12, 2.50s/it] {'loss': 0.8744, 'grad_norm': 34.94086456298828, 'learning_rate': 4.998386175651409e-07, 'fcm_dpo/beta': 0.03837820887565613, 'fcm_dpo/q_t': 0.3239055275917053, 'fcm_dpo/delta': -0.4561229944229126, 'fcm_dpo/margin': 21.237140655517578, 'margin_dpo/margin_mean': 21.237140655517578, 'margin_dpo/margin_std': 21.865249633789062, 'logps/chosen': -65.33612060546875, 'logps/rejected': -122.25019836425781, 'logps/ref_chosen': -58.0966796875, 'logps/ref_rejected': -93.77361297607422, 'KL/chosen_KL_mean': -7.239437103271484, 'KL/rejected_KL_mean': -28.476581573486328, 'KL/mean': -17.858009338378906, 'KL/std': 19.998626708984375, 'logits/chosen': -0.6894793510437012, 'logits/rejected': -0.6502680778503418, 'epoch': 0.11} + 11%|█▏ | 77/681 [03:16<25:12, 2.50s/it] 11%|█▏ | 78/681 [03:19<25:40, 2.55s/it] {'loss': 0.9745, 'grad_norm': 31.626035690307617, 'learning_rate': 4.997892217220159e-07, 'fcm_dpo/beta': 0.03666268289089203, 'fcm_dpo/q_t': 0.3673900067806244, 'fcm_dpo/delta': -0.21514025330543518, 'fcm_dpo/margin': 16.436477661132812, 'margin_dpo/margin_mean': 16.436477661132812, 'margin_dpo/margin_std': 18.627796173095703, 'logps/chosen': -63.12693786621094, 'logps/rejected': -108.88398742675781, 'logps/ref_chosen': -55.61378479003906, 'logps/ref_rejected': -84.93436431884766, 'KL/chosen_KL_mean': -7.513151168823242, 'KL/rejected_KL_mean': -23.949626922607422, 'KL/mean': -15.731389999389648, 'KL/std': 16.417797088623047, 'logits/chosen': -0.6481041312217712, 'logits/rejected': -0.6217755079269409, 'epoch': 0.11} + 11%|█▏ | 78/681 [03:19<25:40, 2.55s/it] 12%|█▏ | 79/681 [03:22<25:53, 2.58s/it] {'loss': 0.9866, 'grad_norm': 27.80043601989746, 'learning_rate': 4.997332437005931e-07, 'fcm_dpo/beta': 0.03483927622437477, 'fcm_dpo/q_t': 0.36786949634552, 'fcm_dpo/delta': -0.24468708038330078, 'fcm_dpo/margin': 18.058425903320312, 'margin_dpo/margin_mean': 18.05842399597168, 'margin_dpo/margin_std': 22.871458053588867, 'logps/chosen': -63.345420837402344, 'logps/rejected': -113.60092163085938, 'logps/ref_chosen': -55.45048522949219, 'logps/ref_rejected': -87.64756774902344, 'KL/chosen_KL_mean': -7.894931793212891, 'KL/rejected_KL_mean': -25.95336151123047, 'KL/mean': -16.924148559570312, 'KL/std': 18.74026870727539, 'logits/chosen': -0.6485146284103394, 'logits/rejected': -0.6170614957809448, 'epoch': 0.12} + 12%|█▏ | 79/681 [03:22<25:53, 2.58s/it] 12%|█▏ | 80/681 [03:24<25:42, 2.57s/it] {'loss': 1.0327, 'grad_norm': 29.989072799682617, 'learning_rate': 4.996706849759452e-07, 'fcm_dpo/beta': 0.03327310085296631, 'fcm_dpo/q_t': 0.38316744565963745, 'fcm_dpo/delta': -0.17127852141857147, 'fcm_dpo/margin': 16.79857063293457, 'margin_dpo/margin_mean': 16.79857063293457, 'margin_dpo/margin_std': 22.803003311157227, 'logps/chosen': -69.29322052001953, 'logps/rejected': -115.1199951171875, 'logps/ref_chosen': -58.519290924072266, 'logps/ref_rejected': -87.54750061035156, 'KL/chosen_KL_mean': -10.773929595947266, 'KL/rejected_KL_mean': -27.572498321533203, 'KL/mean': -19.173215866088867, 'KL/std': 19.74143409729004, 'logits/chosen': -0.7313661575317383, 'logits/rejected': -0.688393235206604, 'epoch': 0.12} + 12%|█▏ | 80/681 [03:24<25:42, 2.57s/it] 12%|█▏ | 81/681 [03:27<25:58, 2.60s/it] {'loss': 0.9305, 'grad_norm': 30.72429847717285, 'learning_rate': 4.996015471965529e-07, 'fcm_dpo/beta': 0.03129996731877327, 'fcm_dpo/q_t': 0.34688568115234375, 'fcm_dpo/delta': -0.36527884006500244, 'fcm_dpo/margin': 23.49043083190918, 'margin_dpo/margin_mean': 23.490432739257812, 'margin_dpo/margin_std': 28.210830688476562, 'logps/chosen': -76.31262969970703, 'logps/rejected': -163.01690673828125, 'logps/ref_chosen': -66.44886779785156, 'logps/ref_rejected': -129.66270446777344, 'KL/chosen_KL_mean': -9.863761901855469, 'KL/rejected_KL_mean': -33.35419464111328, 'KL/mean': -21.608978271484375, 'KL/std': 23.825132369995117, 'logits/chosen': -0.7000492811203003, 'logits/rejected': -0.6688964366912842, 'epoch': 0.12} + 12%|█▏ | 81/681 [03:27<25:58, 2.60s/it] 12%|█▏ | 82/681 [03:29<25:26, 2.55s/it] {'loss': 1.074, 'grad_norm': 33.41337203979492, 'learning_rate': 4.995258321842611e-07, 'fcm_dpo/beta': 0.030443139374256134, 'fcm_dpo/q_t': 0.3823755085468292, 'fcm_dpo/delta': -0.17542892694473267, 'fcm_dpo/margin': 18.563825607299805, 'margin_dpo/margin_mean': 18.563827514648438, 'margin_dpo/margin_std': 29.59918975830078, 'logps/chosen': -64.50035095214844, 'logps/rejected': -121.57504272460938, 'logps/ref_chosen': -52.232383728027344, 'logps/ref_rejected': -90.74325561523438, 'KL/chosen_KL_mean': -12.267969131469727, 'KL/rejected_KL_mean': -30.831790924072266, 'KL/mean': -21.549884796142578, 'KL/std': 21.572769165039062, 'logits/chosen': -0.6645326614379883, 'logits/rejected': -0.6522207260131836, 'epoch': 0.12} + 12%|█▏ | 82/681 [03:29<25:26, 2.55s/it] 12%|█▏ | 83/681 [03:32<25:03, 2.51s/it] {'loss': 0.9787, 'grad_norm': 28.262123107910156, 'learning_rate': 4.994435419342304e-07, 'fcm_dpo/beta': 0.028682120144367218, 'fcm_dpo/q_t': 0.36408424377441406, 'fcm_dpo/delta': -0.24740472435951233, 'fcm_dpo/margin': 21.993192672729492, 'margin_dpo/margin_mean': 21.993192672729492, 'margin_dpo/margin_std': 26.84136390686035, 'logps/chosen': -68.46163940429688, 'logps/rejected': -138.3433380126953, 'logps/ref_chosen': -55.82738494873047, 'logps/ref_rejected': -103.71589660644531, 'KL/chosen_KL_mean': -12.634248733520508, 'KL/rejected_KL_mean': -34.627437591552734, 'KL/mean': -23.630840301513672, 'KL/std': 22.750579833984375, 'logits/chosen': -0.6717352867126465, 'logits/rejected': -0.6345555782318115, 'epoch': 0.12} + 12%|█▏ | 83/681 [03:32<25:03, 2.51s/it] 12%|█▏ | 84/681 [03:34<25:42, 2.58s/it] {'loss': 0.9978, 'grad_norm': 27.846446990966797, 'learning_rate': 4.993546786148857e-07, 'fcm_dpo/beta': 0.027607331052422523, 'fcm_dpo/q_t': 0.37740713357925415, 'fcm_dpo/delta': -0.14001153409481049, 'fcm_dpo/margin': 19.20960235595703, 'margin_dpo/margin_mean': 19.20960235595703, 'margin_dpo/margin_std': 19.999858856201172, 'logps/chosen': -78.81175994873047, 'logps/rejected': -118.143798828125, 'logps/ref_chosen': -67.1761703491211, 'logps/ref_rejected': -87.29859924316406, 'KL/chosen_KL_mean': -11.635591506958008, 'KL/rejected_KL_mean': -30.845199584960938, 'KL/mean': -21.24039649963379, 'KL/std': 19.57217788696289, 'logits/chosen': -0.6538140177726746, 'logits/rejected': -0.6141324639320374, 'epoch': 0.12} + 12%|█▏ | 84/681 [03:34<25:42, 2.58s/it] 12%|█▏ | 85/681 [03:37<26:08, 2.63s/it] {'loss': 1.0228, 'grad_norm': 27.484092712402344, 'learning_rate': 4.992592445678582e-07, 'fcm_dpo/beta': 0.027254024520516396, 'fcm_dpo/q_t': 0.38130825757980347, 'fcm_dpo/delta': -0.1449553519487381, 'fcm_dpo/margin': 19.705352783203125, 'margin_dpo/margin_mean': 19.705352783203125, 'margin_dpo/margin_std': 24.122146606445312, 'logps/chosen': -70.68067169189453, 'logps/rejected': -110.61820983886719, 'logps/ref_chosen': -58.4066162109375, 'logps/ref_rejected': -78.63880157470703, 'KL/chosen_KL_mean': -12.274053573608398, 'KL/rejected_KL_mean': -31.97940444946289, 'KL/mean': -22.126728057861328, 'KL/std': 20.159584045410156, 'logits/chosen': -0.6424893140792847, 'logits/rejected': -0.6098573207855225, 'epoch': 0.12} + 12%|█▏ | 85/681 [03:37<26:08, 2.63s/it] 13%|█▎ | 86/681 [03:40<26:18, 2.65s/it] {'loss': 1.1027, 'grad_norm': 31.304685592651367, 'learning_rate': 4.991572423079235e-07, 'fcm_dpo/beta': 0.026313815265893936, 'fcm_dpo/q_t': 0.3926513195037842, 'fcm_dpo/delta': -0.16080215573310852, 'fcm_dpo/margin': 20.985877990722656, 'margin_dpo/margin_mean': 20.985881805419922, 'margin_dpo/margin_std': 37.65880584716797, 'logps/chosen': -72.19342803955078, 'logps/rejected': -125.16349029541016, 'logps/ref_chosen': -56.13746643066406, 'logps/ref_rejected': -88.12165069580078, 'KL/chosen_KL_mean': -16.05596351623535, 'KL/rejected_KL_mean': -37.041839599609375, 'KL/mean': -26.548908233642578, 'KL/std': 27.3221492767334, 'logits/chosen': -0.6901407241821289, 'logits/rejected': -0.6772359609603882, 'epoch': 0.13} + 13%|█▎ | 86/681 [03:40<26:18, 2.65s/it] 13%|█▎ | 87/681 [03:42<26:09, 2.64s/it] {'loss': 1.0137, 'grad_norm': 26.263225555419922, 'learning_rate': 4.990486745229364e-07, 'fcm_dpo/beta': 0.024988306686282158, 'fcm_dpo/q_t': 0.3679496645927429, 'fcm_dpo/delta': -0.22651353478431702, 'fcm_dpo/margin': 24.446142196655273, 'margin_dpo/margin_mean': 24.446144104003906, 'margin_dpo/margin_std': 32.91810607910156, 'logps/chosen': -71.49484252929688, 'logps/rejected': -135.7724609375, 'logps/ref_chosen': -55.63609313964844, 'logps/ref_rejected': -95.46757507324219, 'KL/chosen_KL_mean': -15.858743667602539, 'KL/rejected_KL_mean': -40.30488204956055, 'KL/mean': -28.08181381225586, 'KL/std': 27.242843627929688, 'logits/chosen': -0.7118654847145081, 'logits/rejected': -0.6854358911514282, 'epoch': 0.13} + 13%|█▎ | 87/681 [03:43<26:09, 2.64s/it] 13%|█▎ | 88/681 [03:45<26:10, 2.65s/it] {'loss': 1.1165, 'grad_norm': 27.875059127807617, 'learning_rate': 4.989335440737586e-07, 'fcm_dpo/beta': 0.0244886577129364, 'fcm_dpo/q_t': 0.40317296981811523, 'fcm_dpo/delta': -0.057016439735889435, 'fcm_dpo/margin': 18.518863677978516, 'margin_dpo/margin_mean': 18.518863677978516, 'margin_dpo/margin_std': 29.796024322509766, 'logps/chosen': -93.74009704589844, 'logps/rejected': -145.29629516601562, 'logps/ref_chosen': -73.67115020751953, 'logps/ref_rejected': -106.70849609375, 'KL/chosen_KL_mean': -20.068950653076172, 'KL/rejected_KL_mean': -38.58780288696289, 'KL/mean': -29.328380584716797, 'KL/std': 28.398073196411133, 'logits/chosen': -0.699777364730835, 'logits/rejected': -0.6927889585494995, 'epoch': 0.13} + 13%|█▎ | 88/681 [03:45<26:10, 2.65s/it] 13%|█▎ | 89/681 [03:48<25:40, 2.60s/it] {'loss': 1.0399, 'grad_norm': 25.647180557250977, 'learning_rate': 4.988118539941847e-07, 'fcm_dpo/beta': 0.024275628849864006, 'fcm_dpo/q_t': 0.3883446455001831, 'fcm_dpo/delta': -0.10855366289615631, 'fcm_dpo/margin': 20.72658920288086, 'margin_dpo/margin_mean': 20.72658920288086, 'margin_dpo/margin_std': 27.33102798461914, 'logps/chosen': -72.8990478515625, 'logps/rejected': -115.08427429199219, 'logps/ref_chosen': -60.624916076660156, 'logps/ref_rejected': -82.08354949951172, 'KL/chosen_KL_mean': -12.274129867553711, 'KL/rejected_KL_mean': -33.0007209777832, 'KL/mean': -22.63742446899414, 'KL/std': 24.061918258666992, 'logits/chosen': -0.7325412631034851, 'logits/rejected': -0.7000647783279419, 'epoch': 0.13} + 13%|█▎ | 89/681 [03:48<25:40, 2.60s/it] 13%|█▎ | 90/681 [03:50<25:11, 2.56s/it] {'loss': 1.0186, 'grad_norm': 27.188581466674805, 'learning_rate': 4.986836074908615e-07, 'fcm_dpo/beta': 0.02329513430595398, 'fcm_dpo/q_t': 0.3700242340564728, 'fcm_dpo/delta': -0.2749367952346802, 'fcm_dpo/margin': 28.22708511352539, 'margin_dpo/margin_mean': 28.22708511352539, 'margin_dpo/margin_std': 40.7965087890625, 'logps/chosen': -69.03775024414062, 'logps/rejected': -155.5242462158203, 'logps/ref_chosen': -53.285308837890625, 'logps/ref_rejected': -111.54470825195312, 'KL/chosen_KL_mean': -15.752443313598633, 'KL/rejected_KL_mean': -43.97953414916992, 'KL/mean': -29.865989685058594, 'KL/std': 32.87799835205078, 'logits/chosen': -0.6703182458877563, 'logits/rejected': -0.681124210357666, 'epoch': 0.13} + 13%|█▎ | 90/681 [03:50<25:11, 2.56s/it] 13%|█▎ | 91/681 [03:53<25:19, 2.58s/it] {'loss': 1.0595, 'grad_norm': 25.46695899963379, 'learning_rate': 4.985488079432037e-07, 'fcm_dpo/beta': 0.022392991930246353, 'fcm_dpo/q_t': 0.3886939287185669, 'fcm_dpo/delta': -0.12847986817359924, 'fcm_dpo/margin': 23.309101104736328, 'margin_dpo/margin_mean': 23.309099197387695, 'margin_dpo/margin_std': 34.23745346069336, 'logps/chosen': -78.412353515625, 'logps/rejected': -127.79244995117188, 'logps/ref_chosen': -61.802955627441406, 'logps/ref_rejected': -87.87395477294922, 'KL/chosen_KL_mean': -16.609392166137695, 'KL/rejected_KL_mean': -39.91849899291992, 'KL/mean': -28.263938903808594, 'KL/std': 26.464740753173828, 'logits/chosen': -0.6979262828826904, 'logits/rejected': -0.6650443077087402, 'epoch': 0.13} + 13%|█▎ | 91/681 [03:53<25:19, 2.58s/it] 14%|█▎ | 92/681 [03:55<24:57, 2.54s/it] {'loss': 1.0512, 'grad_norm': 23.927978515625, 'learning_rate': 4.984074589033043e-07, 'fcm_dpo/beta': 0.021884029731154442, 'fcm_dpo/q_t': 0.38834255933761597, 'fcm_dpo/delta': -0.12284786254167557, 'fcm_dpo/margin': 23.604501724243164, 'margin_dpo/margin_mean': 23.60449981689453, 'margin_dpo/margin_std': 32.96575164794922, 'logps/chosen': -66.70379638671875, 'logps/rejected': -116.5487060546875, 'logps/ref_chosen': -51.640769958496094, 'logps/ref_rejected': -77.88117980957031, 'KL/chosen_KL_mean': -15.063024520874023, 'KL/rejected_KL_mean': -38.66752624511719, 'KL/mean': -26.865272521972656, 'KL/std': 27.186147689819336, 'logits/chosen': -0.7328395247459412, 'logits/rejected': -0.7109937071800232, 'epoch': 0.14} + 14%|█▎ | 92/681 [03:55<24:57, 2.54s/it] 14%|█▎ | 93/681 [03:57<23:46, 2.43s/it] {'loss': 1.0264, 'grad_norm': 24.41376495361328, 'learning_rate': 4.982595640958425e-07, 'fcm_dpo/beta': 0.021284889429807663, 'fcm_dpo/q_t': 0.3872656226158142, 'fcm_dpo/delta': -0.11301136016845703, 'fcm_dpo/margin': 23.836261749267578, 'margin_dpo/margin_mean': 23.836261749267578, 'margin_dpo/margin_std': 29.672298431396484, 'logps/chosen': -69.35418701171875, 'logps/rejected': -117.82196044921875, 'logps/ref_chosen': -52.529239654541016, 'logps/ref_rejected': -77.16075134277344, 'KL/chosen_KL_mean': -16.824951171875, 'KL/rejected_KL_mean': -40.661216735839844, 'KL/mean': -28.743083953857422, 'KL/std': 25.748863220214844, 'logits/chosen': -0.7215616703033447, 'logits/rejected': -0.6693962812423706, 'epoch': 0.14} + 14%|█▎ | 93/681 [03:57<23:46, 2.43s/it] 14%|█▍ | 94/681 [04:00<24:39, 2.52s/it] {'loss': 0.9924, 'grad_norm': 24.020462036132812, 'learning_rate': 4.98105127417984e-07, 'fcm_dpo/beta': 0.02049822360277176, 'fcm_dpo/q_t': 0.37480291724205017, 'fcm_dpo/delta': -0.17131651937961578, 'fcm_dpo/margin': 27.329792022705078, 'margin_dpo/margin_mean': 27.329792022705078, 'margin_dpo/margin_std': 30.807952880859375, 'logps/chosen': -79.50576782226562, 'logps/rejected': -145.21197509765625, 'logps/ref_chosen': -61.22261047363281, 'logps/ref_rejected': -99.59902954101562, 'KL/chosen_KL_mean': -18.283151626586914, 'KL/rejected_KL_mean': -45.61294937133789, 'KL/mean': -31.948049545288086, 'KL/std': 29.418785095214844, 'logits/chosen': -0.6669220924377441, 'logits/rejected': -0.6510493755340576, 'epoch': 0.14} + 14%|█▍ | 94/681 [04:00<24:39, 2.52s/it] 14%|█▍ | 95/681 [04:03<24:28, 2.51s/it] {'loss': 1.0693, 'grad_norm': 22.573118209838867, 'learning_rate': 4.979441529392784e-07, 'fcm_dpo/beta': 0.02020413801074028, 'fcm_dpo/q_t': 0.3971063494682312, 'fcm_dpo/delta': -0.051920242607593536, 'fcm_dpo/margin': 22.231979370117188, 'margin_dpo/margin_mean': 22.231979370117188, 'margin_dpo/margin_std': 29.545318603515625, 'logps/chosen': -69.883544921875, 'logps/rejected': -115.47222900390625, 'logps/ref_chosen': -52.523643493652344, 'logps/ref_rejected': -75.8803482055664, 'KL/chosen_KL_mean': -17.35989761352539, 'KL/rejected_KL_mean': -39.591880798339844, 'KL/mean': -28.47588539123535, 'KL/std': 28.130035400390625, 'logits/chosen': -0.6930748224258423, 'logits/rejected': -0.6638180017471313, 'epoch': 0.14} + 14%|█▍ | 95/681 [04:03<24:28, 2.51s/it] 14%|█▍ | 96/681 [04:05<24:30, 2.51s/it] {'loss': 0.9826, 'grad_norm': 23.293750762939453, 'learning_rate': 4.977766449015534e-07, 'fcm_dpo/beta': 0.01948397234082222, 'fcm_dpo/q_t': 0.3715973496437073, 'fcm_dpo/delta': -0.20268620550632477, 'fcm_dpo/margin': 30.20215606689453, 'margin_dpo/margin_mean': 30.20215606689453, 'margin_dpo/margin_std': 35.575836181640625, 'logps/chosen': -78.96299743652344, 'logps/rejected': -143.60418701171875, 'logps/ref_chosen': -62.15697479248047, 'logps/ref_rejected': -96.59601593017578, 'KL/chosen_KL_mean': -16.8060245513916, 'KL/rejected_KL_mean': -47.0081787109375, 'KL/mean': -31.907100677490234, 'KL/std': 32.275535583496094, 'logits/chosen': -0.7053156495094299, 'logits/rejected': -0.6753140091896057, 'epoch': 0.14} + 14%|█▍ | 96/681 [04:05<24:30, 2.51s/it] 14%|█▍ | 97/681 [04:08<24:38, 2.53s/it] {'loss': 1.0527, 'grad_norm': 23.826488494873047, 'learning_rate': 4.976026077188012e-07, 'fcm_dpo/beta': 0.019416380673646927, 'fcm_dpo/q_t': 0.3948373794555664, 'fcm_dpo/delta': -0.057599060237407684, 'fcm_dpo/margin': 23.38509750366211, 'margin_dpo/margin_mean': 23.38509750366211, 'margin_dpo/margin_std': 26.815166473388672, 'logps/chosen': -72.77313232421875, 'logps/rejected': -118.47660827636719, 'logps/ref_chosen': -54.646366119384766, 'logps/ref_rejected': -76.96475219726562, 'KL/chosen_KL_mean': -18.12676429748535, 'KL/rejected_KL_mean': -41.51185989379883, 'KL/mean': -29.81930923461914, 'KL/std': 26.801036834716797, 'logits/chosen': -0.6331626176834106, 'logits/rejected': -0.5884179472923279, 'epoch': 0.14} + 14%|█▍ | 97/681 [04:08<24:38, 2.53s/it] 14%|█▍ | 98/681 [04:10<24:33, 2.53s/it] {'loss': 1.0547, 'grad_norm': 24.53318214416504, 'learning_rate': 4.974220459770639e-07, 'fcm_dpo/beta': 0.018851084634661674, 'fcm_dpo/q_t': 0.38528013229370117, 'fcm_dpo/delta': -0.10970290005207062, 'fcm_dpo/margin': 26.731998443603516, 'margin_dpo/margin_mean': 26.731998443603516, 'margin_dpo/margin_std': 36.594322204589844, 'logps/chosen': -87.71917724609375, 'logps/rejected': -145.72003173828125, 'logps/ref_chosen': -65.25862884521484, 'logps/ref_rejected': -96.5274887084961, 'KL/chosen_KL_mean': -22.460552215576172, 'KL/rejected_KL_mean': -49.192535400390625, 'KL/mean': -35.82654571533203, 'KL/std': 30.553295135498047, 'logits/chosen': -0.6960965394973755, 'logits/rejected': -0.6801573038101196, 'epoch': 0.14} + 14%|█▍ | 98/681 [04:10<24:33, 2.53s/it] 15%|█▍ | 99/681 [04:12<23:33, 2.43s/it] {'loss': 0.993, 'grad_norm': 21.70009994506836, 'learning_rate': 4.972349644343108e-07, 'fcm_dpo/beta': 0.018170353025197983, 'fcm_dpo/q_t': 0.3738780617713928, 'fcm_dpo/delta': -0.20326459407806396, 'fcm_dpo/margin': 32.502403259277344, 'margin_dpo/margin_mean': 32.502403259277344, 'margin_dpo/margin_std': 41.165550231933594, 'logps/chosen': -63.44392395019531, 'logps/rejected': -136.7457733154297, 'logps/ref_chosen': -45.638484954833984, 'logps/ref_rejected': -86.43793487548828, 'KL/chosen_KL_mean': -17.805437088012695, 'KL/rejected_KL_mean': -50.307838439941406, 'KL/mean': -34.056640625, 'KL/std': 33.01419448852539, 'logits/chosen': -0.6636701822280884, 'logits/rejected': -0.6640149354934692, 'epoch': 0.15} + 15%|█▍ | 99/681 [04:12<23:33, 2.43s/it] 15%|█▍ | 100/681 [04:15<24:06, 2.49s/it] {'loss': 1.1626, 'grad_norm': 24.107803344726562, 'learning_rate': 4.970413680203148e-07, 'fcm_dpo/beta': 0.018238741904497147, 'fcm_dpo/q_t': 0.42096006870269775, 'fcm_dpo/delta': 0.05035646632313728, 'fcm_dpo/margin': 19.239667892456055, 'margin_dpo/margin_mean': 19.239667892456055, 'margin_dpo/margin_std': 34.46852111816406, 'logps/chosen': -77.52592468261719, 'logps/rejected': -113.23182678222656, 'logps/ref_chosen': -57.59397888183594, 'logps/ref_rejected': -74.06021118164062, 'KL/chosen_KL_mean': -19.931947708129883, 'KL/rejected_KL_mean': -39.17161178588867, 'KL/mean': -29.551782608032227, 'KL/std': 26.748775482177734, 'logits/chosen': -0.6563955545425415, 'logits/rejected': -0.6127746105194092, 'epoch': 0.15} + 15%|█▍ | 100/681 [04:15<24:06, 2.49s/it] 15%|█▍ | 101/681 [04:17<23:48, 2.46s/it] {'loss': 1.1263, 'grad_norm': 23.844804763793945, 'learning_rate': 4.968412618365215e-07, 'fcm_dpo/beta': 0.0180535688996315, 'fcm_dpo/q_t': 0.4103754460811615, 'fcm_dpo/delta': -0.020610351115465164, 'fcm_dpo/margin': 23.2081298828125, 'margin_dpo/margin_mean': 23.2081298828125, 'margin_dpo/margin_std': 39.541419982910156, 'logps/chosen': -86.30992126464844, 'logps/rejected': -131.0588836669922, 'logps/ref_chosen': -61.64885330200195, 'logps/ref_rejected': -83.18968200683594, 'KL/chosen_KL_mean': -24.661067962646484, 'KL/rejected_KL_mean': -47.869197845458984, 'KL/mean': -36.265132904052734, 'KL/std': 32.89160919189453, 'logits/chosen': -0.6886243224143982, 'logits/rejected': -0.6581400632858276, 'epoch': 0.15} + 15%|█▍ | 101/681 [04:17<23:48, 2.46s/it] 15%|█▍ | 102/681 [04:20<23:35, 2.44s/it] {'loss': 1.2068, 'grad_norm': 26.781410217285156, 'learning_rate': 4.966346511559149e-07, 'fcm_dpo/beta': 0.018092244863510132, 'fcm_dpo/q_t': 0.43100807070732117, 'fcm_dpo/delta': -0.025346608832478523, 'fcm_dpo/margin': 17.135272979736328, 'margin_dpo/margin_mean': 17.13527488708496, 'margin_dpo/margin_std': 36.66783905029297, 'logps/chosen': -90.83322143554688, 'logps/rejected': -112.07669067382812, 'logps/ref_chosen': -64.0788803100586, 'logps/ref_rejected': -68.18707275390625, 'KL/chosen_KL_mean': -26.75433921813965, 'KL/rejected_KL_mean': -43.889610290527344, 'KL/mean': -35.32197570800781, 'KL/std': 31.045133590698242, 'logits/chosen': -0.6860433220863342, 'logits/rejected': -0.6402877569198608, 'epoch': 0.15} + 15%|█▍ | 102/681 [04:20<23:35, 2.44s/it] 15%|█▌ | 103/681 [04:22<23:16, 2.42s/it] {'loss': 0.9817, 'grad_norm': 22.851566314697266, 'learning_rate': 4.964215414228785e-07, 'fcm_dpo/beta': 0.01744980737566948, 'fcm_dpo/q_t': 0.3697454333305359, 'fcm_dpo/delta': -0.21340087056159973, 'fcm_dpo/margin': 34.39133834838867, 'margin_dpo/margin_mean': 34.39133834838867, 'margin_dpo/margin_std': 41.43703079223633, 'logps/chosen': -82.23300170898438, 'logps/rejected': -148.89776611328125, 'logps/ref_chosen': -61.299278259277344, 'logps/ref_rejected': -93.57270812988281, 'KL/chosen_KL_mean': -20.93372344970703, 'KL/rejected_KL_mean': -55.3250617980957, 'KL/mean': -38.12938690185547, 'KL/std': 34.48286437988281, 'logits/chosen': -0.6631127595901489, 'logits/rejected': -0.6278681755065918, 'epoch': 0.15} + 15%|█▌ | 103/681 [04:22<23:16, 2.42s/it] 15%|█▌ | 104/681 [04:24<22:42, 2.36s/it] {'loss': 1.041, 'grad_norm': 22.517627716064453, 'learning_rate': 4.96201938253052e-07, 'fcm_dpo/beta': 0.01691918447613716, 'fcm_dpo/q_t': 0.3846844732761383, 'fcm_dpo/delta': -0.15939825773239136, 'fcm_dpo/margin': 32.55640411376953, 'margin_dpo/margin_mean': 32.5564079284668, 'margin_dpo/margin_std': 46.39738082885742, 'logps/chosen': -77.45196533203125, 'logps/rejected': -145.2003173828125, 'logps/ref_chosen': -54.372772216796875, 'logps/ref_rejected': -89.5647201538086, 'KL/chosen_KL_mean': -23.079187393188477, 'KL/rejected_KL_mean': -55.635589599609375, 'KL/mean': -39.357391357421875, 'KL/std': 38.08613967895508, 'logits/chosen': -0.7116140127182007, 'logits/rejected': -0.6817853450775146, 'epoch': 0.15} + 15%|█▌ | 104/681 [04:24<22:42, 2.36s/it] 15%|█▌ | 105/681 [04:27<23:22, 2.44s/it] {'loss': 0.8712, 'grad_norm': 22.841474533081055, 'learning_rate': 4.959758474331832e-07, 'fcm_dpo/beta': 0.015895074233412743, 'fcm_dpo/q_t': 0.3344946503639221, 'fcm_dpo/delta': -0.36052238941192627, 'fcm_dpo/margin': 46.197574615478516, 'margin_dpo/margin_mean': 46.19757843017578, 'margin_dpo/margin_std': 40.87889099121094, 'logps/chosen': -76.34972381591797, 'logps/rejected': -165.88186645507812, 'logps/ref_chosen': -54.638946533203125, 'logps/ref_rejected': -97.97351837158203, 'KL/chosen_KL_mean': -21.710777282714844, 'KL/rejected_KL_mean': -67.90835571289062, 'KL/mean': -44.809566497802734, 'KL/std': 37.22575378417969, 'logits/chosen': -0.6764773726463318, 'logits/rejected': -0.6534477472305298, 'epoch': 0.15} + 15%|█▌ | 105/681 [04:27<23:22, 2.44s/it] 16%|█▌ | 106/681 [04:29<23:26, 2.45s/it] {'loss': 1.0575, 'grad_norm': 21.783618927001953, 'learning_rate': 4.957432749209755e-07, 'fcm_dpo/beta': 0.015443746000528336, 'fcm_dpo/q_t': 0.3971561789512634, 'fcm_dpo/delta': -0.04908674955368042, 'fcm_dpo/margin': 28.927536010742188, 'margin_dpo/margin_mean': 28.927536010742188, 'margin_dpo/margin_std': 35.089813232421875, 'logps/chosen': -79.10572814941406, 'logps/rejected': -138.42498779296875, 'logps/ref_chosen': -54.83289337158203, 'logps/ref_rejected': -85.22461700439453, 'KL/chosen_KL_mean': -24.2728328704834, 'KL/rejected_KL_mean': -53.20037078857422, 'KL/mean': -38.736602783203125, 'KL/std': 32.29327392578125, 'logits/chosen': -0.6298633217811584, 'logits/rejected': -0.5977374315261841, 'epoch': 0.16} + 16%|█▌ | 106/681 [04:29<23:26, 2.45s/it] 16%|█▌ | 107/681 [04:32<23:57, 2.50s/it] {'loss': 1.0427, 'grad_norm': 21.18216323852539, 'learning_rate': 4.955042268449307e-07, 'fcm_dpo/beta': 0.015136872418224812, 'fcm_dpo/q_t': 0.3875572979450226, 'fcm_dpo/delta': -0.10162399709224701, 'fcm_dpo/margin': 32.756526947021484, 'margin_dpo/margin_mean': 32.75652313232422, 'margin_dpo/margin_std': 41.57597351074219, 'logps/chosen': -98.64810943603516, 'logps/rejected': -156.43634033203125, 'logps/ref_chosen': -69.70780944824219, 'logps/ref_rejected': -94.73950958251953, 'KL/chosen_KL_mean': -28.940296173095703, 'KL/rejected_KL_mean': -61.69682312011719, 'KL/mean': -45.31855773925781, 'KL/std': 39.5135498046875, 'logits/chosen': -0.697156548500061, 'logits/rejected': -0.6531878113746643, 'epoch': 0.16} + 16%|█▌ | 107/681 [04:32<23:57, 2.50s/it] 16%|█▌ | 108/681 [04:34<23:32, 2.46s/it] {'loss': 1.0372, 'grad_norm': 21.38556671142578, 'learning_rate': 4.952587095041881e-07, 'fcm_dpo/beta': 0.014710919000208378, 'fcm_dpo/q_t': 0.38031500577926636, 'fcm_dpo/delta': -0.1870792806148529, 'fcm_dpo/margin': 39.17702102661133, 'margin_dpo/margin_mean': 39.17702102661133, 'margin_dpo/margin_std': 56.45673370361328, 'logps/chosen': -82.23114013671875, 'logps/rejected': -161.19427490234375, 'logps/ref_chosen': -56.0098876953125, 'logps/ref_rejected': -95.79601287841797, 'KL/chosen_KL_mean': -26.22125816345215, 'KL/rejected_KL_mean': -65.39826965332031, 'KL/mean': -45.80976486206055, 'KL/std': 43.5434684753418, 'logits/chosen': -0.6746413707733154, 'logits/rejected': -0.6529253721237183, 'epoch': 0.16} + 16%|█▌ | 108/681 [04:34<23:32, 2.46s/it] 16%|█▌ | 109/681 [04:37<24:48, 2.60s/it] {'loss': 0.9883, 'grad_norm': 21.996633529663086, 'learning_rate': 4.95006729368358e-07, 'fcm_dpo/beta': 0.014084616675972939, 'fcm_dpo/q_t': 0.36869800090789795, 'fcm_dpo/delta': -0.2030661702156067, 'fcm_dpo/margin': 41.90185546875, 'margin_dpo/margin_mean': 41.901859283447266, 'margin_dpo/margin_std': 49.483802795410156, 'logps/chosen': -87.3345947265625, 'logps/rejected': -165.03671264648438, 'logps/ref_chosen': -62.88549041748047, 'logps/ref_rejected': -98.68573760986328, 'KL/chosen_KL_mean': -24.449106216430664, 'KL/rejected_KL_mean': -66.35096740722656, 'KL/mean': -45.4000358581543, 'KL/std': 42.009151458740234, 'logits/chosen': -0.6177815198898315, 'logits/rejected': -0.5968196392059326, 'epoch': 0.16} + 16%|█▌ | 109/681 [04:37<24:48, 2.60s/it] 16%|█▌ | 110/681 [04:40<24:56, 2.62s/it] {'loss': 1.0508, 'grad_norm': 19.18947982788086, 'learning_rate': 4.947482930773511e-07, 'fcm_dpo/beta': 0.013565946370363235, 'fcm_dpo/q_t': 0.386931836605072, 'fcm_dpo/delta': -0.12388351559638977, 'fcm_dpo/margin': 37.83190155029297, 'margin_dpo/margin_mean': 37.83190155029297, 'margin_dpo/margin_std': 50.25776672363281, 'logps/chosen': -83.83519744873047, 'logps/rejected': -142.66342163085938, 'logps/ref_chosen': -58.753684997558594, 'logps/ref_rejected': -79.75001525878906, 'KL/chosen_KL_mean': -25.081512451171875, 'KL/rejected_KL_mean': -62.91341018676758, 'KL/mean': -43.997459411621094, 'KL/std': 41.81461715698242, 'logits/chosen': -0.6101734638214111, 'logits/rejected': -0.5731357932090759, 'epoch': 0.16} + 16%|█▌ | 110/681 [04:40<24:56, 2.62s/it] 16%|█▋ | 111/681 [04:43<24:39, 2.60s/it] {'loss': 1.0313, 'grad_norm': 22.14818000793457, 'learning_rate': 4.944834074412042e-07, 'fcm_dpo/beta': 0.01321389153599739, 'fcm_dpo/q_t': 0.3765709400177002, 'fcm_dpo/delta': -0.17823287844657898, 'fcm_dpo/margin': 42.87803649902344, 'margin_dpo/margin_mean': 42.87803268432617, 'margin_dpo/margin_std': 58.28101348876953, 'logps/chosen': -97.42596435546875, 'logps/rejected': -170.10873413085938, 'logps/ref_chosen': -68.62410736083984, 'logps/ref_rejected': -98.42886352539062, 'KL/chosen_KL_mean': -28.801855087280273, 'KL/rejected_KL_mean': -71.67987823486328, 'KL/mean': -50.240867614746094, 'KL/std': 47.443092346191406, 'logits/chosen': -0.6801958084106445, 'logits/rejected': -0.6596289873123169, 'epoch': 0.16} + 16%|█▋ | 111/681 [04:43<24:39, 2.60s/it] 16%|█▋ | 112/681 [04:45<23:36, 2.49s/it] {'loss': 1.1236, 'grad_norm': 20.016754150390625, 'learning_rate': 4.942120794399002e-07, 'fcm_dpo/beta': 0.013187635689973831, 'fcm_dpo/q_t': 0.41697975993156433, 'fcm_dpo/delta': 0.039192065596580505, 'fcm_dpo/margin': 27.45223617553711, 'margin_dpo/margin_mean': 27.452232360839844, 'margin_dpo/margin_std': 39.88359069824219, 'logps/chosen': -76.78074645996094, 'logps/rejected': -118.75776672363281, 'logps/ref_chosen': -50.24964141845703, 'logps/ref_rejected': -64.77442932128906, 'KL/chosen_KL_mean': -26.53110122680664, 'KL/rejected_KL_mean': -53.983333587646484, 'KL/mean': -40.25721740722656, 'KL/std': 33.112037658691406, 'logits/chosen': -0.6283408999443054, 'logits/rejected': -0.5890357494354248, 'epoch': 0.16} + 16%|█▋ | 112/681 [04:45<23:36, 2.49s/it] 17%|█▋ | 113/681 [04:47<23:57, 2.53s/it] {'loss': 1.0924, 'grad_norm': 20.27177619934082, 'learning_rate': 4.939343162231841e-07, 'fcm_dpo/beta': 0.013288527727127075, 'fcm_dpo/q_t': 0.4095924198627472, 'fcm_dpo/delta': 0.008226404897868633, 'fcm_dpo/margin': 29.506244659423828, 'margin_dpo/margin_mean': 29.506242752075195, 'margin_dpo/margin_std': 38.19648742675781, 'logps/chosen': -100.16119384765625, 'logps/rejected': -140.92318725585938, 'logps/ref_chosen': -66.71295166015625, 'logps/ref_rejected': -77.96870422363281, 'KL/chosen_KL_mean': -33.448238372802734, 'KL/rejected_KL_mean': -62.954490661621094, 'KL/mean': -48.20136642456055, 'KL/std': 34.57713317871094, 'logits/chosen': -0.6053781509399414, 'logits/rejected': -0.5618308782577515, 'epoch': 0.17} + 17%|█▋ | 113/681 [04:47<23:57, 2.53s/it] 17%|█▋ | 114/681 [04:50<23:46, 2.52s/it] {'loss': 0.9983, 'grad_norm': 21.58888053894043, 'learning_rate': 4.936501251103751e-07, 'fcm_dpo/beta': 0.012787006795406342, 'fcm_dpo/q_t': 0.37409037351608276, 'fcm_dpo/delta': -0.21655288338661194, 'fcm_dpo/margin': 47.027565002441406, 'margin_dpo/margin_mean': 47.027557373046875, 'margin_dpo/margin_std': 63.04608154296875, 'logps/chosen': -88.382080078125, 'logps/rejected': -164.73422241210938, 'logps/ref_chosen': -57.78507995605469, 'logps/ref_rejected': -87.10966491699219, 'KL/chosen_KL_mean': -30.597002029418945, 'KL/rejected_KL_mean': -77.62455749511719, 'KL/mean': -54.11078643798828, 'KL/std': 51.99464797973633, 'logits/chosen': -0.6126964092254639, 'logits/rejected': -0.5797730684280396, 'epoch': 0.17} + 17%|█▋ | 114/681 [04:50<23:46, 2.52s/it] 17%|█▋ | 115/681 [04:53<24:11, 2.56s/it] {'loss': 1.1653, 'grad_norm': 28.310956954956055, 'learning_rate': 4.933595135901732e-07, 'fcm_dpo/beta': 0.012744484469294548, 'fcm_dpo/q_t': 0.4138960838317871, 'fcm_dpo/delta': -0.012245994061231613, 'fcm_dpo/margin': 32.29592514038086, 'margin_dpo/margin_mean': 32.29592514038086, 'margin_dpo/margin_std': 65.29641723632812, 'logps/chosen': -105.84144592285156, 'logps/rejected': -171.12025451660156, 'logps/ref_chosen': -65.5826416015625, 'logps/ref_rejected': -98.56552124023438, 'KL/chosen_KL_mean': -40.25880813598633, 'KL/rejected_KL_mean': -72.55473327636719, 'KL/mean': -56.40677261352539, 'KL/std': 49.674827575683594, 'logits/chosen': -0.6323011517524719, 'logits/rejected': -0.6113392114639282, 'epoch': 0.17} + 17%|█▋ | 115/681 [04:53<24:11, 2.56s/it] 17%|█▋ | 116/681 [04:55<23:28, 2.49s/it] {'loss': 1.0586, 'grad_norm': 22.138639450073242, 'learning_rate': 4.930624893204624e-07, 'fcm_dpo/beta': 0.012649480253458023, 'fcm_dpo/q_t': 0.39788171648979187, 'fcm_dpo/delta': -0.055032700300216675, 'fcm_dpo/margin': 35.764259338378906, 'margin_dpo/margin_mean': 35.76426315307617, 'margin_dpo/margin_std': 45.07810974121094, 'logps/chosen': -81.47024536132812, 'logps/rejected': -146.35604858398438, 'logps/ref_chosen': -51.40031433105469, 'logps/ref_rejected': -80.5218505859375, 'KL/chosen_KL_mean': -30.069929122924805, 'KL/rejected_KL_mean': -65.83419799804688, 'KL/mean': -47.952064514160156, 'KL/std': 41.692527770996094, 'logits/chosen': -0.6037485599517822, 'logits/rejected': -0.5925810933113098, 'epoch': 0.17} + 17%|█▋ | 116/681 [04:55<23:28, 2.49s/it] 17%|█▋ | 117/681 [04:57<23:19, 2.48s/it] {'loss': 1.1423, 'grad_norm': 27.81166648864746, 'learning_rate': 4.927590601281083e-07, 'fcm_dpo/beta': 0.012619540095329285, 'fcm_dpo/q_t': 0.41701966524124146, 'fcm_dpo/delta': 0.030971404165029526, 'fcm_dpo/margin': 29.332874298095703, 'margin_dpo/margin_mean': 29.332870483398438, 'margin_dpo/margin_std': 50.427330017089844, 'logps/chosen': -107.90147399902344, 'logps/rejected': -134.51992797851562, 'logps/ref_chosen': -69.29840850830078, 'logps/ref_rejected': -66.583984375, 'KL/chosen_KL_mean': -38.60306930541992, 'KL/rejected_KL_mean': -67.93594360351562, 'KL/mean': -53.26951217651367, 'KL/std': 44.515228271484375, 'logits/chosen': -0.5994728803634644, 'logits/rejected': -0.5634751319885254, 'epoch': 0.17} + 17%|█▋ | 117/681 [04:57<23:19, 2.48s/it] 17%|█▋ | 118/681 [05:00<23:21, 2.49s/it] {'loss': 1.0623, 'grad_norm': 21.09397315979004, 'learning_rate': 4.924492340087524e-07, 'fcm_dpo/beta': 0.01254614070057869, 'fcm_dpo/q_t': 0.3989246189594269, 'fcm_dpo/delta': -0.05177786946296692, 'fcm_dpo/margin': 35.82543182373047, 'margin_dpo/margin_mean': 35.82543182373047, 'margin_dpo/margin_std': 46.26477813720703, 'logps/chosen': -86.39814758300781, 'logps/rejected': -142.25164794921875, 'logps/ref_chosen': -55.6409797668457, 'logps/ref_rejected': -75.66905975341797, 'KL/chosen_KL_mean': -30.75716781616211, 'KL/rejected_KL_mean': -66.58258819580078, 'KL/mean': -48.66988754272461, 'KL/std': 40.41304397583008, 'logits/chosen': -0.6358990669250488, 'logits/rejected': -0.6165393590927124, 'epoch': 0.17} + 17%|█▋ | 118/681 [05:00<23:21, 2.49s/it] 17%|█▋ | 119/681 [05:03<23:53, 2.55s/it] {'loss': 1.1009, 'grad_norm': 23.629602432250977, 'learning_rate': 4.92133019126601e-07, 'fcm_dpo/beta': 0.012348956428468227, 'fcm_dpo/q_t': 0.40484434366226196, 'fcm_dpo/delta': -0.04121140390634537, 'fcm_dpo/margin': 35.49361801147461, 'margin_dpo/margin_mean': 35.49361801147461, 'margin_dpo/margin_std': 55.28799057006836, 'logps/chosen': -115.8270492553711, 'logps/rejected': -180.7877655029297, 'logps/ref_chosen': -73.51019287109375, 'logps/ref_rejected': -102.977294921875, 'KL/chosen_KL_mean': -42.31685256958008, 'KL/rejected_KL_mean': -77.81047058105469, 'KL/mean': -60.063663482666016, 'KL/std': 45.433780670166016, 'logits/chosen': -0.6368391513824463, 'logits/rejected': -0.6250006556510925, 'epoch': 0.17} + 17%|█▋ | 119/681 [05:03<23:53, 2.55s/it] 18%|█▊ | 120/681 [05:05<24:15, 2.60s/it] {'loss': 0.9948, 'grad_norm': 22.18862533569336, 'learning_rate': 4.918104238142103e-07, 'fcm_dpo/beta': 0.012037184089422226, 'fcm_dpo/q_t': 0.3716173470020294, 'fcm_dpo/delta': -0.197471484541893, 'fcm_dpo/margin': 48.66258239746094, 'margin_dpo/margin_mean': 48.66257858276367, 'margin_dpo/margin_std': 59.4996223449707, 'logps/chosen': -120.55703735351562, 'logps/rejected': -200.4625244140625, 'logps/ref_chosen': -76.78083801269531, 'logps/ref_rejected': -108.02374267578125, 'KL/chosen_KL_mean': -43.77620315551758, 'KL/rejected_KL_mean': -92.43879699707031, 'KL/mean': -68.10749816894531, 'KL/std': 56.72157287597656, 'logits/chosen': -0.614201009273529, 'logits/rejected': -0.5810754299163818, 'epoch': 0.18} + 18%|█▊ | 120/681 [05:05<24:15, 2.60s/it] 18%|█▊ | 121/681 [05:08<23:58, 2.57s/it] {'loss': 0.9958, 'grad_norm': 24.160505294799805, 'learning_rate': 4.91481456572267e-07, 'fcm_dpo/beta': 0.011453816667199135, 'fcm_dpo/q_t': 0.3670397102832794, 'fcm_dpo/delta': -0.2350277304649353, 'fcm_dpo/margin': 54.022308349609375, 'margin_dpo/margin_mean': 54.022308349609375, 'margin_dpo/margin_std': 69.530029296875, 'logps/chosen': -103.8895263671875, 'logps/rejected': -206.11651611328125, 'logps/ref_chosen': -61.789894104003906, 'logps/ref_rejected': -109.99456787109375, 'KL/chosen_KL_mean': -42.099632263183594, 'KL/rejected_KL_mean': -96.12193298339844, 'KL/mean': -69.11078643798828, 'KL/std': 55.44459915161133, 'logits/chosen': -0.5971692800521851, 'logits/rejected': -0.5936212539672852, 'epoch': 0.18} + 18%|█▊ | 121/681 [05:08<23:58, 2.57s/it] 18%|█▊ | 122/681 [05:10<23:15, 2.50s/it] {'loss': 0.893, 'grad_norm': 23.03303337097168, 'learning_rate': 4.911461260693638e-07, 'fcm_dpo/beta': 0.010804468765854836, 'fcm_dpo/q_t': 0.3403121829032898, 'fcm_dpo/delta': -0.35293835401535034, 'fcm_dpo/margin': 67.32794952392578, 'margin_dpo/margin_mean': 67.32794952392578, 'margin_dpo/margin_std': 66.18780517578125, 'logps/chosen': -85.25987243652344, 'logps/rejected': -212.39981079101562, 'logps/ref_chosen': -46.9022102355957, 'logps/ref_rejected': -106.71418762207031, 'KL/chosen_KL_mean': -38.35765838623047, 'KL/rejected_KL_mean': -105.68560791015625, 'KL/mean': -72.02163696289062, 'KL/std': 63.397361755371094, 'logits/chosen': -0.5504162311553955, 'logits/rejected': -0.5663501024246216, 'epoch': 0.18} + 18%|█▊ | 122/681 [05:10<23:15, 2.50s/it] 18%|█▊ | 123/681 [05:13<23:50, 2.56s/it] {'loss': 1.1089, 'grad_norm': 21.118499755859375, 'learning_rate': 4.908044411417711e-07, 'fcm_dpo/beta': 0.010443691164255142, 'fcm_dpo/q_t': 0.401348352432251, 'fcm_dpo/delta': -0.06393568962812424, 'fcm_dpo/margin': 44.054771423339844, 'margin_dpo/margin_mean': 44.054771423339844, 'margin_dpo/margin_std': 73.05723571777344, 'logps/chosen': -103.87744903564453, 'logps/rejected': -174.36898803710938, 'logps/ref_chosen': -61.33863830566406, 'logps/ref_rejected': -87.775390625, 'KL/chosen_KL_mean': -42.53881072998047, 'KL/rejected_KL_mean': -86.59358215332031, 'KL/mean': -64.56620025634766, 'KL/std': 54.408897399902344, 'logits/chosen': -0.5548320412635803, 'logits/rejected': -0.537066638469696, 'epoch': 0.18} + 18%|█▊ | 123/681 [05:13<23:50, 2.56s/it] 18%|█▊ | 124/681 [05:15<23:50, 2.57s/it] {'loss': 1.0158, 'grad_norm': 26.101245880126953, 'learning_rate': 4.904564107932048e-07, 'fcm_dpo/beta': 0.010025800205767155, 'fcm_dpo/q_t': 0.3692609667778015, 'fcm_dpo/delta': -0.26980358362197876, 'fcm_dpo/margin': 64.96229553222656, 'margin_dpo/margin_mean': 64.96229553222656, 'margin_dpo/margin_std': 93.43049621582031, 'logps/chosen': -119.86493682861328, 'logps/rejected': -230.95947265625, 'logps/ref_chosen': -71.44833374023438, 'logps/ref_rejected': -117.58056640625, 'KL/chosen_KL_mean': -48.416603088378906, 'KL/rejected_KL_mean': -113.37890625, 'KL/mean': -80.89775085449219, 'KL/std': 74.14457702636719, 'logits/chosen': -0.5674383640289307, 'logits/rejected': -0.5700336694717407, 'epoch': 0.18} + 18%|█▊ | 124/681 [05:15<23:50, 2.57s/it] 18%|█▊ | 125/681 [05:18<23:32, 2.54s/it] {'loss': 1.0263, 'grad_norm': 19.212617874145508, 'learning_rate': 4.90102044194588e-07, 'fcm_dpo/beta': 0.009690500795841217, 'fcm_dpo/q_t': 0.3799913227558136, 'fcm_dpo/delta': -0.16801846027374268, 'fcm_dpo/margin': 57.67080307006836, 'margin_dpo/margin_mean': 57.670806884765625, 'margin_dpo/margin_std': 77.20497131347656, 'logps/chosen': -89.89334106445312, 'logps/rejected': -181.4158172607422, 'logps/ref_chosen': -50.136940002441406, 'logps/ref_rejected': -83.98861694335938, 'KL/chosen_KL_mean': -39.75640106201172, 'KL/rejected_KL_mean': -97.42720031738281, 'KL/mean': -68.591796875, 'KL/std': 62.443931579589844, 'logits/chosen': -0.5034211874008179, 'logits/rejected': -0.5046522617340088, 'epoch': 0.18} + 18%|█▊ | 125/681 [05:18<23:32, 2.54s/it] 19%|█▊ | 126/681 [05:21<23:46, 2.57s/it] {'loss': 1.043, 'grad_norm': 20.403451919555664, 'learning_rate': 4.897413506838102e-07, 'fcm_dpo/beta': 0.009411858394742012, 'fcm_dpo/q_t': 0.38875728845596313, 'fcm_dpo/delta': -0.11267369985580444, 'fcm_dpo/margin': 53.836448669433594, 'margin_dpo/margin_mean': 53.836448669433594, 'margin_dpo/margin_std': 71.72261810302734, 'logps/chosen': -98.8342514038086, 'logps/rejected': -195.13339233398438, 'logps/ref_chosen': -55.66706848144531, 'logps/ref_rejected': -98.1297607421875, 'KL/chosen_KL_mean': -43.16718292236328, 'KL/rejected_KL_mean': -97.00362396240234, 'KL/mean': -70.08540344238281, 'KL/std': 56.352882385253906, 'logits/chosen': -0.5015150308609009, 'logits/rejected': -0.4946970045566559, 'epoch': 0.19} + 19%|█▊ | 126/681 [05:21<23:46, 2.57s/it] 19%|█▊ | 127/681 [05:23<23:56, 2.59s/it] {'loss': 1.1294, 'grad_norm': 20.757905960083008, 'learning_rate': 4.89374339765481e-07, 'fcm_dpo/beta': 0.009420674294233322, 'fcm_dpo/q_t': 0.4141026735305786, 'fcm_dpo/delta': 0.028969500213861465, 'fcm_dpo/margin': 39.496803283691406, 'margin_dpo/margin_mean': 39.496803283691406, 'margin_dpo/margin_std': 62.23881149291992, 'logps/chosen': -98.39008331298828, 'logps/rejected': -158.12799072265625, 'logps/ref_chosen': -56.55467987060547, 'logps/ref_rejected': -76.7957763671875, 'KL/chosen_KL_mean': -41.83540344238281, 'KL/rejected_KL_mean': -81.33221435546875, 'KL/mean': -61.58380889892578, 'KL/std': 49.37230682373047, 'logits/chosen': -0.5394914150238037, 'logits/rejected': -0.5201703310012817, 'epoch': 0.19} + 19%|█▊ | 127/681 [05:23<23:56, 2.59s/it] 19%|█▉ | 128/681 [05:26<24:14, 2.63s/it] {'loss': 1.143, 'grad_norm': 29.94881248474121, 'learning_rate': 4.890010211106795e-07, 'fcm_dpo/beta': 0.009480522945523262, 'fcm_dpo/q_t': 0.41293513774871826, 'fcm_dpo/delta': 0.007658433169126511, 'fcm_dpo/margin': 41.37461853027344, 'margin_dpo/margin_mean': 41.37461853027344, 'margin_dpo/margin_std': 72.8564453125, 'logps/chosen': -103.11402893066406, 'logps/rejected': -162.806640625, 'logps/ref_chosen': -58.12095642089844, 'logps/ref_rejected': -76.43896484375, 'KL/chosen_KL_mean': -44.993072509765625, 'KL/rejected_KL_mean': -86.36769104003906, 'KL/mean': -65.68038177490234, 'KL/std': 58.315940856933594, 'logits/chosen': -0.5278192162513733, 'logits/rejected': -0.5076951384544373, 'epoch': 0.19} + 19%|█▉ | 128/681 [05:26<24:14, 2.63s/it] 19%|█▉ | 129/681 [05:28<24:01, 2.61s/it] {'loss': 1.1482, 'grad_norm': 20.718914031982422, 'learning_rate': 4.88621404556699e-07, 'fcm_dpo/beta': 0.009432371705770493, 'fcm_dpo/q_t': 0.4137777090072632, 'fcm_dpo/delta': -0.013336148113012314, 'fcm_dpo/margin': 43.762451171875, 'margin_dpo/margin_mean': 43.762454986572266, 'margin_dpo/margin_std': 83.50243377685547, 'logps/chosen': -121.18595886230469, 'logps/rejected': -194.67425537109375, 'logps/ref_chosen': -66.91637420654297, 'logps/ref_rejected': -96.6422119140625, 'KL/chosen_KL_mean': -54.26958465576172, 'KL/rejected_KL_mean': -98.03204345703125, 'KL/mean': -76.15081024169922, 'KL/std': 64.25934600830078, 'logits/chosen': -0.5499258637428284, 'logits/rejected': -0.539750337600708, 'epoch': 0.19} + 19%|█▉ | 129/681 [05:29<24:01, 2.61s/it] 19%|█▉ | 130/681 [05:31<23:22, 2.55s/it] {'loss': 0.9951, 'grad_norm': 21.168210983276367, 'learning_rate': 4.882355001067891e-07, 'fcm_dpo/beta': 0.009208977222442627, 'fcm_dpo/q_t': 0.36926034092903137, 'fcm_dpo/delta': -0.22258631885051727, 'fcm_dpo/margin': 66.1448974609375, 'margin_dpo/margin_mean': 66.1448974609375, 'margin_dpo/margin_std': 80.59586334228516, 'logps/chosen': -84.85818481445312, 'logps/rejected': -189.11788940429688, 'logps/ref_chosen': -44.66685104370117, 'logps/ref_rejected': -82.78165435791016, 'KL/chosen_KL_mean': -40.19133758544922, 'KL/rejected_KL_mean': -106.33623504638672, 'KL/mean': -73.26377868652344, 'KL/std': 66.14134216308594, 'logits/chosen': -0.48520392179489136, 'logits/rejected': -0.47909384965896606, 'epoch': 0.19} + 19%|█▉ | 130/681 [05:31<23:22, 2.55s/it] 19%|█▉ | 131/681 [05:33<23:11, 2.53s/it] {'loss': 0.9777, 'grad_norm': 28.092988967895508, 'learning_rate': 4.878433179298909e-07, 'fcm_dpo/beta': 0.008744207210838795, 'fcm_dpo/q_t': 0.3681938648223877, 'fcm_dpo/delta': -0.19108328223228455, 'fcm_dpo/margin': 66.29625701904297, 'margin_dpo/margin_mean': 66.29625701904297, 'margin_dpo/margin_std': 71.81926727294922, 'logps/chosen': -80.99191284179688, 'logps/rejected': -190.80758666992188, 'logps/ref_chosen': -44.924591064453125, 'logps/ref_rejected': -88.44401550292969, 'KL/chosen_KL_mean': -36.06732177734375, 'KL/rejected_KL_mean': -102.36358642578125, 'KL/mean': -69.21546173095703, 'KL/std': 65.6218490600586, 'logits/chosen': -0.48882123827934265, 'logits/rejected': -0.4953649342060089, 'epoch': 0.19} + 19%|█▉ | 131/681 [05:33<23:11, 2.53s/it] 19%|█▉ | 132/681 [05:36<23:20, 2.55s/it] {'loss': 1.0883, 'grad_norm': 19.968385696411133, 'learning_rate': 4.874448683603694e-07, 'fcm_dpo/beta': 0.00856691226363182, 'fcm_dpo/q_t': 0.4011274576187134, 'fcm_dpo/delta': -0.06736327707767487, 'fcm_dpo/margin': 54.165130615234375, 'margin_dpo/margin_mean': 54.165130615234375, 'margin_dpo/margin_std': 85.23381042480469, 'logps/chosen': -107.04725646972656, 'logps/rejected': -190.10345458984375, 'logps/ref_chosen': -59.00108337402344, 'logps/ref_rejected': -87.89215087890625, 'KL/chosen_KL_mean': -48.046173095703125, 'KL/rejected_KL_mean': -102.2113037109375, 'KL/mean': -75.12873840332031, 'KL/std': 65.57552337646484, 'logits/chosen': -0.5298300385475159, 'logits/rejected': -0.5281400680541992, 'epoch': 0.19} + 19%|█▉ | 132/681 [05:36<23:20, 2.55s/it] 20%|█▉ | 133/681 [05:39<23:34, 2.58s/it] {'loss': 1.109, 'grad_norm': 25.56277847290039, 'learning_rate': 4.870401618977415e-07, 'fcm_dpo/beta': 0.008522960357367992, 'fcm_dpo/q_t': 0.40982773900032043, 'fcm_dpo/delta': -0.011873488314449787, 'fcm_dpo/margin': 48.256553649902344, 'margin_dpo/margin_mean': 48.256561279296875, 'margin_dpo/margin_std': 75.63455200195312, 'logps/chosen': -123.40351867675781, 'logps/rejected': -201.38912963867188, 'logps/ref_chosen': -66.60449981689453, 'logps/ref_rejected': -96.33355712890625, 'KL/chosen_KL_mean': -56.799015045166016, 'KL/rejected_KL_mean': -105.05557250976562, 'KL/mean': -80.92729187011719, 'KL/std': 59.583778381347656, 'logits/chosen': -0.506017804145813, 'logits/rejected': -0.4915475845336914, 'epoch': 0.2} + 20%|█▉ | 133/681 [05:39<23:34, 2.58s/it] 20%|█▉ | 134/681 [05:41<22:55, 2.52s/it] {'loss': 1.0664, 'grad_norm': 18.941587448120117, 'learning_rate': 4.866292092063986e-07, 'fcm_dpo/beta': 0.00851006992161274, 'fcm_dpo/q_t': 0.4014682173728943, 'fcm_dpo/delta': -0.040991440415382385, 'fcm_dpo/margin': 51.59928894042969, 'margin_dpo/margin_mean': 51.59928894042969, 'margin_dpo/margin_std': 67.03329467773438, 'logps/chosen': -96.90866088867188, 'logps/rejected': -184.09320068359375, 'logps/ref_chosen': -52.06925582885742, 'logps/ref_rejected': -87.6545181274414, 'KL/chosen_KL_mean': -44.83940505981445, 'KL/rejected_KL_mean': -96.43869018554688, 'KL/mean': -70.63905334472656, 'KL/std': 57.99497604370117, 'logits/chosen': -0.49966758489608765, 'logits/rejected': -0.4865725636482239, 'epoch': 0.2} + 20%|█▉ | 134/681 [05:41<22:55, 2.52s/it] 20%|█▉ | 135/681 [05:43<22:43, 2.50s/it] {'loss': 0.9939, 'grad_norm': 22.25084686279297, 'learning_rate': 4.862120211153265e-07, 'fcm_dpo/beta': 0.008192040026187897, 'fcm_dpo/q_t': 0.37052643299102783, 'fcm_dpo/delta': -0.21948286890983582, 'fcm_dpo/margin': 73.99042510986328, 'margin_dpo/margin_mean': 73.99043273925781, 'margin_dpo/margin_std': 92.94728088378906, 'logps/chosen': -100.07471466064453, 'logps/rejected': -239.69102478027344, 'logps/ref_chosen': -50.353858947753906, 'logps/ref_rejected': -115.97975158691406, 'KL/chosen_KL_mean': -49.720855712890625, 'KL/rejected_KL_mean': -123.71127319335938, 'KL/mean': -86.716064453125, 'KL/std': 76.8663330078125, 'logits/chosen': -0.4897315800189972, 'logits/rejected': -0.5235172510147095, 'epoch': 0.2} + 20%|█▉ | 135/681 [05:43<22:43, 2.50s/it] 20%|█▉ | 136/681 [05:46<23:13, 2.56s/it] {'loss': 1.1425, 'grad_norm': 20.361692428588867, 'learning_rate': 4.857886086178193e-07, 'fcm_dpo/beta': 0.008057507686316967, 'fcm_dpo/q_t': 0.4188251495361328, 'fcm_dpo/delta': 0.003085322678089142, 'fcm_dpo/margin': 49.19337844848633, 'margin_dpo/margin_mean': 49.193382263183594, 'margin_dpo/margin_std': 89.2242431640625, 'logps/chosen': -124.13349151611328, 'logps/rejected': -204.57559204101562, 'logps/ref_chosen': -65.072509765625, 'logps/ref_rejected': -96.32122802734375, 'KL/chosen_KL_mean': -59.06098175048828, 'KL/rejected_KL_mean': -108.25436401367188, 'KL/mean': -83.65766906738281, 'KL/std': 69.63789367675781, 'logits/chosen': -0.48390525579452515, 'logits/rejected': -0.4752395749092102, 'epoch': 0.2} + 20%|█▉ | 136/681 [05:46<23:13, 2.56s/it] 20%|██ | 137/681 [05:49<23:05, 2.55s/it] {'loss': 1.025, 'grad_norm': 18.20696258544922, 'learning_rate': 4.853589828711902e-07, 'fcm_dpo/beta': 0.007838413119316101, 'fcm_dpo/q_t': 0.37673407793045044, 'fcm_dpo/delta': -0.21924690902233124, 'fcm_dpo/margin': 77.27558898925781, 'margin_dpo/margin_mean': 77.27558898925781, 'margin_dpo/margin_std': 110.96575927734375, 'logps/chosen': -105.69461059570312, 'logps/rejected': -248.07485961914062, 'logps/ref_chosen': -48.759117126464844, 'logps/ref_rejected': -113.86376953125, 'KL/chosen_KL_mean': -56.935489654541016, 'KL/rejected_KL_mean': -134.21109008789062, 'KL/mean': -95.57328796386719, 'KL/std': 92.69071960449219, 'logits/chosen': -0.45655950903892517, 'logits/rejected': -0.48352983593940735, 'epoch': 0.2} + 20%|██ | 137/681 [05:49<23:05, 2.55s/it] 20%|██ | 138/681 [05:51<22:25, 2.48s/it] {'loss': 1.0551, 'grad_norm': 21.46184730529785, 'learning_rate': 4.849231551964771e-07, 'fcm_dpo/beta': 0.00770821887999773, 'fcm_dpo/q_t': 0.3963842988014221, 'fcm_dpo/delta': -0.055549122393131256, 'fcm_dpo/margin': 58.77688217163086, 'margin_dpo/margin_mean': 58.77688217163086, 'margin_dpo/margin_std': 72.2972640991211, 'logps/chosen': -119.67478942871094, 'logps/rejected': -211.12896728515625, 'logps/ref_chosen': -60.519649505615234, 'logps/ref_rejected': -93.19694519042969, 'KL/chosen_KL_mean': -59.1551399230957, 'KL/rejected_KL_mean': -117.9320068359375, 'KL/mean': -88.54357147216797, 'KL/std': 70.5914306640625, 'logits/chosen': -0.4387979507446289, 'logits/rejected': -0.4272313714027405, 'epoch': 0.2} + 20%|██ | 138/681 [05:51<22:25, 2.48s/it] 20%|██ | 139/681 [05:53<22:09, 2.45s/it] {'loss': 1.0186, 'grad_norm': 18.216188430786133, 'learning_rate': 4.844811370781446e-07, 'fcm_dpo/beta': 0.007536326535046101, 'fcm_dpo/q_t': 0.3828091025352478, 'fcm_dpo/delta': -0.13581906259059906, 'fcm_dpo/margin': 70.17027282714844, 'margin_dpo/margin_mean': 70.17028045654297, 'margin_dpo/margin_std': 86.85481262207031, 'logps/chosen': -96.81980895996094, 'logps/rejected': -199.82669067382812, 'logps/ref_chosen': -46.89138412475586, 'logps/ref_rejected': -79.72798156738281, 'KL/chosen_KL_mean': -49.92842483520508, 'KL/rejected_KL_mean': -120.09870910644531, 'KL/mean': -85.01356506347656, 'KL/std': 67.0115737915039, 'logits/chosen': -0.4372691512107849, 'logits/rejected': -0.42744114995002747, 'epoch': 0.2} + 20%|██ | 139/681 [05:53<22:09, 2.45s/it] 21%|██ | 140/681 [05:56<21:52, 2.43s/it] {'loss': 1.0673, 'grad_norm': 21.68344497680664, 'learning_rate': 4.840329401637809e-07, 'fcm_dpo/beta': 0.0073799854144454, 'fcm_dpo/q_t': 0.3952398896217346, 'fcm_dpo/delta': -0.07513369619846344, 'fcm_dpo/margin': 63.89509582519531, 'margin_dpo/margin_mean': 63.89509201049805, 'margin_dpo/margin_std': 89.70909118652344, 'logps/chosen': -119.85308837890625, 'logps/rejected': -208.05758666992188, 'logps/ref_chosen': -58.97471618652344, 'logps/ref_rejected': -83.28410339355469, 'KL/chosen_KL_mean': -60.878379821777344, 'KL/rejected_KL_mean': -124.77346801757812, 'KL/mean': -92.82592010498047, 'KL/std': 74.88318634033203, 'logits/chosen': -0.4220992922782898, 'logits/rejected': -0.40758657455444336, 'epoch': 0.21} + 21%|██ | 140/681 [05:56<21:52, 2.43s/it] 21%|██ | 141/681 [05:58<22:18, 2.48s/it] {'loss': 1.1025, 'grad_norm': 26.641067504882812, 'learning_rate': 4.83578576263792e-07, 'fcm_dpo/beta': 0.007316044997423887, 'fcm_dpo/q_t': 0.40099745988845825, 'fcm_dpo/delta': -0.04346423223614693, 'fcm_dpo/margin': 60.355979919433594, 'margin_dpo/margin_mean': 60.355979919433594, 'margin_dpo/margin_std': 95.37814331054688, 'logps/chosen': -143.77337646484375, 'logps/rejected': -227.24595642089844, 'logps/ref_chosen': -75.07566833496094, 'logps/ref_rejected': -98.1922607421875, 'KL/chosen_KL_mean': -68.69772338867188, 'KL/rejected_KL_mean': -129.05369567871094, 'KL/mean': -98.87570190429688, 'KL/std': 82.59878540039062, 'logits/chosen': -0.43964171409606934, 'logits/rejected': -0.42764222621917725, 'epoch': 0.21} + 21%|██ | 141/681 [05:58<22:18, 2.48s/it] 21%|██ | 142/681 [06:01<22:59, 2.56s/it] {'loss': 1.0857, 'grad_norm': 28.223947525024414, 'learning_rate': 4.83118057351089e-07, 'fcm_dpo/beta': 0.0072142817080020905, 'fcm_dpo/q_t': 0.39290913939476013, 'fcm_dpo/delta': -0.10188804566860199, 'fcm_dpo/margin': 68.87380981445312, 'margin_dpo/margin_mean': 68.87380981445312, 'margin_dpo/margin_std': 106.27733612060547, 'logps/chosen': -128.7484893798828, 'logps/rejected': -234.1765899658203, 'logps/ref_chosen': -58.027931213378906, 'logps/ref_rejected': -94.58222961425781, 'KL/chosen_KL_mean': -70.7205581665039, 'KL/rejected_KL_mean': -139.5943603515625, 'KL/mean': -105.15746307373047, 'KL/std': 90.38099670410156, 'logits/chosen': -0.4045884907245636, 'logits/rejected': -0.40342068672180176, 'epoch': 0.21} + 21%|██ | 142/681 [06:01<22:59, 2.56s/it] 21%|██ | 143/681 [06:04<23:28, 2.62s/it] {'loss': 1.1973, 'grad_norm': 23.867572784423828, 'learning_rate': 4.826513955607734e-07, 'fcm_dpo/beta': 0.007206078618764877, 'fcm_dpo/q_t': 0.4320971667766571, 'fcm_dpo/delta': 0.08411475270986557, 'fcm_dpo/margin': 44.20860290527344, 'margin_dpo/margin_mean': 44.20860290527344, 'margin_dpo/margin_std': 93.17940521240234, 'logps/chosen': -131.47650146484375, 'logps/rejected': -197.08822631835938, 'logps/ref_chosen': -57.59645080566406, 'logps/ref_rejected': -78.99957275390625, 'KL/chosen_KL_mean': -73.88005065917969, 'KL/rejected_KL_mean': -118.08866119384766, 'KL/mean': -95.9843521118164, 'KL/std': 79.61582946777344, 'logits/chosen': -0.40776681900024414, 'logits/rejected': -0.4014623761177063, 'epoch': 0.21} + 21%|██ | 143/681 [06:04<23:28, 2.62s/it] 21%|██ | 144/681 [06:07<23:37, 2.64s/it] {'loss': 1.1054, 'grad_norm': 20.957622528076172, 'learning_rate': 4.821786031898176e-07, 'fcm_dpo/beta': 0.007265343330800533, 'fcm_dpo/q_t': 0.41054314374923706, 'fcm_dpo/delta': 0.004837200976908207, 'fcm_dpo/margin': 54.41508483886719, 'margin_dpo/margin_mean': 54.41508483886719, 'margin_dpo/margin_std': 78.49324035644531, 'logps/chosen': -124.96160888671875, 'logps/rejected': -201.4705810546875, 'logps/ref_chosen': -59.90636444091797, 'logps/ref_rejected': -82.00025939941406, 'KL/chosen_KL_mean': -65.05524444580078, 'KL/rejected_KL_mean': -119.47032165527344, 'KL/mean': -92.26278686523438, 'KL/std': 66.92149353027344, 'logits/chosen': -0.3970365524291992, 'logits/rejected': -0.3828931450843811, 'epoch': 0.21} + 21%|██ | 144/681 [06:07<23:37, 2.64s/it] 21%|██▏ | 145/681 [06:09<23:19, 2.61s/it] {'loss': 1.0899, 'grad_norm': 24.766576766967773, 'learning_rate': 4.816996926967401e-07, 'fcm_dpo/beta': 0.007246987894177437, 'fcm_dpo/q_t': 0.40498581528663635, 'fcm_dpo/delta': -0.020106535404920578, 'fcm_dpo/margin': 57.850833892822266, 'margin_dpo/margin_mean': 57.850833892822266, 'margin_dpo/margin_std': 81.18896484375, 'logps/chosen': -119.11579132080078, 'logps/rejected': -198.23226928710938, 'logps/ref_chosen': -56.60066604614258, 'logps/ref_rejected': -77.86631774902344, 'KL/chosen_KL_mean': -62.5151252746582, 'KL/rejected_KL_mean': -120.36595153808594, 'KL/mean': -91.44053649902344, 'KL/std': 66.93643188476562, 'logits/chosen': -0.4346858263015747, 'logits/rejected': -0.4181862473487854, 'epoch': 0.21} + 21%|██▏ | 145/681 [06:09<23:19, 2.61s/it] 21%|██▏ | 146/681 [06:12<23:10, 2.60s/it] {'loss': 1.1855, 'grad_norm': 26.79239273071289, 'learning_rate': 4.812146767012779e-07, 'fcm_dpo/beta': 0.007306361570954323, 'fcm_dpo/q_t': 0.4260821044445038, 'fcm_dpo/delta': 0.07646190375089645, 'fcm_dpo/margin': 44.6298828125, 'margin_dpo/margin_mean': 44.6298828125, 'margin_dpo/margin_std': 87.585693359375, 'logps/chosen': -150.92311096191406, 'logps/rejected': -211.25531005859375, 'logps/ref_chosen': -66.00045013427734, 'logps/ref_rejected': -81.70278930664062, 'KL/chosen_KL_mean': -84.92266082763672, 'KL/rejected_KL_mean': -129.55252075195312, 'KL/mean': -107.23759460449219, 'KL/std': 72.51426696777344, 'logits/chosen': -0.40483713150024414, 'logits/rejected': -0.3770599961280823, 'epoch': 0.21} + 21%|██▏ | 146/681 [06:12<23:10, 2.60s/it] 22%|██▏ | 147/681 [06:14<23:10, 2.60s/it] {'loss': 1.0871, 'grad_norm': 20.187551498413086, 'learning_rate': 4.807235679840536e-07, 'fcm_dpo/beta': 0.007286765147000551, 'fcm_dpo/q_t': 0.40113240480422974, 'fcm_dpo/delta': -0.050357475876808167, 'fcm_dpo/margin': 61.472869873046875, 'margin_dpo/margin_mean': 61.472869873046875, 'margin_dpo/margin_std': 91.1969985961914, 'logps/chosen': -115.55127716064453, 'logps/rejected': -195.00927734375, 'logps/ref_chosen': -53.405487060546875, 'logps/ref_rejected': -71.39060974121094, 'KL/chosen_KL_mean': -62.145790100097656, 'KL/rejected_KL_mean': -123.61865997314453, 'KL/mean': -92.88223266601562, 'KL/std': 73.63186645507812, 'logits/chosen': -0.4601389765739441, 'logits/rejected': -0.4417986273765564, 'epoch': 0.22} + 22%|██▏ | 147/681 [06:14<23:10, 2.60s/it] 22%|██▏ | 148/681 [06:17<23:01, 2.59s/it] {'loss': 1.1251, 'grad_norm': 18.90130043029785, 'learning_rate': 4.802263794862384e-07, 'fcm_dpo/beta': 0.007221372798085213, 'fcm_dpo/q_t': 0.41665685176849365, 'fcm_dpo/delta': -0.08385775983333588, 'fcm_dpo/margin': 52.114810943603516, 'margin_dpo/margin_mean': 52.114810943603516, 'margin_dpo/margin_std': 76.679443359375, 'logps/chosen': -125.7578125, 'logps/rejected': -216.02938842773438, 'logps/ref_chosen': -64.93708038330078, 'logps/ref_rejected': -103.09384155273438, 'KL/chosen_KL_mean': -60.82073211669922, 'KL/rejected_KL_mean': -112.935546875, 'KL/mean': -86.87813568115234, 'KL/std': 72.8238296508789, 'logits/chosen': -0.4921185076236725, 'logits/rejected': -0.4849007725715637, 'epoch': 0.22} + 22%|██▏ | 148/681 [06:17<23:01, 2.59s/it] 22%|██▏ | 149/681 [06:19<23:13, 2.62s/it] {'loss': 1.0541, 'grad_norm': 18.19388198852539, 'learning_rate': 4.797231243092118e-07, 'fcm_dpo/beta': 0.0070372275076806545, 'fcm_dpo/q_t': 0.3951166570186615, 'fcm_dpo/delta': -0.0631796270608902, 'fcm_dpo/margin': 65.15960693359375, 'margin_dpo/margin_mean': 65.15960693359375, 'margin_dpo/margin_std': 78.02362060546875, 'logps/chosen': -116.41766357421875, 'logps/rejected': -222.4182586669922, 'logps/ref_chosen': -58.47376251220703, 'logps/ref_rejected': -99.31474304199219, 'KL/chosen_KL_mean': -57.94389724731445, 'KL/rejected_KL_mean': -123.103515625, 'KL/mean': -90.5237045288086, 'KL/std': 65.8777847290039, 'logits/chosen': -0.49327534437179565, 'logits/rejected': -0.47827810049057007, 'epoch': 0.22} + 22%|██▏ | 149/681 [06:20<23:13, 2.62s/it] 22%|██▏ | 150/681 [06:22<23:08, 2.61s/it] {'loss': 1.0829, 'grad_norm': 17.938838958740234, 'learning_rate': 4.792138157142157e-07, 'fcm_dpo/beta': 0.006972650997340679, 'fcm_dpo/q_t': 0.4047049582004547, 'fcm_dpo/delta': -0.04591844975948334, 'fcm_dpo/margin': 63.47180938720703, 'margin_dpo/margin_mean': 63.47180938720703, 'margin_dpo/margin_std': 92.78956604003906, 'logps/chosen': -97.14087677001953, 'logps/rejected': -198.25445556640625, 'logps/ref_chosen': -45.705810546875, 'logps/ref_rejected': -83.34759521484375, 'KL/chosen_KL_mean': -51.43506622314453, 'KL/rejected_KL_mean': -114.90686798095703, 'KL/mean': -83.17096710205078, 'KL/std': 77.47767639160156, 'logits/chosen': -0.46106183528900146, 'logits/rejected': -0.4645771384239197, 'epoch': 0.22} + 22%|██▏ | 150/681 [06:22<23:08, 2.61s/it] 22%|██▏ | 151/681 [06:25<22:32, 2.55s/it] {'loss': 1.061, 'grad_norm': 19.721174240112305, 'learning_rate': 4.786984671220053e-07, 'fcm_dpo/beta': 0.006956371478736401, 'fcm_dpo/q_t': 0.398138165473938, 'fcm_dpo/delta': -0.046492453664541245, 'fcm_dpo/margin': 63.88605499267578, 'margin_dpo/margin_mean': 63.88605499267578, 'margin_dpo/margin_std': 79.7131118774414, 'logps/chosen': -134.43002319335938, 'logps/rejected': -228.2090606689453, 'logps/ref_chosen': -70.57083129882812, 'logps/ref_rejected': -100.46382141113281, 'KL/chosen_KL_mean': -63.859195709228516, 'KL/rejected_KL_mean': -127.74524688720703, 'KL/mean': -95.80221557617188, 'KL/std': 72.78681945800781, 'logits/chosen': -0.5282187461853027, 'logits/rejected': -0.5002726912498474, 'epoch': 0.22} + 22%|██▏ | 151/681 [06:25<22:32, 2.55s/it] 22%|██▏ | 152/681 [06:27<22:45, 2.58s/it] {'loss': 1.0175, 'grad_norm': 20.134836196899414, 'learning_rate': 4.78177092112495e-07, 'fcm_dpo/beta': 0.0068196142092347145, 'fcm_dpo/q_t': 0.38243043422698975, 'fcm_dpo/delta': -0.13479407131671906, 'fcm_dpo/margin': 77.40454864501953, 'margin_dpo/margin_mean': 77.40455627441406, 'margin_dpo/margin_std': 91.77665710449219, 'logps/chosen': -115.86285400390625, 'logps/rejected': -239.24346923828125, 'logps/ref_chosen': -60.16438674926758, 'logps/ref_rejected': -106.14045715332031, 'KL/chosen_KL_mean': -55.69847106933594, 'KL/rejected_KL_mean': -133.10302734375, 'KL/mean': -94.40074157714844, 'KL/std': 75.01636505126953, 'logits/chosen': -0.5108896493911743, 'logits/rejected': -0.5100568532943726, 'epoch': 0.22} + 22%|██▏ | 152/681 [06:27<22:45, 2.58s/it] 22%|██▏ | 153/681 [06:30<22:45, 2.59s/it] {'loss': 1.0897, 'grad_norm': 15.487606048583984, 'learning_rate': 4.776497044244016e-07, 'fcm_dpo/beta': 0.0067241257056593895, 'fcm_dpo/q_t': 0.40355831384658813, 'fcm_dpo/delta': -0.04522576555609703, 'fcm_dpo/margin': 65.91687774658203, 'margin_dpo/margin_mean': 65.91687774658203, 'margin_dpo/margin_std': 99.65748596191406, 'logps/chosen': -112.83929443359375, 'logps/rejected': -208.0967254638672, 'logps/ref_chosen': -56.315277099609375, 'logps/ref_rejected': -85.65583801269531, 'KL/chosen_KL_mean': -56.524017333984375, 'KL/rejected_KL_mean': -122.4408950805664, 'KL/mean': -89.48245239257812, 'KL/std': 81.44053649902344, 'logits/chosen': -0.4853493571281433, 'logits/rejected': -0.48000335693359375, 'epoch': 0.22} + 22%|██▏ | 153/681 [06:30<22:45, 2.59s/it] 23%|██▎ | 154/681 [06:32<23:00, 2.62s/it] {'loss': 1.1211, 'grad_norm': 18.857498168945312, 'learning_rate': 4.771163179548808e-07, 'fcm_dpo/beta': 0.006699780933558941, 'fcm_dpo/q_t': 0.4071503281593323, 'fcm_dpo/delta': -0.023667776957154274, 'fcm_dpo/margin': 63.05192947387695, 'margin_dpo/margin_mean': 63.05193328857422, 'margin_dpo/margin_std': 104.29759216308594, 'logps/chosen': -130.59840393066406, 'logps/rejected': -235.15194702148438, 'logps/ref_chosen': -62.74256896972656, 'logps/ref_rejected': -104.24420166015625, 'KL/chosen_KL_mean': -67.8558349609375, 'KL/rejected_KL_mean': -130.9077606201172, 'KL/mean': -99.38180541992188, 'KL/std': 81.84564208984375, 'logits/chosen': -0.4654182493686676, 'logits/rejected': -0.4673847556114197, 'epoch': 0.23} + 23%|██▎ | 154/681 [06:32<23:00, 2.62s/it] 23%|██▎ | 155/681 [06:35<22:58, 2.62s/it] {'loss': 1.0941, 'grad_norm': 19.28769302368164, 'learning_rate': 4.7657694675916247e-07, 'fcm_dpo/beta': 0.006656583398580551, 'fcm_dpo/q_t': 0.4044332206249237, 'fcm_dpo/delta': -0.026573501527309418, 'fcm_dpo/margin': 63.900962829589844, 'margin_dpo/margin_mean': 63.900962829589844, 'margin_dpo/margin_std': 94.01338195800781, 'logps/chosen': -122.43425750732422, 'logps/rejected': -203.1742401123047, 'logps/ref_chosen': -60.65318298339844, 'logps/ref_rejected': -77.49220275878906, 'KL/chosen_KL_mean': -61.78107452392578, 'KL/rejected_KL_mean': -125.68203735351562, 'KL/mean': -93.73155975341797, 'KL/std': 76.08834838867188, 'logits/chosen': -0.48790478706359863, 'logits/rejected': -0.468170702457428, 'epoch': 0.23} + 23%|██▎ | 155/681 [06:35<22:58, 2.62s/it] 23%|██▎ | 156/681 [06:38<22:58, 2.63s/it] {'loss': 1.276, 'grad_norm': 27.65213966369629, 'learning_rate': 4.7603160505017893e-07, 'fcm_dpo/beta': 0.006699825637042522, 'fcm_dpo/q_t': 0.44462156295776367, 'fcm_dpo/delta': 0.053138453513383865, 'fcm_dpo/margin': 36.46794128417969, 'margin_dpo/margin_mean': 36.46794128417969, 'margin_dpo/margin_std': 106.41633605957031, 'logps/chosen': -156.38607788085938, 'logps/rejected': -200.53143310546875, 'logps/ref_chosen': -69.49188232421875, 'logps/ref_rejected': -77.16929626464844, 'KL/chosen_KL_mean': -86.89419555664062, 'KL/rejected_KL_mean': -123.36213684082031, 'KL/mean': -105.12815856933594, 'KL/std': 80.70298767089844, 'logits/chosen': -0.419753760099411, 'logits/rejected': -0.41109997034072876, 'epoch': 0.23} + 23%|██▎ | 156/681 [06:38<22:58, 2.63s/it] 23%|██▎ | 157/681 [06:40<22:24, 2.57s/it] {'loss': 1.0267, 'grad_norm': 23.294269561767578, 'learning_rate': 4.7548030719819154e-07, 'fcm_dpo/beta': 0.00652310810983181, 'fcm_dpo/q_t': 0.3782350420951843, 'fcm_dpo/delta': -0.14256341755390167, 'fcm_dpo/margin': 81.80308532714844, 'margin_dpo/margin_mean': 81.80308532714844, 'margin_dpo/margin_std': 100.67677307128906, 'logps/chosen': -139.71270751953125, 'logps/rejected': -267.793701171875, 'logps/ref_chosen': -61.368438720703125, 'logps/ref_rejected': -107.64636993408203, 'KL/chosen_KL_mean': -78.34427642822266, 'KL/rejected_KL_mean': -160.1473388671875, 'KL/mean': -119.24581909179688, 'KL/std': 88.42298889160156, 'logits/chosen': -0.40555089712142944, 'logits/rejected': -0.4124807119369507, 'epoch': 0.23} + 23%|██▎ | 157/681 [06:40<22:24, 2.57s/it] 23%|██▎ | 158/681 [06:43<22:31, 2.58s/it] {'loss': 1.0524, 'grad_norm': 20.690141677856445, 'learning_rate': 4.7492306773041136e-07, 'fcm_dpo/beta': 0.006357924081385136, 'fcm_dpo/q_t': 0.386168897151947, 'fcm_dpo/delta': -0.16277411580085754, 'fcm_dpo/margin': 87.10411834716797, 'margin_dpo/margin_mean': 87.10411071777344, 'margin_dpo/margin_std': 131.93109130859375, 'logps/chosen': -136.56137084960938, 'logps/rejected': -279.74725341796875, 'logps/ref_chosen': -57.612918853759766, 'logps/ref_rejected': -113.6946792602539, 'KL/chosen_KL_mean': -78.94845581054688, 'KL/rejected_KL_mean': -166.05255126953125, 'KL/mean': -122.50050354003906, 'KL/std': 109.67285919189453, 'logits/chosen': -0.3853977918624878, 'logits/rejected': -0.4026561975479126, 'epoch': 0.23} + 23%|██▎ | 158/681 [06:43<22:31, 2.58s/it] 23%|██▎ | 159/681 [06:45<22:38, 2.60s/it] {'loss': 1.1467, 'grad_norm': 21.42896270751953, 'learning_rate': 4.743599013306165e-07, 'fcm_dpo/beta': 0.0063509754836559296, 'fcm_dpo/q_t': 0.4156301021575928, 'fcm_dpo/delta': 0.02134835720062256, 'fcm_dpo/margin': 59.67415237426758, 'margin_dpo/margin_mean': 59.674156188964844, 'margin_dpo/margin_std': 104.58900451660156, 'logps/chosen': -171.38925170898438, 'logps/rejected': -238.40176391601562, 'logps/ref_chosen': -81.56034851074219, 'logps/ref_rejected': -88.89871215820312, 'KL/chosen_KL_mean': -89.82890319824219, 'KL/rejected_KL_mean': -149.5030517578125, 'KL/mean': -119.66598510742188, 'KL/std': 97.01811218261719, 'logits/chosen': -0.4047996401786804, 'logits/rejected': -0.37329649925231934, 'epoch': 0.23} + 23%|██▎ | 159/681 [06:45<22:38, 2.60s/it] 23%|██▎ | 160/681 [06:48<22:23, 2.58s/it] {'loss': 1.091, 'grad_norm': 22.98140525817871, 'learning_rate': 4.737908228387656e-07, 'fcm_dpo/beta': 0.006208137609064579, 'fcm_dpo/q_t': 0.39641568064689636, 'fcm_dpo/delta': -0.09941543638706207, 'fcm_dpo/margin': 79.51667785644531, 'margin_dpo/margin_mean': 79.51667785644531, 'margin_dpo/margin_std': 128.43882751464844, 'logps/chosen': -156.92312622070312, 'logps/rejected': -267.9267578125, 'logps/ref_chosen': -65.73088073730469, 'logps/ref_rejected': -97.21781921386719, 'KL/chosen_KL_mean': -91.19225311279297, 'KL/rejected_KL_mean': -170.70892333984375, 'KL/mean': -130.95059204101562, 'KL/std': 102.61758422851562, 'logits/chosen': -0.3908793032169342, 'logits/rejected': -0.38329264521598816, 'epoch': 0.23} + 23%|██▎ | 160/681 [06:48<22:23, 2.58s/it] 24%|██▎ | 161/681 [06:50<21:28, 2.48s/it] {'loss': 1.0944, 'grad_norm': 21.47429084777832, 'learning_rate': 4.7321584725060594e-07, 'fcm_dpo/beta': 0.00617564469575882, 'fcm_dpo/q_t': 0.4049830436706543, 'fcm_dpo/delta': -0.03067013993859291, 'fcm_dpo/margin': 69.5224380493164, 'margin_dpo/margin_mean': 69.5224380493164, 'margin_dpo/margin_std': 102.78611755371094, 'logps/chosen': -130.89999389648438, 'logps/rejected': -231.41690063476562, 'logps/ref_chosen': -52.43647003173828, 'logps/ref_rejected': -83.43095397949219, 'KL/chosen_KL_mean': -78.4635238647461, 'KL/rejected_KL_mean': -147.9859619140625, 'KL/mean': -113.22474670410156, 'KL/std': 81.19598388671875, 'logits/chosen': -0.3746282160282135, 'logits/rejected': -0.3739486634731293, 'epoch': 0.24} + 24%|██▎ | 161/681 [06:50<21:28, 2.48s/it] 24%|██▍ | 162/681 [06:53<22:10, 2.56s/it] {'loss': 1.108, 'grad_norm': 23.615018844604492, 'learning_rate': 4.7263498971727905e-07, 'fcm_dpo/beta': 0.006094048731029034, 'fcm_dpo/q_t': 0.4067990183830261, 'fcm_dpo/delta': -0.02500341832637787, 'fcm_dpo/margin': 69.31608581542969, 'margin_dpo/margin_mean': 69.31608581542969, 'margin_dpo/margin_std': 106.76126861572266, 'logps/chosen': -138.17245483398438, 'logps/rejected': -234.26853942871094, 'logps/ref_chosen': -62.6105842590332, 'logps/ref_rejected': -89.39057922363281, 'KL/chosen_KL_mean': -75.56187438964844, 'KL/rejected_KL_mean': -144.87796020507812, 'KL/mean': -110.21990966796875, 'KL/std': 90.19256591796875, 'logits/chosen': -0.4356382191181183, 'logits/rejected': -0.41989463567733765, 'epoch': 0.24} + 24%|██▍ | 162/681 [06:53<22:10, 2.56s/it] 24%|██▍ | 163/681 [06:55<22:04, 2.56s/it] {'loss': 1.1182, 'grad_norm': 22.441957473754883, 'learning_rate': 4.720482655449212e-07, 'fcm_dpo/beta': 0.006128158885985613, 'fcm_dpo/q_t': 0.41041916608810425, 'fcm_dpo/delta': -0.010875340551137924, 'fcm_dpo/margin': 66.97450256347656, 'margin_dpo/margin_mean': 66.97450256347656, 'margin_dpo/margin_std': 109.2835693359375, 'logps/chosen': -140.66371154785156, 'logps/rejected': -228.03482055664062, 'logps/ref_chosen': -55.021629333496094, 'logps/ref_rejected': -75.418212890625, 'KL/chosen_KL_mean': -85.64208221435547, 'KL/rejected_KL_mean': -152.61660766601562, 'KL/mean': -119.12933349609375, 'KL/std': 90.75747680664062, 'logits/chosen': -0.32167524099349976, 'logits/rejected': -0.3008124828338623, 'epoch': 0.24} + 24%|██▍ | 163/681 [06:56<22:04, 2.56s/it] 24%|██▍ | 164/681 [06:58<21:55, 2.55s/it] {'loss': 1.0369, 'grad_norm': 22.113636016845703, 'learning_rate': 4.714556901942599e-07, 'fcm_dpo/beta': 0.005989897530525923, 'fcm_dpo/q_t': 0.38811802864074707, 'fcm_dpo/delta': -0.10233054310083389, 'fcm_dpo/margin': 82.75530242919922, 'margin_dpo/margin_mean': 82.75530242919922, 'margin_dpo/margin_std': 102.402587890625, 'logps/chosen': -133.78253173828125, 'logps/rejected': -240.56179809570312, 'logps/ref_chosen': -55.64066696166992, 'logps/ref_rejected': -79.66463470458984, 'KL/chosen_KL_mean': -78.14186096191406, 'KL/rejected_KL_mean': -160.89715576171875, 'KL/mean': -119.51951599121094, 'KL/std': 89.18498992919922, 'logits/chosen': -0.3749139904975891, 'logits/rejected': -0.36253267526626587, 'epoch': 0.24} + 24%|██▍ | 164/681 [06:58<21:55, 2.55s/it] 24%|██▍ | 165/681 [07:01<21:53, 2.54s/it] {'loss': 1.1723, 'grad_norm': 23.23404312133789, 'learning_rate': 4.708572792802069e-07, 'fcm_dpo/beta': 0.0060513936914503574, 'fcm_dpo/q_t': 0.4266318678855896, 'fcm_dpo/delta': 0.07517173886299133, 'fcm_dpo/margin': 54.094295501708984, 'margin_dpo/margin_mean': 54.09429931640625, 'margin_dpo/margin_std': 100.12647247314453, 'logps/chosen': -144.8949737548828, 'logps/rejected': -211.34918212890625, 'logps/ref_chosen': -61.310691833496094, 'logps/ref_rejected': -73.67060852050781, 'KL/chosen_KL_mean': -83.58428192138672, 'KL/rejected_KL_mean': -137.67855834960938, 'KL/mean': -110.63142395019531, 'KL/std': 75.4303970336914, 'logits/chosen': -0.37104758620262146, 'logits/rejected': -0.3426979184150696, 'epoch': 0.24} + 24%|██▍ | 165/681 [07:01<21:53, 2.54s/it] 24%|██▍ | 166/681 [07:03<21:00, 2.45s/it] {'loss': 1.0172, 'grad_norm': 18.252317428588867, 'learning_rate': 4.702530485714461e-07, 'fcm_dpo/beta': 0.005887492559850216, 'fcm_dpo/q_t': 0.38029175996780396, 'fcm_dpo/delta': -0.1977493166923523, 'fcm_dpo/margin': 99.35612487792969, 'margin_dpo/margin_mean': 99.35612487792969, 'margin_dpo/margin_std': 138.07797241210938, 'logps/chosen': -125.40187072753906, 'logps/rejected': -271.8695068359375, 'logps/ref_chosen': -50.98360061645508, 'logps/ref_rejected': -98.09512329101562, 'KL/chosen_KL_mean': -74.41826629638672, 'KL/rejected_KL_mean': -173.77439880371094, 'KL/mean': -124.09632873535156, 'KL/std': 109.63383483886719, 'logits/chosen': -0.3085266351699829, 'logits/rejected': -0.3185623288154602, 'epoch': 0.24} + 24%|██▍ | 166/681 [07:03<21:00, 2.45s/it] 25%|██▍ | 167/681 [07:05<21:24, 2.50s/it] {'loss': 0.9685, 'grad_norm': 20.504474639892578, 'learning_rate': 4.6964301399001877e-07, 'fcm_dpo/beta': 0.005670108832418919, 'fcm_dpo/q_t': 0.36611077189445496, 'fcm_dpo/delta': -0.21297289431095123, 'fcm_dpo/margin': 105.83956909179688, 'margin_dpo/margin_mean': 105.83956909179688, 'margin_dpo/margin_std': 115.47872924804688, 'logps/chosen': -125.37691497802734, 'logps/rejected': -276.82281494140625, 'logps/ref_chosen': -50.424095153808594, 'logps/ref_rejected': -96.03042602539062, 'KL/chosen_KL_mean': -74.95281982421875, 'KL/rejected_KL_mean': -180.79238891601562, 'KL/mean': -127.87260437011719, 'KL/std': 100.11784362792969, 'logits/chosen': -0.32652735710144043, 'logits/rejected': -0.32873308658599854, 'epoch': 0.25} + 25%|██▍ | 167/681 [07:05<21:24, 2.50s/it] 25%|██▍ | 168/681 [07:08<21:35, 2.53s/it] {'loss': 1.0813, 'grad_norm': 20.050628662109375, 'learning_rate': 4.690271916109034e-07, 'fcm_dpo/beta': 0.00557487178593874, 'fcm_dpo/q_t': 0.40447282791137695, 'fcm_dpo/delta': -0.027986720204353333, 'fcm_dpo/margin': 76.5313491821289, 'margin_dpo/margin_mean': 76.53134155273438, 'margin_dpo/margin_std': 104.77227783203125, 'logps/chosen': -130.77529907226562, 'logps/rejected': -233.15237426757812, 'logps/ref_chosen': -49.462825775146484, 'logps/ref_rejected': -75.30855560302734, 'KL/chosen_KL_mean': -81.31246948242188, 'KL/rejected_KL_mean': -157.84381103515625, 'KL/mean': -119.5781478881836, 'KL/std': 93.51333618164062, 'logits/chosen': -0.34561973810195923, 'logits/rejected': -0.33558547496795654, 'epoch': 0.25} + 25%|██▍ | 168/681 [07:08<21:35, 2.53s/it] 25%|██▍ | 169/681 [07:11<22:05, 2.59s/it] {'loss': 1.1616, 'grad_norm': 20.767568588256836, 'learning_rate': 4.6840559766159235e-07, 'fcm_dpo/beta': 0.005499421618878841, 'fcm_dpo/q_t': 0.42043811082839966, 'fcm_dpo/delta': -0.07137174159288406, 'fcm_dpo/margin': 66.98680114746094, 'margin_dpo/margin_mean': 66.98680877685547, 'margin_dpo/margin_std': 126.373779296875, 'logps/chosen': -143.52255249023438, 'logps/rejected': -234.05165100097656, 'logps/ref_chosen': -59.803443908691406, 'logps/ref_rejected': -83.34574890136719, 'KL/chosen_KL_mean': -83.71910095214844, 'KL/rejected_KL_mean': -150.70590209960938, 'KL/mean': -117.21250915527344, 'KL/std': 92.11152648925781, 'logits/chosen': -0.38283443450927734, 'logits/rejected': -0.3673766255378723, 'epoch': 0.25} + 25%|██▍ | 169/681 [07:11<22:05, 2.59s/it] 25%|██▍ | 170/681 [07:13<22:34, 2.65s/it] {'loss': 1.0797, 'grad_norm': 18.433393478393555, 'learning_rate': 4.6777824852166437e-07, 'fcm_dpo/beta': 0.005440497770905495, 'fcm_dpo/q_t': 0.4014926552772522, 'fcm_dpo/delta': -0.032409437000751495, 'fcm_dpo/margin': 79.06198120117188, 'margin_dpo/margin_mean': 79.06198120117188, 'margin_dpo/margin_std': 104.669189453125, 'logps/chosen': -123.37162780761719, 'logps/rejected': -228.87918090820312, 'logps/ref_chosen': -49.471771240234375, 'logps/ref_rejected': -75.91734313964844, 'KL/chosen_KL_mean': -73.89985656738281, 'KL/rejected_KL_mean': -152.96185302734375, 'KL/mean': -113.43084716796875, 'KL/std': 87.49028778076172, 'logits/chosen': -0.32298341393470764, 'logits/rejected': -0.3117997348308563, 'epoch': 0.25} + 25%|██▍ | 170/681 [07:14<22:34, 2.65s/it] 25%|██▌ | 171/681 [07:16<21:41, 2.55s/it] {'loss': 1.1822, 'grad_norm': 27.85107421875, 'learning_rate': 4.6714516072235273e-07, 'fcm_dpo/beta': 0.005503002088516951, 'fcm_dpo/q_t': 0.42633184790611267, 'fcm_dpo/delta': 0.05219453573226929, 'fcm_dpo/margin': 63.538963317871094, 'margin_dpo/margin_mean': 63.538963317871094, 'margin_dpo/margin_std': 132.4144287109375, 'logps/chosen': -193.5714569091797, 'logps/rejected': -281.99322509765625, 'logps/ref_chosen': -84.49931335449219, 'logps/ref_rejected': -109.38209533691406, 'KL/chosen_KL_mean': -109.0721435546875, 'KL/rejected_KL_mean': -172.61111450195312, 'KL/mean': -140.8416290283203, 'KL/std': 102.56979370117188, 'logits/chosen': -0.3782072067260742, 'logits/rejected': -0.36336031556129456, 'epoch': 0.25} + 25%|██▌ | 171/681 [07:16<21:41, 2.55s/it] 25%|██▌ | 172/681 [07:18<21:27, 2.53s/it] {'loss': 1.1328, 'grad_norm': 19.190082550048828, 'learning_rate': 4.6650635094610966e-07, 'fcm_dpo/beta': 0.0055364081636071205, 'fcm_dpo/q_t': 0.41614508628845215, 'fcm_dpo/delta': 0.02648979052901268, 'fcm_dpo/margin': 67.64393615722656, 'margin_dpo/margin_mean': 67.64393615722656, 'margin_dpo/margin_std': 111.18234252929688, 'logps/chosen': -163.73956298828125, 'logps/rejected': -248.166259765625, 'logps/ref_chosen': -68.65391540527344, 'logps/ref_rejected': -85.43667602539062, 'KL/chosen_KL_mean': -95.08564758300781, 'KL/rejected_KL_mean': -162.72958374023438, 'KL/mean': -128.90762329101562, 'KL/std': 98.64192199707031, 'logits/chosen': -0.3858957886695862, 'logits/rejected': -0.36709922552108765, 'epoch': 0.25} + 25%|██▌ | 172/681 [07:18<21:27, 2.53s/it] 25%|██▌ | 173/681 [07:21<21:14, 2.51s/it] {'loss': 1.1132, 'grad_norm': 20.20654296875, 'learning_rate': 4.6586183602616687e-07, 'fcm_dpo/beta': 0.005593603476881981, 'fcm_dpo/q_t': 0.4151589870452881, 'fcm_dpo/delta': 0.027135606855154037, 'fcm_dpo/margin': 66.7756118774414, 'margin_dpo/margin_mean': 66.7756118774414, 'margin_dpo/margin_std': 95.39866638183594, 'logps/chosen': -149.4697265625, 'logps/rejected': -231.87838745117188, 'logps/ref_chosen': -63.050880432128906, 'logps/ref_rejected': -78.68392181396484, 'KL/chosen_KL_mean': -86.4188461303711, 'KL/rejected_KL_mean': -153.1944580078125, 'KL/mean': -119.80665588378906, 'KL/std': 91.71368408203125, 'logits/chosen': -0.4084116816520691, 'logits/rejected': -0.38007980585098267, 'epoch': 0.25} + 25%|██▌ | 173/681 [07:21<21:14, 2.51s/it] 26%|██▌ | 174/681 [07:23<21:14, 2.51s/it] {'loss': 1.089, 'grad_norm': 21.535198211669922, 'learning_rate': 4.652116329460919e-07, 'fcm_dpo/beta': 0.005577293690294027, 'fcm_dpo/q_t': 0.40260159969329834, 'fcm_dpo/delta': -0.045830775052309036, 'fcm_dpo/margin': 79.48760986328125, 'margin_dpo/margin_mean': 79.48760986328125, 'margin_dpo/margin_std': 115.94807434082031, 'logps/chosen': -135.59408569335938, 'logps/rejected': -263.62994384765625, 'logps/ref_chosen': -53.36296844482422, 'logps/ref_rejected': -101.91120910644531, 'KL/chosen_KL_mean': -82.23112487792969, 'KL/rejected_KL_mean': -161.71873474121094, 'KL/mean': -121.97492980957031, 'KL/std': 96.5755615234375, 'logits/chosen': -0.320295512676239, 'logits/rejected': -0.338248610496521, 'epoch': 0.26} + 26%|██▌ | 174/681 [07:23<21:14, 2.51s/it] 26%|██▌ | 175/681 [07:26<21:27, 2.54s/it] {'loss': 0.959, 'grad_norm': 27.933094024658203, 'learning_rate': 4.645557588393406e-07, 'fcm_dpo/beta': 0.00536087341606617, 'fcm_dpo/q_t': 0.3664923906326294, 'fcm_dpo/delta': -0.19861072301864624, 'fcm_dpo/margin': 109.42562866210938, 'margin_dpo/margin_mean': 109.42562866210938, 'margin_dpo/margin_std': 109.13046264648438, 'logps/chosen': -121.00715637207031, 'logps/rejected': -274.52081298828125, 'logps/ref_chosen': -45.417762756347656, 'logps/ref_rejected': -89.50579833984375, 'KL/chosen_KL_mean': -75.58938598632812, 'KL/rejected_KL_mean': -185.0150146484375, 'KL/mean': -130.3022003173828, 'KL/std': 103.86154174804688, 'logits/chosen': -0.31206628680229187, 'logits/rejected': -0.29794448614120483, 'epoch': 0.26} + 26%|██▌ | 175/681 [07:26<21:27, 2.54s/it] 26%|██▌ | 176/681 [07:28<20:45, 2.47s/it] {'loss': 1.0458, 'grad_norm': 19.47554588317871, 'learning_rate': 4.638942309888058e-07, 'fcm_dpo/beta': 0.0052553461864590645, 'fcm_dpo/q_t': 0.3937104344367981, 'fcm_dpo/delta': -0.08676035702228546, 'fcm_dpo/margin': 91.84110260009766, 'margin_dpo/margin_mean': 91.84110260009766, 'margin_dpo/margin_std': 117.94707489013672, 'logps/chosen': -131.26634216308594, 'logps/rejected': -268.21356201171875, 'logps/ref_chosen': -50.452842712402344, 'logps/ref_rejected': -95.5589599609375, 'KL/chosen_KL_mean': -80.81350708007812, 'KL/rejected_KL_mean': -172.65460205078125, 'KL/mean': -126.73405456542969, 'KL/std': 101.8584976196289, 'logits/chosen': -0.3381134867668152, 'logits/rejected': -0.35593676567077637, 'epoch': 0.26} + 26%|██▌ | 176/681 [07:28<20:45, 2.47s/it] 26%|██▌ | 177/681 [07:31<21:04, 2.51s/it] {'loss': 1.053, 'grad_norm': 30.431970596313477, 'learning_rate': 4.6322706682636137e-07, 'fcm_dpo/beta': 0.005180859938263893, 'fcm_dpo/q_t': 0.39563536643981934, 'fcm_dpo/delta': -0.07426586002111435, 'fcm_dpo/margin': 90.87483215332031, 'margin_dpo/margin_mean': 90.87483215332031, 'margin_dpo/margin_std': 118.72608947753906, 'logps/chosen': -156.4290771484375, 'logps/rejected': -281.98126220703125, 'logps/ref_chosen': -61.216468811035156, 'logps/ref_rejected': -95.89378356933594, 'KL/chosen_KL_mean': -95.21261596679688, 'KL/rejected_KL_mean': -186.08746337890625, 'KL/mean': -140.65003967285156, 'KL/std': 111.35872650146484, 'logits/chosen': -0.35000866651535034, 'logits/rejected': -0.3415108621120453, 'epoch': 0.26} + 26%|██▌ | 177/681 [07:31<21:04, 2.51s/it] 26%|██▌ | 178/681 [07:33<20:57, 2.50s/it] {'loss': 1.0007, 'grad_norm': 27.06403350830078, 'learning_rate': 4.6255428393240354e-07, 'fcm_dpo/beta': 0.004986546002328396, 'fcm_dpo/q_t': 0.37574303150177, 'fcm_dpo/delta': -0.1844998002052307, 'fcm_dpo/margin': 114.88732147216797, 'margin_dpo/margin_mean': 114.88731384277344, 'margin_dpo/margin_std': 142.7379150390625, 'logps/chosen': -163.16500854492188, 'logps/rejected': -325.15283203125, 'logps/ref_chosen': -58.26478958129883, 'logps/ref_rejected': -105.3653335571289, 'KL/chosen_KL_mean': -104.90020751953125, 'KL/rejected_KL_mean': -219.78750610351562, 'KL/mean': -162.3438720703125, 'KL/std': 130.68798828125, 'logits/chosen': -0.25030016899108887, 'logits/rejected': -0.2403268814086914, 'epoch': 0.26} + 26%|██▌ | 178/681 [07:33<20:57, 2.50s/it] 26%|██▋ | 179/681 [07:36<21:27, 2.56s/it] {'loss': 1.1354, 'grad_norm': 36.69063949584961, 'learning_rate': 4.6187590003538724e-07, 'fcm_dpo/beta': 0.004935364704579115, 'fcm_dpo/q_t': 0.4115809500217438, 'fcm_dpo/delta': -0.0025902092456817627, 'fcm_dpo/margin': 81.46771240234375, 'margin_dpo/margin_mean': 81.46771240234375, 'margin_dpo/margin_std': 140.07054138183594, 'logps/chosen': -169.73358154296875, 'logps/rejected': -280.6708068847656, 'logps/ref_chosen': -61.05832290649414, 'logps/ref_rejected': -90.52782440185547, 'KL/chosen_KL_mean': -108.67526245117188, 'KL/rejected_KL_mean': -190.14297485351562, 'KL/mean': -149.40911865234375, 'KL/std': 112.27867126464844, 'logits/chosen': -0.3055553138256073, 'logits/rejected': -0.315255343914032, 'epoch': 0.26} + 26%|██▋ | 179/681 [07:36<21:27, 2.56s/it] 26%|██▋ | 180/681 [07:38<21:02, 2.52s/it] {'loss': 1.0254, 'grad_norm': 20.31671142578125, 'learning_rate': 4.611919330113591e-07, 'fcm_dpo/beta': 0.0048674289137125015, 'fcm_dpo/q_t': 0.38437995314598083, 'fcm_dpo/delta': -0.11503924429416656, 'fcm_dpo/margin': 104.57914733886719, 'margin_dpo/margin_mean': 104.57914733886719, 'margin_dpo/margin_std': 125.9128646850586, 'logps/chosen': -147.77914428710938, 'logps/rejected': -296.2274169921875, 'logps/ref_chosen': -54.34272003173828, 'logps/ref_rejected': -98.21183776855469, 'KL/chosen_KL_mean': -93.43641662597656, 'KL/rejected_KL_mean': -198.01556396484375, 'KL/mean': -145.72601318359375, 'KL/std': 100.83357238769531, 'logits/chosen': -0.28088757395744324, 'logits/rejected': -0.27398407459259033, 'epoch': 0.26} + 26%|██▋ | 180/681 [07:38<21:02, 2.52s/it] 27%|██▋ | 181/681 [07:41<21:14, 2.55s/it] {'loss': 1.1687, 'grad_norm': 17.186668395996094, 'learning_rate': 4.605024008834863e-07, 'fcm_dpo/beta': 0.00491193775087595, 'fcm_dpo/q_t': 0.42752861976623535, 'fcm_dpo/delta': 0.0835873931646347, 'fcm_dpo/margin': 64.92919158935547, 'margin_dpo/margin_mean': 64.92919921875, 'margin_dpo/margin_std': 116.01119995117188, 'logps/chosen': -136.22694396972656, 'logps/rejected': -207.81185913085938, 'logps/ref_chosen': -55.000457763671875, 'logps/ref_rejected': -61.656166076660156, 'KL/chosen_KL_mean': -81.22648620605469, 'KL/rejected_KL_mean': -146.1556854248047, 'KL/mean': -113.69107818603516, 'KL/std': 92.112060546875, 'logits/chosen': -0.3439704179763794, 'logits/rejected': -0.32105350494384766, 'epoch': 0.27} + 27%|██▋ | 181/681 [07:41<21:14, 2.55s/it] 27%|██▋ | 182/681 [07:44<21:35, 2.60s/it] {'loss': 1.0138, 'grad_norm': 19.202186584472656, 'learning_rate': 4.598073218215817e-07, 'fcm_dpo/beta': 0.0048008207231760025, 'fcm_dpo/q_t': 0.3778771162033081, 'fcm_dpo/delta': -0.14640963077545166, 'fcm_dpo/margin': 111.99693298339844, 'margin_dpo/margin_mean': 111.99693298339844, 'margin_dpo/margin_std': 133.52532958984375, 'logps/chosen': -118.15953063964844, 'logps/rejected': -278.5701904296875, 'logps/ref_chosen': -41.107852935791016, 'logps/ref_rejected': -89.5215835571289, 'KL/chosen_KL_mean': -77.05168151855469, 'KL/rejected_KL_mean': -189.04859924316406, 'KL/mean': -133.05014038085938, 'KL/std': 110.59321594238281, 'logits/chosen': -0.30555886030197144, 'logits/rejected': -0.3154027462005615, 'epoch': 0.27} + 27%|██▋ | 182/681 [07:44<21:35, 2.60s/it] 27%|██▋ | 183/681 [07:46<20:54, 2.52s/it] {'loss': 1.1824, 'grad_norm': 21.197261810302734, 'learning_rate': 4.5910671414162484e-07, 'fcm_dpo/beta': 0.00474231131374836, 'fcm_dpo/q_t': 0.4329318106174469, 'fcm_dpo/delta': -0.043582916259765625, 'fcm_dpo/margin': 60.724796295166016, 'margin_dpo/margin_mean': 60.72479248046875, 'margin_dpo/margin_std': 101.34217834472656, 'logps/chosen': -171.76657104492188, 'logps/rejected': -250.94252014160156, 'logps/ref_chosen': -57.52456283569336, 'logps/ref_rejected': -75.97572326660156, 'KL/chosen_KL_mean': -114.24200439453125, 'KL/rejected_KL_mean': -174.966796875, 'KL/mean': -144.60440063476562, 'KL/std': 92.102294921875, 'logits/chosen': -0.32367801666259766, 'logits/rejected': -0.31501567363739014, 'epoch': 0.27} + 27%|██▋ | 183/681 [07:46<20:54, 2.52s/it] 27%|██▋ | 184/681 [07:49<21:22, 2.58s/it] {'loss': 1.1707, 'grad_norm': 20.073440551757812, 'learning_rate': 4.5840059630527985e-07, 'fcm_dpo/beta': 0.004741538781672716, 'fcm_dpo/q_t': 0.430799275636673, 'fcm_dpo/delta': -0.00162951136007905, 'fcm_dpo/margin': 64.11538696289062, 'margin_dpo/margin_mean': 64.11538696289062, 'margin_dpo/margin_std': 109.54376220703125, 'logps/chosen': -153.45635986328125, 'logps/rejected': -235.6608428955078, 'logps/ref_chosen': -58.544952392578125, 'logps/ref_rejected': -76.63406372070312, 'KL/chosen_KL_mean': -94.91139221191406, 'KL/rejected_KL_mean': -159.0267791748047, 'KL/mean': -126.96908569335938, 'KL/std': 86.96229553222656, 'logits/chosen': -0.35681042075157166, 'logits/rejected': -0.34759992361068726, 'epoch': 0.27} + 27%|██▋ | 184/681 [07:49<21:22, 2.58s/it] 27%|██▋ | 185/681 [07:51<20:58, 2.54s/it] {'loss': 1.2336, 'grad_norm': 19.03368377685547, 'learning_rate': 4.5768898691940836e-07, 'fcm_dpo/beta': 0.004851914010941982, 'fcm_dpo/q_t': 0.44705960154533386, 'fcm_dpo/delta': 0.15838554501533508, 'fcm_dpo/margin': 50.61490249633789, 'margin_dpo/margin_mean': 50.61490249633789, 'margin_dpo/margin_std': 120.33627319335938, 'logps/chosen': -164.19671630859375, 'logps/rejected': -226.5483856201172, 'logps/ref_chosen': -62.025848388671875, 'logps/ref_rejected': -73.7625961303711, 'KL/chosen_KL_mean': -102.1708755493164, 'KL/rejected_KL_mean': -152.78579711914062, 'KL/mean': -127.47833251953125, 'KL/std': 99.92794799804688, 'logits/chosen': -0.33275556564331055, 'logits/rejected': -0.3096786439418793, 'epoch': 0.27} + 27%|██▋ | 185/681 [07:51<20:58, 2.54s/it] 27%|██▋ | 186/681 [07:54<20:50, 2.53s/it] {'loss': 1.0476, 'grad_norm': 30.832712173461914, 'learning_rate': 4.5697190473557947e-07, 'fcm_dpo/beta': 0.00484071671962738, 'fcm_dpo/q_t': 0.39502984285354614, 'fcm_dpo/delta': -0.06518108397722244, 'fcm_dpo/margin': 95.45602416992188, 'margin_dpo/margin_mean': 95.45602416992188, 'margin_dpo/margin_std': 115.35481262207031, 'logps/chosen': -163.056884765625, 'logps/rejected': -277.23187255859375, 'logps/ref_chosen': -69.35346984863281, 'logps/ref_rejected': -88.07244873046875, 'KL/chosen_KL_mean': -93.70341491699219, 'KL/rejected_KL_mean': -189.159423828125, 'KL/mean': -141.43142700195312, 'KL/std': 100.74044799804688, 'logits/chosen': -0.35813000798225403, 'logits/rejected': -0.3332071304321289, 'epoch': 0.27} + 27%|██▋ | 186/681 [07:54<20:50, 2.53s/it] 27%|██▋ | 187/681 [07:56<20:13, 2.46s/it] {'loss': 1.0976, 'grad_norm': 24.247724533081055, 'learning_rate': 4.5624936864957555e-07, 'fcm_dpo/beta': 0.004859459586441517, 'fcm_dpo/q_t': 0.4108119606971741, 'fcm_dpo/delta': 0.011555861681699753, 'fcm_dpo/margin': 79.94140625, 'margin_dpo/margin_mean': 79.94140625, 'margin_dpo/margin_std': 105.25481414794922, 'logps/chosen': -141.647216796875, 'logps/rejected': -250.80126953125, 'logps/ref_chosen': -52.7564582824707, 'logps/ref_rejected': -81.96910095214844, 'KL/chosen_KL_mean': -88.89076232910156, 'KL/rejected_KL_mean': -168.83216857910156, 'KL/mean': -128.86146545410156, 'KL/std': 96.70646667480469, 'logits/chosen': -0.32936474680900574, 'logits/rejected': -0.3230019807815552, 'epoch': 0.27} + 27%|██▋ | 187/681 [07:56<20:13, 2.46s/it] 28%|██▊ | 188/681 [07:59<20:26, 2.49s/it] {'loss': 1.0429, 'grad_norm': 34.55025863647461, 'learning_rate': 4.5552139770089454e-07, 'fcm_dpo/beta': 0.004792365245521069, 'fcm_dpo/q_t': 0.393817663192749, 'fcm_dpo/delta': -0.07467129826545715, 'fcm_dpo/margin': 98.32379150390625, 'margin_dpo/margin_mean': 98.32378387451172, 'margin_dpo/margin_std': 119.06608581542969, 'logps/chosen': -133.17462158203125, 'logps/rejected': -271.62335205078125, 'logps/ref_chosen': -49.415489196777344, 'logps/ref_rejected': -89.54043579101562, 'KL/chosen_KL_mean': -83.75914001464844, 'KL/rejected_KL_mean': -182.08291625976562, 'KL/mean': -132.92103576660156, 'KL/std': 108.79667663574219, 'logits/chosen': -0.33150649070739746, 'logits/rejected': -0.338370680809021, 'epoch': 0.28} + 28%|██▊ | 188/681 [07:59<20:26, 2.49s/it] 28%|██▊ | 189/681 [08:01<20:11, 2.46s/it] {'loss': 1.1305, 'grad_norm': 29.447795867919922, 'learning_rate': 4.5478801107224794e-07, 'fcm_dpo/beta': 0.00478787487372756, 'fcm_dpo/q_t': 0.41624516248703003, 'fcm_dpo/delta': 0.019290301948785782, 'fcm_dpo/margin': 79.6611328125, 'margin_dpo/margin_mean': 79.6611328125, 'margin_dpo/margin_std': 133.5395050048828, 'logps/chosen': -151.4128875732422, 'logps/rejected': -250.84242248535156, 'logps/ref_chosen': -52.39896011352539, 'logps/ref_rejected': -72.16735076904297, 'KL/chosen_KL_mean': -99.01392364501953, 'KL/rejected_KL_mean': -178.67507934570312, 'KL/mean': -138.84449768066406, 'KL/std': 109.14806365966797, 'logits/chosen': -0.334136962890625, 'logits/rejected': -0.31781691312789917, 'epoch': 0.28} + 28%|██▊ | 189/681 [08:01<20:11, 2.46s/it] 28%|██▊ | 190/681 [08:03<19:37, 2.40s/it] {'loss': 1.0824, 'grad_norm': 18.84038543701172, 'learning_rate': 4.5404922808905543e-07, 'fcm_dpo/beta': 0.004786365665495396, 'fcm_dpo/q_t': 0.39865192770957947, 'fcm_dpo/delta': -0.061055850237607956, 'fcm_dpo/margin': 95.53968811035156, 'margin_dpo/margin_mean': 95.53968811035156, 'margin_dpo/margin_std': 135.1875, 'logps/chosen': -169.75241088867188, 'logps/rejected': -303.1595458984375, 'logps/ref_chosen': -64.68305969238281, 'logps/ref_rejected': -102.55052185058594, 'KL/chosen_KL_mean': -105.0693359375, 'KL/rejected_KL_mean': -200.60903930664062, 'KL/mean': -152.8391876220703, 'KL/std': 116.60220336914062, 'logits/chosen': -0.34232112765312195, 'logits/rejected': -0.3296660780906677, 'epoch': 0.28} + 28%|██▊ | 190/681 [08:03<19:37, 2.40s/it] 28%|██▊ | 191/681 [08:06<20:26, 2.50s/it] {'loss': 0.9544, 'grad_norm': 21.52570152282715, 'learning_rate': 4.5330506821893565e-07, 'fcm_dpo/beta': 0.0045428648591041565, 'fcm_dpo/q_t': 0.36233189702033997, 'fcm_dpo/delta': -0.2434038668870926, 'fcm_dpo/margin': 137.9830322265625, 'margin_dpo/margin_mean': 137.9830322265625, 'margin_dpo/margin_std': 151.53329467773438, 'logps/chosen': -167.56503295898438, 'logps/rejected': -347.02886962890625, 'logps/ref_chosen': -68.65887451171875, 'logps/ref_rejected': -110.1396713256836, 'KL/chosen_KL_mean': -98.90615844726562, 'KL/rejected_KL_mean': -236.88919067382812, 'KL/mean': -167.89767456054688, 'KL/std': 137.1860809326172, 'logits/chosen': -0.34583958983421326, 'logits/rejected': -0.3241385817527771, 'epoch': 0.28} + 28%|██▊ | 191/681 [08:06<20:26, 2.50s/it] 28%|██▊ | 192/681 [08:08<20:19, 2.49s/it] {'loss': 1.115, 'grad_norm': 25.540145874023438, 'learning_rate': 4.5255555107119336e-07, 'fcm_dpo/beta': 0.004507323727011681, 'fcm_dpo/q_t': 0.4093227982521057, 'fcm_dpo/delta': -0.014000019058585167, 'fcm_dpo/margin': 91.70048522949219, 'margin_dpo/margin_mean': 91.70048522949219, 'margin_dpo/margin_std': 148.0252685546875, 'logps/chosen': -197.26083374023438, 'logps/rejected': -322.55572509765625, 'logps/ref_chosen': -69.72691345214844, 'logps/ref_rejected': -103.32135009765625, 'KL/chosen_KL_mean': -127.5339126586914, 'KL/rejected_KL_mean': -219.23440551757812, 'KL/mean': -173.3841552734375, 'KL/std': 119.96187591552734, 'logits/chosen': -0.2959958016872406, 'logits/rejected': -0.296117901802063, 'epoch': 0.28} + 28%|██▊ | 192/681 [08:08<20:19, 2.49s/it] 28%|██▊ | 193/681 [08:11<20:00, 2.46s/it] {'loss': 1.2572, 'grad_norm': 29.541507720947266, 'learning_rate': 4.5180069639630236e-07, 'fcm_dpo/beta': 0.004510689992457628, 'fcm_dpo/q_t': 0.44376885890960693, 'fcm_dpo/delta': 0.040593214333057404, 'fcm_dpo/margin': 52.65251159667969, 'margin_dpo/margin_mean': 52.65251159667969, 'margin_dpo/margin_std': 139.58816528320312, 'logps/chosen': -187.24169921875, 'logps/rejected': -256.11126708984375, 'logps/ref_chosen': -60.19049835205078, 'logps/ref_rejected': -76.40755462646484, 'KL/chosen_KL_mean': -127.05119323730469, 'KL/rejected_KL_mean': -179.70370483398438, 'KL/mean': -153.37745666503906, 'KL/std': 109.23312377929688, 'logits/chosen': -0.2886780798435211, 'logits/rejected': -0.27803605794906616, 'epoch': 0.28} + 28%|██▊ | 193/681 [08:11<20:00, 2.46s/it] 28%|██▊ | 194/681 [08:13<19:39, 2.42s/it] {'loss': 1.0818, 'grad_norm': 18.051904678344727, 'learning_rate': 4.510405240853854e-07, 'fcm_dpo/beta': 0.004522847011685371, 'fcm_dpo/q_t': 0.40855488181114197, 'fcm_dpo/delta': 0.01011504977941513, 'fcm_dpo/margin': 86.26107788085938, 'margin_dpo/margin_mean': 86.26107788085938, 'margin_dpo/margin_std': 98.7254638671875, 'logps/chosen': -116.146728515625, 'logps/rejected': -225.25222778320312, 'logps/ref_chosen': -37.84037399291992, 'logps/ref_rejected': -60.684783935546875, 'KL/chosen_KL_mean': -78.30635833740234, 'KL/rejected_KL_mean': -164.56744384765625, 'KL/mean': -121.43690490722656, 'KL/std': 90.03581237792969, 'logits/chosen': -0.25177642703056335, 'logits/rejected': -0.2375318706035614, 'epoch': 0.28} + 28%|██▊ | 194/681 [08:13<19:39, 2.42s/it] 29%|██▊ | 195/681 [08:16<20:20, 2.51s/it] {'loss': 1.0745, 'grad_norm': 21.63848876953125, 'learning_rate': 4.5027505416968985e-07, 'fcm_dpo/beta': 0.004522291943430901, 'fcm_dpo/q_t': 0.4035479426383972, 'fcm_dpo/delta': -0.022319436073303223, 'fcm_dpo/margin': 93.17718505859375, 'margin_dpo/margin_mean': 93.17718505859375, 'margin_dpo/margin_std': 118.36261749267578, 'logps/chosen': -180.14068603515625, 'logps/rejected': -315.197265625, 'logps/ref_chosen': -54.891571044921875, 'logps/ref_rejected': -96.77095794677734, 'KL/chosen_KL_mean': -125.2491226196289, 'KL/rejected_KL_mean': -218.42633056640625, 'KL/mean': -171.83770751953125, 'KL/std': 114.07196807861328, 'logits/chosen': -0.2544304132461548, 'logits/rejected': -0.27329152822494507, 'epoch': 0.29} + 29%|██▊ | 195/681 [08:16<20:20, 2.51s/it] 29%|██▉ | 196/681 [08:18<20:23, 2.52s/it] {'loss': 1.0612, 'grad_norm': 17.6580753326416, 'learning_rate': 4.495043068200599e-07, 'fcm_dpo/beta': 0.0044434089213609695, 'fcm_dpo/q_t': 0.3950856924057007, 'fcm_dpo/delta': -0.07507769018411636, 'fcm_dpo/margin': 105.90283966064453, 'margin_dpo/margin_mean': 105.90283966064453, 'margin_dpo/margin_std': 138.68316650390625, 'logps/chosen': -149.79953002929688, 'logps/rejected': -278.51007080078125, 'logps/ref_chosen': -53.245243072509766, 'logps/ref_rejected': -76.05294799804688, 'KL/chosen_KL_mean': -96.55429077148438, 'KL/rejected_KL_mean': -202.45712280273438, 'KL/mean': -149.50570678710938, 'KL/std': 115.13066101074219, 'logits/chosen': -0.29589658975601196, 'logits/rejected': -0.2812860608100891, 'epoch': 0.29} + 29%|██▉ | 196/681 [08:18<20:23, 2.52s/it] 29%|██▉ | 197/681 [08:21<20:31, 2.54s/it] {'loss': 1.113, 'grad_norm': 18.291038513183594, 'learning_rate': 4.4872830234640493e-07, 'fcm_dpo/beta': 0.004487765487283468, 'fcm_dpo/q_t': 0.41627001762390137, 'fcm_dpo/delta': 0.03489822521805763, 'fcm_dpo/margin': 81.57408142089844, 'margin_dpo/margin_mean': 81.5740737915039, 'margin_dpo/margin_std': 112.14630889892578, 'logps/chosen': -159.1299285888672, 'logps/rejected': -257.4925842285156, 'logps/ref_chosen': -60.42033386230469, 'logps/ref_rejected': -77.20890808105469, 'KL/chosen_KL_mean': -98.7095947265625, 'KL/rejected_KL_mean': -180.28367614746094, 'KL/mean': -139.4966278076172, 'KL/std': 99.37328338623047, 'logits/chosen': -0.28296738862991333, 'logits/rejected': -0.27726900577545166, 'epoch': 0.29} + 29%|██▉ | 197/681 [08:21<20:31, 2.54s/it] 29%|██▉ | 198/681 [08:24<20:36, 2.56s/it] {'loss': 1.0634, 'grad_norm': 21.28237533569336, 'learning_rate': 4.479470611971645e-07, 'fcm_dpo/beta': 0.00444161519408226, 'fcm_dpo/q_t': 0.39844024181365967, 'fcm_dpo/delta': -0.06254196166992188, 'fcm_dpo/margin': 103.49454498291016, 'margin_dpo/margin_mean': 103.49453735351562, 'margin_dpo/margin_std': 140.37669372558594, 'logps/chosen': -166.69100952148438, 'logps/rejected': -312.3926086425781, 'logps/ref_chosen': -55.03618621826172, 'logps/ref_rejected': -97.24325561523438, 'KL/chosen_KL_mean': -111.65481567382812, 'KL/rejected_KL_mean': -215.14935302734375, 'KL/mean': -163.402099609375, 'KL/std': 123.84089660644531, 'logits/chosen': -0.29576927423477173, 'logits/rejected': -0.2956548035144806, 'epoch': 0.29} + 29%|██▉ | 198/681 [08:24<20:36, 2.56s/it] 29%|██▉ | 199/681 [08:26<20:53, 2.60s/it] {'loss': 1.0671, 'grad_norm': 25.73158836364746, 'learning_rate': 4.471606039587695e-07, 'fcm_dpo/beta': 0.004363642539829016, 'fcm_dpo/q_t': 0.39733150601387024, 'fcm_dpo/delta': -0.057444989681243896, 'fcm_dpo/margin': 104.01869201660156, 'margin_dpo/margin_mean': 104.01869201660156, 'margin_dpo/margin_std': 136.8724365234375, 'logps/chosen': -162.328125, 'logps/rejected': -294.16619873046875, 'logps/ref_chosen': -56.828826904296875, 'logps/ref_rejected': -84.64820861816406, 'KL/chosen_KL_mean': -105.4992904663086, 'KL/rejected_KL_mean': -209.5179901123047, 'KL/mean': -157.50863647460938, 'KL/std': 112.66731262207031, 'logits/chosen': -0.3182041049003601, 'logits/rejected': -0.3008995056152344, 'epoch': 0.29} + 29%|██▉ | 199/681 [08:26<20:53, 2.60s/it] 29%|██▉ | 200/681 [08:29<20:52, 2.60s/it] {'loss': 1.091, 'grad_norm': 21.32215690612793, 'learning_rate': 4.4636895135509966e-07, 'fcm_dpo/beta': 0.004329666495323181, 'fcm_dpo/q_t': 0.4015011191368103, 'fcm_dpo/delta': -0.047804687172174454, 'fcm_dpo/margin': 102.84647369384766, 'margin_dpo/margin_mean': 102.84646606445312, 'margin_dpo/margin_std': 154.68792724609375, 'logps/chosen': -157.58596801757812, 'logps/rejected': -287.97381591796875, 'logps/ref_chosen': -53.06706237792969, 'logps/ref_rejected': -80.60843658447266, 'KL/chosen_KL_mean': -104.5189208984375, 'KL/rejected_KL_mean': -207.36537170410156, 'KL/mean': -155.94215393066406, 'KL/std': 121.28309631347656, 'logits/chosen': -0.2771759629249573, 'logits/rejected': -0.25995227694511414, 'epoch': 0.29} + 29%|██▉ | 200/681 [08:29<20:52, 2.60s/it] 30%|██▉ | 201/681 [08:32<20:46, 2.60s/it] {'loss': 1.0822, 'grad_norm': 19.30495262145996, 'learning_rate': 4.455721242469372e-07, 'fcm_dpo/beta': 0.004319292958825827, 'fcm_dpo/q_t': 0.4014075696468353, 'fcm_dpo/delta': -0.03909054771065712, 'fcm_dpo/margin': 101.25636291503906, 'margin_dpo/margin_mean': 101.25636291503906, 'margin_dpo/margin_std': 141.83740234375, 'logps/chosen': -183.1385955810547, 'logps/rejected': -323.80096435546875, 'logps/ref_chosen': -75.4022216796875, 'logps/ref_rejected': -114.80821990966797, 'KL/chosen_KL_mean': -107.73637390136719, 'KL/rejected_KL_mean': -208.99273681640625, 'KL/mean': -158.36456298828125, 'KL/std': 125.75869750976562, 'logits/chosen': -0.3616677224636078, 'logits/rejected': -0.3575963079929352, 'epoch': 0.3} + 30%|██▉ | 201/681 [08:32<20:46, 2.60s/it] 30%|██▉ | 202/681 [08:34<20:46, 2.60s/it] {'loss': 1.1852, 'grad_norm': 21.131303787231445, 'learning_rate': 4.4477014363141755e-07, 'fcm_dpo/beta': 0.004360673949122429, 'fcm_dpo/q_t': 0.4304364323616028, 'fcm_dpo/delta': 0.08905763924121857, 'fcm_dpo/margin': 71.93099975585938, 'margin_dpo/margin_mean': 71.93099975585938, 'margin_dpo/margin_std': 141.54080200195312, 'logps/chosen': -161.19305419921875, 'logps/rejected': -270.0077819824219, 'logps/ref_chosen': -50.101318359375, 'logps/ref_rejected': -86.98503112792969, 'KL/chosen_KL_mean': -111.09174346923828, 'KL/rejected_KL_mean': -183.02273559570312, 'KL/mean': -147.0572509765625, 'KL/std': 109.2310791015625, 'logits/chosen': -0.3155418336391449, 'logits/rejected': -0.32926225662231445, 'epoch': 0.3} + 30%|██▉ | 202/681 [08:34<20:46, 2.60s/it] 30%|██▉ | 203/681 [08:37<21:01, 2.64s/it] {'loss': 1.1006, 'grad_norm': 20.558147430419922, 'learning_rate': 4.439630306414758e-07, 'fcm_dpo/beta': 0.004380302503705025, 'fcm_dpo/q_t': 0.41108816862106323, 'fcm_dpo/delta': 0.011728717014193535, 'fcm_dpo/margin': 88.74185180664062, 'margin_dpo/margin_mean': 88.74185180664062, 'margin_dpo/margin_std': 121.73361206054688, 'logps/chosen': -169.769775390625, 'logps/rejected': -283.79791259765625, 'logps/ref_chosen': -60.60969543457031, 'logps/ref_rejected': -85.89596557617188, 'KL/chosen_KL_mean': -109.16009521484375, 'KL/rejected_KL_mean': -197.90194702148438, 'KL/mean': -153.531005859375, 'KL/std': 110.72138977050781, 'logits/chosen': -0.3222927153110504, 'logits/rejected': -0.3095286190509796, 'epoch': 0.3} + 30%|██▉ | 203/681 [08:37<21:01, 2.64s/it] 30%|██▉ | 204/681 [08:40<21:10, 2.66s/it] {'loss': 1.1481, 'grad_norm': 21.446792602539062, 'learning_rate': 4.431508065452897e-07, 'fcm_dpo/beta': 0.0044115157797932625, 'fcm_dpo/q_t': 0.4205179810523987, 'fcm_dpo/delta': 0.04016388952732086, 'fcm_dpo/margin': 81.89998626708984, 'margin_dpo/margin_mean': 81.89998626708984, 'margin_dpo/margin_std': 141.7510528564453, 'logps/chosen': -201.25341796875, 'logps/rejected': -290.684326171875, 'logps/ref_chosen': -80.16496276855469, 'logps/ref_rejected': -87.69590759277344, 'KL/chosen_KL_mean': -121.08845520019531, 'KL/rejected_KL_mean': -202.98841857910156, 'KL/mean': -162.0384521484375, 'KL/std': 120.96675109863281, 'logits/chosen': -0.42557811737060547, 'logits/rejected': -0.3860868215560913, 'epoch': 0.3} + 30%|██▉ | 204/681 [08:40<21:10, 2.66s/it] 30%|███ | 205/681 [08:42<21:04, 2.66s/it] {'loss': 1.0557, 'grad_norm': 22.160913467407227, 'learning_rate': 4.4233349274571974e-07, 'fcm_dpo/beta': 0.0043370481580495834, 'fcm_dpo/q_t': 0.39190131425857544, 'fcm_dpo/delta': -0.07669728994369507, 'fcm_dpo/margin': 108.635009765625, 'margin_dpo/margin_mean': 108.635009765625, 'margin_dpo/margin_std': 135.24069213867188, 'logps/chosen': -176.93746948242188, 'logps/rejected': -311.31280517578125, 'logps/ref_chosen': -59.384735107421875, 'logps/ref_rejected': -85.12505340576172, 'KL/chosen_KL_mean': -117.55274200439453, 'KL/rejected_KL_mean': -226.18775939941406, 'KL/mean': -171.8702392578125, 'KL/std': 123.94536590576172, 'logits/chosen': -0.3477054834365845, 'logits/rejected': -0.3181983232498169, 'epoch': 0.3} + 30%|███ | 205/681 [08:42<21:04, 2.66s/it] 30%|███ | 206/681 [08:45<20:10, 2.55s/it] {'loss': 1.0153, 'grad_norm': 24.27658462524414, 'learning_rate': 4.415111107797445e-07, 'fcm_dpo/beta': 0.004273426253348589, 'fcm_dpo/q_t': 0.3835982382297516, 'fcm_dpo/delta': -0.10639244318008423, 'fcm_dpo/margin': 117.12614440917969, 'margin_dpo/margin_mean': 117.12614440917969, 'margin_dpo/margin_std': 126.34098052978516, 'logps/chosen': -154.2874755859375, 'logps/rejected': -323.4026184082031, 'logps/ref_chosen': -46.964500427246094, 'logps/ref_rejected': -98.9534912109375, 'KL/chosen_KL_mean': -107.3229751586914, 'KL/rejected_KL_mean': -224.44912719726562, 'KL/mean': -165.88604736328125, 'KL/std': 115.81060791015625, 'logits/chosen': -0.25635069608688354, 'logits/rejected': -0.2581319212913513, 'epoch': 0.3} + 30%|███ | 206/681 [08:45<20:10, 2.55s/it] 30%|███ | 207/681 [08:47<20:08, 2.55s/it] {'loss': 0.993, 'grad_norm': 23.397769927978516, 'learning_rate': 4.4068368231789365e-07, 'fcm_dpo/beta': 0.0041627888567745686, 'fcm_dpo/q_t': 0.37616121768951416, 'fcm_dpo/delta': -0.1690816581249237, 'fcm_dpo/margin': 134.46800231933594, 'margin_dpo/margin_mean': 134.46800231933594, 'margin_dpo/margin_std': 155.12615966796875, 'logps/chosen': -155.7295684814453, 'logps/rejected': -318.589111328125, 'logps/ref_chosen': -56.05625915527344, 'logps/ref_rejected': -84.44779968261719, 'KL/chosen_KL_mean': -99.67330932617188, 'KL/rejected_KL_mean': -234.1413116455078, 'KL/mean': -166.9073028564453, 'KL/std': 132.98760986328125, 'logits/chosen': -0.3611038029193878, 'logits/rejected': -0.3351825773715973, 'epoch': 0.3} + 30%|███ | 207/681 [08:47<20:08, 2.55s/it] 31%|███ | 208/681 [08:50<20:18, 2.58s/it] {'loss': 1.0912, 'grad_norm': 26.311290740966797, 'learning_rate': 4.398512291636768e-07, 'fcm_dpo/beta': 0.004091139882802963, 'fcm_dpo/q_t': 0.40143436193466187, 'fcm_dpo/delta': -0.037054985761642456, 'fcm_dpo/margin': 106.39715576171875, 'margin_dpo/margin_mean': 106.39714813232422, 'margin_dpo/margin_std': 155.67181396484375, 'logps/chosen': -220.87564086914062, 'logps/rejected': -354.4920654296875, 'logps/ref_chosen': -67.06761169433594, 'logps/ref_rejected': -94.28689575195312, 'KL/chosen_KL_mean': -153.80804443359375, 'KL/rejected_KL_mean': -260.2052001953125, 'KL/mean': -207.006591796875, 'KL/std': 128.0139617919922, 'logits/chosen': -0.37917637825012207, 'logits/rejected': -0.3616452217102051, 'epoch': 0.31} + 31%|███ | 208/681 [08:50<20:18, 2.58s/it] 31%|███ | 209/681 [08:52<19:42, 2.51s/it] {'loss': 1.1266, 'grad_norm': 29.005294799804688, 'learning_rate': 4.3901377325300857e-07, 'fcm_dpo/beta': 0.004103041719645262, 'fcm_dpo/q_t': 0.4129374623298645, 'fcm_dpo/delta': 0.01593739353120327, 'fcm_dpo/margin': 93.7518081665039, 'margin_dpo/margin_mean': 93.75180053710938, 'margin_dpo/margin_std': 147.88401794433594, 'logps/chosen': -186.9494171142578, 'logps/rejected': -305.4610595703125, 'logps/ref_chosen': -56.18169403076172, 'logps/ref_rejected': -80.94152069091797, 'KL/chosen_KL_mean': -130.76773071289062, 'KL/rejected_KL_mean': -224.51953125, 'KL/mean': -177.64361572265625, 'KL/std': 116.13549041748047, 'logits/chosen': -0.28290677070617676, 'logits/rejected': -0.27193692326545715, 'epoch': 0.31} + 31%|███ | 209/681 [08:52<19:42, 2.51s/it] 31%|███ | 210/681 [08:54<19:31, 2.49s/it] {'loss': 1.0743, 'grad_norm': 26.729955673217773, 'learning_rate': 4.381713366536311e-07, 'fcm_dpo/beta': 0.0040941243059933186, 'fcm_dpo/q_t': 0.400329053401947, 'fcm_dpo/delta': -0.04651525244116783, 'fcm_dpo/margin': 108.5168228149414, 'margin_dpo/margin_mean': 108.51681518554688, 'margin_dpo/margin_std': 146.48370361328125, 'logps/chosen': -165.3853759765625, 'logps/rejected': -304.2120056152344, 'logps/ref_chosen': -46.371822357177734, 'logps/ref_rejected': -76.68162536621094, 'KL/chosen_KL_mean': -119.01356506347656, 'KL/rejected_KL_mean': -227.5303955078125, 'KL/mean': -173.27197265625, 'KL/std': 119.99595642089844, 'logits/chosen': -0.2923848628997803, 'logits/rejected': -0.2843049168586731, 'epoch': 0.31} + 31%|███ | 210/681 [08:55<19:31, 2.49s/it] 31%|███ | 211/681 [08:57<19:03, 2.43s/it] {'loss': 1.1436, 'grad_norm': 36.12271499633789, 'learning_rate': 4.373239415645323e-07, 'fcm_dpo/beta': 0.004082635045051575, 'fcm_dpo/q_t': 0.41833657026290894, 'fcm_dpo/delta': 0.021537447348237038, 'fcm_dpo/margin': 92.89524841308594, 'margin_dpo/margin_mean': 92.89524841308594, 'margin_dpo/margin_std': 161.31678771972656, 'logps/chosen': -249.98175048828125, 'logps/rejected': -350.765625, 'logps/ref_chosen': -78.93235778808594, 'logps/ref_rejected': -86.82098388671875, 'KL/chosen_KL_mean': -171.04937744140625, 'KL/rejected_KL_mean': -263.94464111328125, 'KL/mean': -217.49700927734375, 'KL/std': 137.9586944580078, 'logits/chosen': -0.3229391574859619, 'logits/rejected': -0.2848234474658966, 'epoch': 0.31} + 31%|███ | 211/681 [08:57<19:03, 2.43s/it] 31%|███ | 212/681 [08:59<19:14, 2.46s/it] {'loss': 1.0326, 'grad_norm': 24.483768463134766, 'learning_rate': 4.3647161031536086e-07, 'fcm_dpo/beta': 0.003975285217165947, 'fcm_dpo/q_t': 0.3824244737625122, 'fcm_dpo/delta': -0.13405390083789825, 'fcm_dpo/margin': 132.0928955078125, 'margin_dpo/margin_mean': 132.0928955078125, 'margin_dpo/margin_std': 163.81600952148438, 'logps/chosen': -198.94692993164062, 'logps/rejected': -375.9006652832031, 'logps/ref_chosen': -58.19701385498047, 'logps/ref_rejected': -103.05785369873047, 'KL/chosen_KL_mean': -140.74990844726562, 'KL/rejected_KL_mean': -272.84283447265625, 'KL/mean': -206.79635620117188, 'KL/std': 147.50723266601562, 'logits/chosen': -0.2860155701637268, 'logits/rejected': -0.27637436985969543, 'epoch': 0.31} + 31%|███ | 212/681 [08:59<19:14, 2.46s/it] 31%|███▏ | 213/681 [09:02<19:32, 2.51s/it] {'loss': 1.0354, 'grad_norm': 35.1922607421875, 'learning_rate': 4.3561436536583774e-07, 'fcm_dpo/beta': 0.0039049675688147545, 'fcm_dpo/q_t': 0.38795384764671326, 'fcm_dpo/delta': -0.09785507619380951, 'fcm_dpo/margin': 126.10995483398438, 'margin_dpo/margin_mean': 126.10995483398438, 'margin_dpo/margin_std': 152.4547119140625, 'logps/chosen': -198.36026000976562, 'logps/rejected': -350.8721923828125, 'logps/ref_chosen': -67.51271057128906, 'logps/ref_rejected': -93.91471862792969, 'KL/chosen_KL_mean': -130.84754943847656, 'KL/rejected_KL_mean': -256.9574890136719, 'KL/mean': -193.90252685546875, 'KL/std': 128.05874633789062, 'logits/chosen': -0.3229708671569824, 'logits/rejected': -0.29631006717681885, 'epoch': 0.31} + 31%|███▏ | 213/681 [09:02<19:32, 2.51s/it] 31%|███▏ | 214/681 [09:04<18:54, 2.43s/it] {'loss': 1.0675, 'grad_norm': 24.354455947875977, 'learning_rate': 4.3475222930516473e-07, 'fcm_dpo/beta': 0.003874241840094328, 'fcm_dpo/q_t': 0.4002940356731415, 'fcm_dpo/delta': -0.04264100641012192, 'fcm_dpo/margin': 113.76599884033203, 'margin_dpo/margin_mean': 113.76599884033203, 'margin_dpo/margin_std': 146.8253173828125, 'logps/chosen': -153.70370483398438, 'logps/rejected': -303.3822326660156, 'logps/ref_chosen': -41.604888916015625, 'logps/ref_rejected': -77.51741027832031, 'KL/chosen_KL_mean': -112.09881591796875, 'KL/rejected_KL_mean': -225.86480712890625, 'KL/mean': -168.9818115234375, 'KL/std': 126.0855712890625, 'logits/chosen': -0.23961499333381653, 'logits/rejected': -0.24341589212417603, 'epoch': 0.31} + 31%|███▏ | 214/681 [09:04<18:54, 2.43s/it] 32%|███▏ | 215/681 [09:07<19:21, 2.49s/it] {'loss': 1.0476, 'grad_norm': 24.70524787902832, 'learning_rate': 4.3388522485142885e-07, 'fcm_dpo/beta': 0.0038247781340032816, 'fcm_dpo/q_t': 0.3958727717399597, 'fcm_dpo/delta': -0.05516364052891731, 'fcm_dpo/margin': 118.28255462646484, 'margin_dpo/margin_mean': 118.28255462646484, 'margin_dpo/margin_std': 136.10702514648438, 'logps/chosen': -184.97625732421875, 'logps/rejected': -339.9442138671875, 'logps/ref_chosen': -53.279266357421875, 'logps/ref_rejected': -89.96464538574219, 'KL/chosen_KL_mean': -131.69699096679688, 'KL/rejected_KL_mean': -249.97955322265625, 'KL/mean': -190.83827209472656, 'KL/std': 130.4374237060547, 'logits/chosen': -0.2865249514579773, 'logits/rejected': -0.2768559455871582, 'epoch': 0.32} + 32%|███▏ | 215/681 [09:07<19:21, 2.49s/it] 32%|███▏ | 216/681 [09:10<20:08, 2.60s/it] {'loss': 1.0813, 'grad_norm': 23.230796813964844, 'learning_rate': 4.330133748510036e-07, 'fcm_dpo/beta': 0.0038109051529318094, 'fcm_dpo/q_t': 0.3997488021850586, 'fcm_dpo/delta': -0.04979248717427254, 'fcm_dpo/margin': 117.41053009033203, 'margin_dpo/margin_mean': 117.4105224609375, 'margin_dpo/margin_std': 166.27999877929688, 'logps/chosen': -182.37811279296875, 'logps/rejected': -328.09979248046875, 'logps/ref_chosen': -48.887794494628906, 'logps/ref_rejected': -77.19892883300781, 'KL/chosen_KL_mean': -133.49032592773438, 'KL/rejected_KL_mean': -250.90084838867188, 'KL/mean': -192.19558715820312, 'KL/std': 134.2762451171875, 'logits/chosen': -0.3021494150161743, 'logits/rejected': -0.28650131821632385, 'epoch': 0.32} + 32%|███▏ | 216/681 [09:10<20:08, 2.60s/it] 32%|███▏ | 217/681 [09:12<20:04, 2.60s/it] {'loss': 1.0148, 'grad_norm': 21.052268981933594, 'learning_rate': 4.3213670227794757e-07, 'fcm_dpo/beta': 0.003715306520462036, 'fcm_dpo/q_t': 0.3847663104534149, 'fcm_dpo/delta': -0.11458480358123779, 'fcm_dpo/margin': 136.8664093017578, 'margin_dpo/margin_mean': 136.8664093017578, 'margin_dpo/margin_std': 154.27218627929688, 'logps/chosen': -185.30398559570312, 'logps/rejected': -372.4034118652344, 'logps/ref_chosen': -49.845306396484375, 'logps/ref_rejected': -100.07832336425781, 'KL/chosen_KL_mean': -135.45867919921875, 'KL/rejected_KL_mean': -272.3250732421875, 'KL/mean': -203.8918914794922, 'KL/std': 136.49188232421875, 'logits/chosen': -0.270561158657074, 'logits/rejected': -0.26448899507522583, 'epoch': 0.32} + 32%|███▏ | 217/681 [09:12<20:04, 2.60s/it] 32%|███▏ | 218/681 [09:15<20:04, 2.60s/it] {'loss': 1.1123, 'grad_norm': 20.975133895874023, 'learning_rate': 4.3125523023339815e-07, 'fcm_dpo/beta': 0.0036980193108320236, 'fcm_dpo/q_t': 0.41142043471336365, 'fcm_dpo/delta': 0.0066223908215761185, 'fcm_dpo/margin': 106.43551635742188, 'margin_dpo/margin_mean': 106.43551635742188, 'margin_dpo/margin_std': 159.1250457763672, 'logps/chosen': -201.72775268554688, 'logps/rejected': -337.4329833984375, 'logps/ref_chosen': -58.576683044433594, 'logps/ref_rejected': -87.84639739990234, 'KL/chosen_KL_mean': -143.15106201171875, 'KL/rejected_KL_mean': -249.58657836914062, 'KL/mean': -196.36883544921875, 'KL/std': 135.85345458984375, 'logits/chosen': -0.2958596646785736, 'logits/rejected': -0.28984978795051575, 'epoch': 0.32} + 32%|███▏ | 218/681 [09:15<20:04, 2.60s/it] 32%|███▏ | 219/681 [09:17<20:04, 2.61s/it] {'loss': 1.1677, 'grad_norm': 29.867891311645508, 'learning_rate': 4.303689819449636e-07, 'fcm_dpo/beta': 0.0037533333525061607, 'fcm_dpo/q_t': 0.4223693311214447, 'fcm_dpo/delta': 0.05362574756145477, 'fcm_dpo/margin': 92.58407592773438, 'margin_dpo/margin_mean': 92.58406066894531, 'margin_dpo/margin_std': 172.917236328125, 'logps/chosen': -213.9130096435547, 'logps/rejected': -331.24365234375, 'logps/ref_chosen': -61.083858489990234, 'logps/ref_rejected': -85.83042907714844, 'KL/chosen_KL_mean': -152.82916259765625, 'KL/rejected_KL_mean': -245.41322326660156, 'KL/mean': -199.12118530273438, 'KL/std': 141.51058959960938, 'logits/chosen': -0.31183797121047974, 'logits/rejected': -0.30414023995399475, 'epoch': 0.32} + 32%|███▏ | 219/681 [09:18<20:04, 2.61s/it] 32%|███▏ | 220/681 [09:20<20:01, 2.61s/it] {'loss': 1.1694, 'grad_norm': 25.090055465698242, 'learning_rate': 4.2947798076611047e-07, 'fcm_dpo/beta': 0.0037990869022905827, 'fcm_dpo/q_t': 0.43133461475372314, 'fcm_dpo/delta': 0.10947298258543015, 'fcm_dpo/margin': 77.35502624511719, 'margin_dpo/margin_mean': 77.35502624511719, 'margin_dpo/margin_std': 126.34854888916016, 'logps/chosen': -242.8420867919922, 'logps/rejected': -337.8513488769531, 'logps/ref_chosen': -70.03128051757812, 'logps/ref_rejected': -87.68551635742188, 'KL/chosen_KL_mean': -172.810791015625, 'KL/rejected_KL_mean': -250.16583251953125, 'KL/mean': -211.48831176757812, 'KL/std': 122.16136169433594, 'logits/chosen': -0.2918081283569336, 'logits/rejected': -0.26791825890541077, 'epoch': 0.32} + 32%|███▏ | 220/681 [09:20<20:01, 2.61s/it] 32%|███▏ | 221/681 [09:23<19:44, 2.57s/it] {'loss': 0.9327, 'grad_norm': 26.41898536682129, 'learning_rate': 4.285822501755485e-07, 'fcm_dpo/beta': 0.003700793255120516, 'fcm_dpo/q_t': 0.35590487718582153, 'fcm_dpo/delta': -0.24753707647323608, 'fcm_dpo/margin': 170.87864685058594, 'margin_dpo/margin_mean': 170.878662109375, 'margin_dpo/margin_std': 160.57461547851562, 'logps/chosen': -199.983154296875, 'logps/rejected': -425.1748046875, 'logps/ref_chosen': -52.15470886230469, 'logps/ref_rejected': -106.46768188476562, 'KL/chosen_KL_mean': -147.82846069335938, 'KL/rejected_KL_mean': -318.70709228515625, 'KL/mean': -233.2677764892578, 'KL/std': 156.06951904296875, 'logits/chosen': -0.2831432819366455, 'logits/rejected': -0.2893243730068207, 'epoch': 0.32} + 32%|███▏ | 221/681 [09:23<19:44, 2.57s/it] 33%|███▎ | 222/681 [09:25<19:39, 2.57s/it] {'loss': 1.0542, 'grad_norm': 20.189659118652344, 'learning_rate': 4.276818137766118e-07, 'fcm_dpo/beta': 0.003622027114033699, 'fcm_dpo/q_t': 0.39438772201538086, 'fcm_dpo/delta': -0.06790776550769806, 'fcm_dpo/margin': 128.28977966308594, 'margin_dpo/margin_mean': 128.28977966308594, 'margin_dpo/margin_std': 161.27833557128906, 'logps/chosen': -216.27880859375, 'logps/rejected': -383.5986633300781, 'logps/ref_chosen': -60.971099853515625, 'logps/ref_rejected': -100.00115203857422, 'KL/chosen_KL_mean': -155.30770874023438, 'KL/rejected_KL_mean': -283.5975341796875, 'KL/mean': -219.45260620117188, 'KL/std': 144.82785034179688, 'logits/chosen': -0.31003278493881226, 'logits/rejected': -0.31088048219680786, 'epoch': 0.33} + 33%|███▎ | 222/681 [09:25<19:39, 2.57s/it] 33%|███▎ | 223/681 [09:27<18:43, 2.45s/it] {'loss': 1.1327, 'grad_norm': 24.665771484375, 'learning_rate': 4.2677669529663686e-07, 'fcm_dpo/beta': 0.003598616225644946, 'fcm_dpo/q_t': 0.4134790301322937, 'fcm_dpo/delta': 0.01532889436930418, 'fcm_dpo/margin': 107.05719757080078, 'margin_dpo/margin_mean': 107.05718994140625, 'margin_dpo/margin_std': 178.87998962402344, 'logps/chosen': -217.2523956298828, 'logps/rejected': -354.4940185546875, 'logps/ref_chosen': -52.64057540893555, 'logps/ref_rejected': -82.82502746582031, 'KL/chosen_KL_mean': -164.61181640625, 'KL/rejected_KL_mean': -271.66900634765625, 'KL/mean': -218.14041137695312, 'KL/std': 140.00912475585938, 'logits/chosen': -0.252638041973114, 'logits/rejected': -0.24833783507347107, 'epoch': 0.33} + 33%|███▎ | 223/681 [09:27<18:43, 2.45s/it] 33%|███▎ | 224/681 [09:29<17:53, 2.35s/it] {'loss': 1.089, 'grad_norm': 26.398706436157227, 'learning_rate': 4.2586691858633747e-07, 'fcm_dpo/beta': 0.003553580492734909, 'fcm_dpo/q_t': 0.4029054641723633, 'fcm_dpo/delta': -0.04542648792266846, 'fcm_dpo/margin': 124.37174987792969, 'margin_dpo/margin_mean': 124.37174987792969, 'margin_dpo/margin_std': 181.30401611328125, 'logps/chosen': -194.6832275390625, 'logps/rejected': -347.5760498046875, 'logps/ref_chosen': -48.59541320800781, 'logps/ref_rejected': -77.11648559570312, 'KL/chosen_KL_mean': -146.0878143310547, 'KL/rejected_KL_mean': -270.4595642089844, 'KL/mean': -208.273681640625, 'KL/std': 157.24331665039062, 'logits/chosen': -0.27615243196487427, 'logits/rejected': -0.25664016604423523, 'epoch': 0.33} + 33%|███▎ | 224/681 [09:29<17:53, 2.35s/it] 33%|███▎ | 225/681 [09:32<17:41, 2.33s/it] {'loss': 1.0314, 'grad_norm': 20.279191970825195, 'learning_rate': 4.249525076191759e-07, 'fcm_dpo/beta': 0.003504401072859764, 'fcm_dpo/q_t': 0.38479962944984436, 'fcm_dpo/delta': -0.12130744010210037, 'fcm_dpo/margin': 146.93002319335938, 'margin_dpo/margin_mean': 146.93002319335938, 'margin_dpo/margin_std': 186.25558471679688, 'logps/chosen': -227.44268798828125, 'logps/rejected': -416.275146484375, 'logps/ref_chosen': -58.000465393066406, 'logps/ref_rejected': -99.90291595458984, 'KL/chosen_KL_mean': -169.44223022460938, 'KL/rejected_KL_mean': -316.37225341796875, 'KL/mean': -242.9072265625, 'KL/std': 153.35971069335938, 'logits/chosen': -0.2664515972137451, 'logits/rejected': -0.2539185881614685, 'epoch': 0.33} + 33%|███▎ | 225/681 [09:32<17:41, 2.33s/it] 33%|███▎ | 226/681 [09:34<18:18, 2.41s/it] {'loss': 1.0981, 'grad_norm': 25.379802703857422, 'learning_rate': 4.2403348649073167e-07, 'fcm_dpo/beta': 0.0034582829102873802, 'fcm_dpo/q_t': 0.4084652364253998, 'fcm_dpo/delta': -0.01282452791929245, 'fcm_dpo/margin': 118.97482299804688, 'margin_dpo/margin_mean': 118.9748306274414, 'margin_dpo/margin_std': 169.6277313232422, 'logps/chosen': -199.84872436523438, 'logps/rejected': -338.61248779296875, 'logps/ref_chosen': -58.898799896240234, 'logps/ref_rejected': -78.68775939941406, 'KL/chosen_KL_mean': -140.94992065429688, 'KL/rejected_KL_mean': -259.92474365234375, 'KL/mean': -200.4373321533203, 'KL/std': 148.1170654296875, 'logits/chosen': -0.35269731283187866, 'logits/rejected': -0.31248384714126587, 'epoch': 0.33} + 33%|███▎ | 226/681 [09:34<18:18, 2.41s/it] 33%|███▎ | 227/681 [09:37<18:05, 2.39s/it] {'loss': 1.0323, 'grad_norm': 25.049428939819336, 'learning_rate': 4.2310987941806615e-07, 'fcm_dpo/beta': 0.0034146863035857677, 'fcm_dpo/q_t': 0.38793981075286865, 'fcm_dpo/delta': -0.09776041656732559, 'fcm_dpo/margin': 144.17855834960938, 'margin_dpo/margin_mean': 144.17855834960938, 'margin_dpo/margin_std': 172.8520050048828, 'logps/chosen': -222.4957275390625, 'logps/rejected': -407.0144958496094, 'logps/ref_chosen': -59.072181701660156, 'logps/ref_rejected': -99.41236877441406, 'KL/chosen_KL_mean': -163.4235382080078, 'KL/rejected_KL_mean': -307.60211181640625, 'KL/mean': -235.5128173828125, 'KL/std': 166.22787475585938, 'logits/chosen': -0.37827032804489136, 'logits/rejected': -0.3669503331184387, 'epoch': 0.33} + 33%|███▎ | 227/681 [09:37<18:05, 2.39s/it] 33%|███▎ | 228/681 [09:39<18:58, 2.51s/it] {'loss': 1.139, 'grad_norm': 24.645570755004883, 'learning_rate': 4.2218171073908463e-07, 'fcm_dpo/beta': 0.0034392657689750195, 'fcm_dpo/q_t': 0.4187896251678467, 'fcm_dpo/delta': 0.046966154128313065, 'fcm_dpo/margin': 103.10169982910156, 'margin_dpo/margin_mean': 103.1017074584961, 'margin_dpo/margin_std': 165.435546875, 'logps/chosen': -229.6811981201172, 'logps/rejected': -357.94036865234375, 'logps/ref_chosen': -65.89128875732422, 'logps/ref_rejected': -91.04875183105469, 'KL/chosen_KL_mean': -163.7899169921875, 'KL/rejected_KL_mean': -266.8916015625, 'KL/mean': -215.34075927734375, 'KL/std': 135.03140258789062, 'logits/chosen': -0.35644814372062683, 'logits/rejected': -0.3388446569442749, 'epoch': 0.33} + 33%|███▎ | 228/681 [09:40<18:58, 2.51s/it] 34%|███▎ | 229/681 [09:42<18:50, 2.50s/it] {'loss': 1.1093, 'grad_norm': 30.438304901123047, 'learning_rate': 4.212490049118951e-07, 'fcm_dpo/beta': 0.003454534336924553, 'fcm_dpo/q_t': 0.41169029474258423, 'fcm_dpo/delta': 0.018272558227181435, 'fcm_dpo/margin': 110.69569396972656, 'margin_dpo/margin_mean': 110.69569396972656, 'margin_dpo/margin_std': 158.08216857910156, 'logps/chosen': -229.31982421875, 'logps/rejected': -353.8365478515625, 'logps/ref_chosen': -70.70637512207031, 'logps/ref_rejected': -84.52741241455078, 'KL/chosen_KL_mean': -158.61346435546875, 'KL/rejected_KL_mean': -269.30914306640625, 'KL/mean': -213.9613037109375, 'KL/std': 150.13076782226562, 'logits/chosen': -0.44706737995147705, 'logits/rejected': -0.41668009757995605, 'epoch': 0.34} + 34%|███▎ | 229/681 [09:42<18:50, 2.50s/it] 34%|███▍ | 230/681 [09:44<18:29, 2.46s/it] {'loss': 0.9808, 'grad_norm': 34.35503005981445, 'learning_rate': 4.203117865141635e-07, 'fcm_dpo/beta': 0.0033752424642443657, 'fcm_dpo/q_t': 0.37446969747543335, 'fcm_dpo/delta': -0.1495116651058197, 'fcm_dpo/margin': 160.35025024414062, 'margin_dpo/margin_mean': 160.35025024414062, 'margin_dpo/margin_std': 157.2830810546875, 'logps/chosen': -157.69544982910156, 'logps/rejected': -364.3856201171875, 'logps/ref_chosen': -39.282005310058594, 'logps/ref_rejected': -85.62191009521484, 'KL/chosen_KL_mean': -118.41344451904297, 'KL/rejected_KL_mean': -278.76373291015625, 'KL/mean': -198.5885772705078, 'KL/std': 142.54483032226562, 'logits/chosen': -0.33029061555862427, 'logits/rejected': -0.3350130319595337, 'epoch': 0.34} + 34%|███▍ | 230/681 [09:44<18:29, 2.46s/it] 34%|███▍ | 231/681 [09:47<18:48, 2.51s/it] {'loss': 1.0974, 'grad_norm': 26.74052619934082, 'learning_rate': 4.1937008024246625e-07, 'fcm_dpo/beta': 0.0033622784540057182, 'fcm_dpo/q_t': 0.41282835602760315, 'fcm_dpo/delta': 0.017147505655884743, 'fcm_dpo/margin': 114.06205749511719, 'margin_dpo/margin_mean': 114.06205749511719, 'margin_dpo/margin_std': 149.9913330078125, 'logps/chosen': -207.95321655273438, 'logps/rejected': -332.86273193359375, 'logps/ref_chosen': -63.27644348144531, 'logps/ref_rejected': -74.1239013671875, 'KL/chosen_KL_mean': -144.67678833007812, 'KL/rejected_KL_mean': -258.7388610839844, 'KL/mean': -201.7078094482422, 'KL/std': 129.312255859375, 'logits/chosen': -0.4000471532344818, 'logits/rejected': -0.3726590871810913, 'epoch': 0.34} + 34%|███▍ | 231/681 [09:47<18:48, 2.51s/it] 34%|███▍ | 232/681 [09:50<19:12, 2.57s/it] {'loss': 1.1607, 'grad_norm': 25.34986686706543, 'learning_rate': 4.1842391091163933e-07, 'fcm_dpo/beta': 0.0034015290439128876, 'fcm_dpo/q_t': 0.4293164014816284, 'fcm_dpo/delta': 0.0846027284860611, 'fcm_dpo/margin': 93.53819274902344, 'margin_dpo/margin_mean': 93.53819274902344, 'margin_dpo/margin_std': 159.80599975585938, 'logps/chosen': -254.642822265625, 'logps/rejected': -361.40936279296875, 'logps/ref_chosen': -70.74876403808594, 'logps/ref_rejected': -83.97706604003906, 'KL/chosen_KL_mean': -183.89407348632812, 'KL/rejected_KL_mean': -277.4322814941406, 'KL/mean': -230.66317749023438, 'KL/std': 155.87942504882812, 'logits/chosen': -0.3858756422996521, 'logits/rejected': -0.3642328381538391, 'epoch': 0.34} + 34%|███▍ | 232/681 [09:50<19:12, 2.57s/it] 34%|███▍ | 233/681 [09:52<19:31, 2.62s/it] {'loss': 1.0648, 'grad_norm': 26.03354835510254, 'learning_rate': 4.174733034541245e-07, 'fcm_dpo/beta': 0.0033752245362848043, 'fcm_dpo/q_t': 0.3929908871650696, 'fcm_dpo/delta': -0.09889530390501022, 'fcm_dpo/margin': 146.382568359375, 'margin_dpo/margin_mean': 146.382568359375, 'margin_dpo/margin_std': 210.76010131835938, 'logps/chosen': -222.91705322265625, 'logps/rejected': -421.89678955078125, 'logps/ref_chosen': -54.8829345703125, 'logps/ref_rejected': -107.4800796508789, 'KL/chosen_KL_mean': -168.03411865234375, 'KL/rejected_KL_mean': -314.41668701171875, 'KL/mean': -241.22540283203125, 'KL/std': 164.896728515625, 'logits/chosen': -0.38083887100219727, 'logits/rejected': -0.384868860244751, 'epoch': 0.34} + 34%|███▍ | 233/681 [09:52<19:31, 2.62s/it] 34%|███▍ | 234/681 [09:55<19:37, 2.63s/it] {'loss': 1.0276, 'grad_norm': 51.05461502075195, 'learning_rate': 4.165182829193126e-07, 'fcm_dpo/beta': 0.0032870229333639145, 'fcm_dpo/q_t': 0.38880789279937744, 'fcm_dpo/delta': -0.09375564754009247, 'fcm_dpo/margin': 148.3834686279297, 'margin_dpo/margin_mean': 148.3834686279297, 'margin_dpo/margin_std': 165.46307373046875, 'logps/chosen': -206.4053192138672, 'logps/rejected': -410.7008972167969, 'logps/ref_chosen': -44.094520568847656, 'logps/ref_rejected': -100.00663757324219, 'KL/chosen_KL_mean': -162.310791015625, 'KL/rejected_KL_mean': -310.69427490234375, 'KL/mean': -236.50253295898438, 'KL/std': 145.2280731201172, 'logits/chosen': -0.33859947323799133, 'logits/rejected': -0.36448922753334045, 'epoch': 0.34} + 34%|███▍ | 234/681 [09:55<19:37, 2.63s/it] 35%|███▍ | 235/681 [09:57<19:08, 2.58s/it] {'loss': 1.1602, 'grad_norm': 30.565860748291016, 'learning_rate': 4.1555887447288255e-07, 'fcm_dpo/beta': 0.0033407763112336397, 'fcm_dpo/q_t': 0.42465054988861084, 'fcm_dpo/delta': 0.07434496283531189, 'fcm_dpo/margin': 98.07133483886719, 'margin_dpo/margin_mean': 98.07133483886719, 'margin_dpo/margin_std': 167.26129150390625, 'logps/chosen': -255.794677734375, 'logps/rejected': -382.0231628417969, 'logps/ref_chosen': -62.237911224365234, 'logps/ref_rejected': -90.39506530761719, 'KL/chosen_KL_mean': -193.5567626953125, 'KL/rejected_KL_mean': -291.62811279296875, 'KL/mean': -242.59243774414062, 'KL/std': 139.42709350585938, 'logits/chosen': -0.4063182473182678, 'logits/rejected': -0.388034462928772, 'epoch': 0.35} + 35%|███▍ | 235/681 [09:58<19:08, 2.58s/it] 35%|███▍ | 236/681 [10:00<19:21, 2.61s/it] {'loss': 0.9904, 'grad_norm': 56.562007904052734, 'learning_rate': 4.1459510339613946e-07, 'fcm_dpo/beta': 0.003285345621407032, 'fcm_dpo/q_t': 0.38063254952430725, 'fcm_dpo/delta': -0.11568379402160645, 'fcm_dpo/margin': 155.1697235107422, 'margin_dpo/margin_mean': 155.1697235107422, 'margin_dpo/margin_std': 141.39013671875, 'logps/chosen': -191.2288055419922, 'logps/rejected': -400.56878662109375, 'logps/ref_chosen': -49.34136199951172, 'logps/ref_rejected': -103.51162719726562, 'KL/chosen_KL_mean': -141.887451171875, 'KL/rejected_KL_mean': -297.0571594238281, 'KL/mean': -219.4722900390625, 'KL/std': 149.41790771484375, 'logits/chosen': -0.3532152771949768, 'logits/rejected': -0.35226863622665405, 'epoch': 0.35} + 35%|███▍ | 236/681 [10:00<19:21, 2.61s/it] 35%|███▍ | 237/681 [10:03<19:17, 2.61s/it] {'loss': 1.1075, 'grad_norm': 27.002674102783203, 'learning_rate': 4.136269950853473e-07, 'fcm_dpo/beta': 0.0032739704474806786, 'fcm_dpo/q_t': 0.4108693599700928, 'fcm_dpo/delta': 0.00752119068056345, 'fcm_dpo/margin': 119.95466613769531, 'margin_dpo/margin_mean': 119.95466613769531, 'margin_dpo/margin_std': 175.58291625976562, 'logps/chosen': -242.7032470703125, 'logps/rejected': -403.2701416015625, 'logps/ref_chosen': -54.168121337890625, 'logps/ref_rejected': -94.78036499023438, 'KL/chosen_KL_mean': -188.53512573242188, 'KL/rejected_KL_mean': -308.48980712890625, 'KL/mean': -248.512451171875, 'KL/std': 152.22921752929688, 'logits/chosen': -0.3559209108352661, 'logits/rejected': -0.3510690927505493, 'epoch': 0.35} + 35%|███▍ | 237/681 [10:03<19:17, 2.61s/it] 35%|███▍ | 238/681 [10:05<19:29, 2.64s/it] {'loss': 1.1065, 'grad_norm': 23.35243034362793, 'learning_rate': 4.126545750510605e-07, 'fcm_dpo/beta': 0.0032602387946099043, 'fcm_dpo/q_t': 0.4138892889022827, 'fcm_dpo/delta': 0.01038980484008789, 'fcm_dpo/margin': 119.50453186035156, 'margin_dpo/margin_mean': 119.50453186035156, 'margin_dpo/margin_std': 172.2950439453125, 'logps/chosen': -219.11459350585938, 'logps/rejected': -374.06396484375, 'logps/ref_chosen': -53.973121643066406, 'logps/ref_rejected': -89.41795349121094, 'KL/chosen_KL_mean': -165.1414794921875, 'KL/rejected_KL_mean': -284.64599609375, 'KL/mean': -224.89373779296875, 'KL/std': 151.98275756835938, 'logits/chosen': -0.329600989818573, 'logits/rejected': -0.3452579975128174, 'epoch': 0.35} + 35%|███▍ | 238/681 [10:06<19:29, 2.64s/it] 35%|███▌ | 239/681 [10:08<18:42, 2.54s/it] {'loss': 1.0714, 'grad_norm': 35.462642669677734, 'learning_rate': 4.116778689174514e-07, 'fcm_dpo/beta': 0.003244359279051423, 'fcm_dpo/q_t': 0.4014323949813843, 'fcm_dpo/delta': -0.02737080305814743, 'fcm_dpo/margin': 131.09304809570312, 'margin_dpo/margin_mean': 131.09304809570312, 'margin_dpo/margin_std': 158.469970703125, 'logps/chosen': -232.735107421875, 'logps/rejected': -399.3232727050781, 'logps/ref_chosen': -58.09782409667969, 'logps/ref_rejected': -93.59294128417969, 'KL/chosen_KL_mean': -174.6372833251953, 'KL/rejected_KL_mean': -305.7303466796875, 'KL/mean': -240.18380737304688, 'KL/std': 140.24794006347656, 'logits/chosen': -0.35082727670669556, 'logits/rejected': -0.33832210302352905, 'epoch': 0.35} + 35%|███▌ | 239/681 [10:08<18:42, 2.54s/it] 35%|███▌ | 240/681 [10:11<19:09, 2.61s/it] {'loss': 1.1409, 'grad_norm': 37.75619125366211, 'learning_rate': 4.106969024216348e-07, 'fcm_dpo/beta': 0.003269023261964321, 'fcm_dpo/q_t': 0.4180099368095398, 'fcm_dpo/delta': 0.041884519159793854, 'fcm_dpo/margin': 109.98141479492188, 'margin_dpo/margin_mean': 109.98140716552734, 'margin_dpo/margin_std': 178.82452392578125, 'logps/chosen': -241.0057373046875, 'logps/rejected': -364.4911804199219, 'logps/ref_chosen': -60.6144905090332, 'logps/ref_rejected': -74.1185302734375, 'KL/chosen_KL_mean': -180.39125061035156, 'KL/rejected_KL_mean': -290.3726501464844, 'KL/mean': -235.38194274902344, 'KL/std': 150.54681396484375, 'logits/chosen': -0.4051019549369812, 'logits/rejected': -0.38454490900039673, 'epoch': 0.35} + 35%|███▌ | 240/681 [10:11<19:09, 2.61s/it] 35%|███▌ | 241/681 [10:13<18:46, 2.56s/it] {'loss': 0.9959, 'grad_norm': 28.70929527282715, 'learning_rate': 4.097117014129903e-07, 'fcm_dpo/beta': 0.0032152351923286915, 'fcm_dpo/q_t': 0.3768247365951538, 'fcm_dpo/delta': -0.1620943695306778, 'fcm_dpo/margin': 172.10183715820312, 'margin_dpo/margin_mean': 172.10183715820312, 'margin_dpo/margin_std': 193.75653076171875, 'logps/chosen': -215.71958923339844, 'logps/rejected': -409.791259765625, 'logps/ref_chosen': -66.091064453125, 'logps/ref_rejected': -88.06088256835938, 'KL/chosen_KL_mean': -149.62850952148438, 'KL/rejected_KL_mean': -321.7303771972656, 'KL/mean': -235.679443359375, 'KL/std': 166.00146484375, 'logits/chosen': -0.4123689532279968, 'logits/rejected': -0.38682758808135986, 'epoch': 0.35} + 35%|███▌ | 241/681 [10:13<18:46, 2.56s/it] 36%|███▌ | 242/681 [10:15<18:28, 2.53s/it] {'loss': 1.097, 'grad_norm': 33.44797134399414, 'learning_rate': 4.087222918524807e-07, 'fcm_dpo/beta': 0.003198289545252919, 'fcm_dpo/q_t': 0.41040879487991333, 'fcm_dpo/delta': 0.0038902349770069122, 'fcm_dpo/margin': 123.77262115478516, 'margin_dpo/margin_mean': 123.77262115478516, 'margin_dpo/margin_std': 168.39976501464844, 'logps/chosen': -240.05770874023438, 'logps/rejected': -379.32672119140625, 'logps/ref_chosen': -67.86392974853516, 'logps/ref_rejected': -83.36033630371094, 'KL/chosen_KL_mean': -172.19378662109375, 'KL/rejected_KL_mean': -295.96636962890625, 'KL/mean': -234.080078125, 'KL/std': 142.62332153320312, 'logits/chosen': -0.3743210732936859, 'logits/rejected': -0.3515356183052063, 'epoch': 0.36} + 36%|███▌ | 242/681 [10:15<18:28, 2.53s/it] 36%|███▌ | 243/681 [10:18<18:26, 2.53s/it] {'loss': 1.0334, 'grad_norm': 23.109296798706055, 'learning_rate': 4.07728699811968e-07, 'fcm_dpo/beta': 0.0031418318394571543, 'fcm_dpo/q_t': 0.3906528353691101, 'fcm_dpo/delta': -0.08077876269817352, 'fcm_dpo/margin': 151.74655151367188, 'margin_dpo/margin_mean': 151.74655151367188, 'margin_dpo/margin_std': 174.57302856445312, 'logps/chosen': -236.45870971679688, 'logps/rejected': -401.4566650390625, 'logps/ref_chosen': -63.0842399597168, 'logps/ref_rejected': -76.33563232421875, 'KL/chosen_KL_mean': -173.37445068359375, 'KL/rejected_KL_mean': -325.12103271484375, 'KL/mean': -249.24774169921875, 'KL/std': 158.65621948242188, 'logits/chosen': -0.3913189172744751, 'logits/rejected': -0.36040928959846497, 'epoch': 0.36} + 36%|███▌ | 243/681 [10:18<18:26, 2.53s/it] 36%|███▌ | 244/681 [10:20<18:21, 2.52s/it] {'loss': 1.0244, 'grad_norm': 40.61009216308594, 'learning_rate': 4.067309514735267e-07, 'fcm_dpo/beta': 0.0030940580181777477, 'fcm_dpo/q_t': 0.3908138573169708, 'fcm_dpo/delta': -0.07428047060966492, 'fcm_dpo/margin': 152.1016387939453, 'margin_dpo/margin_mean': 152.1016387939453, 'margin_dpo/margin_std': 155.42276000976562, 'logps/chosen': -214.38320922851562, 'logps/rejected': -400.236083984375, 'logps/ref_chosen': -61.140689849853516, 'logps/ref_rejected': -94.89193725585938, 'KL/chosen_KL_mean': -153.2425079345703, 'KL/rejected_KL_mean': -305.3441467285156, 'KL/mean': -229.29331970214844, 'KL/std': 151.1732635498047, 'logits/chosen': -0.4778062701225281, 'logits/rejected': -0.4712453782558441, 'epoch': 0.36} + 36%|███▌ | 244/681 [10:21<18:21, 2.52s/it] 36%|███▌ | 245/681 [10:23<18:46, 2.58s/it] {'loss': 1.1128, 'grad_norm': 25.50909423828125, 'learning_rate': 4.057290731287531e-07, 'fcm_dpo/beta': 0.0030736280605196953, 'fcm_dpo/q_t': 0.41335082054138184, 'fcm_dpo/delta': 0.022404037415981293, 'fcm_dpo/margin': 122.78466796875, 'margin_dpo/margin_mean': 122.78466796875, 'margin_dpo/margin_std': 168.45639038085938, 'logps/chosen': -247.1886444091797, 'logps/rejected': -390.35113525390625, 'logps/ref_chosen': -67.26228332519531, 'logps/ref_rejected': -87.64010620117188, 'KL/chosen_KL_mean': -179.92636108398438, 'KL/rejected_KL_mean': -302.7110290527344, 'KL/mean': -241.31871032714844, 'KL/std': 152.4658660888672, 'logits/chosen': -0.4174082279205322, 'logits/rejected': -0.3904969394207001, 'epoch': 0.36} + 36%|███▌ | 245/681 [10:23<18:46, 2.58s/it] 36%|███▌ | 246/681 [10:26<18:42, 2.58s/it] {'loss': 1.1069, 'grad_norm': 26.56377601623535, 'learning_rate': 4.047230911780736e-07, 'fcm_dpo/beta': 0.003099266439676285, 'fcm_dpo/q_t': 0.4126628637313843, 'fcm_dpo/delta': 0.010860616341233253, 'fcm_dpo/margin': 125.66952514648438, 'margin_dpo/margin_mean': 125.66952514648438, 'margin_dpo/margin_std': 182.71670532226562, 'logps/chosen': -245.54122924804688, 'logps/rejected': -388.8601379394531, 'logps/ref_chosen': -66.69696807861328, 'logps/ref_rejected': -84.34634399414062, 'KL/chosen_KL_mean': -178.84425354003906, 'KL/rejected_KL_mean': -304.5137939453125, 'KL/mean': -241.6790313720703, 'KL/std': 169.49676513671875, 'logits/chosen': -0.4388137459754944, 'logits/rejected': -0.40211886167526245, 'epoch': 0.36} + 36%|███▌ | 246/681 [10:26<18:42, 2.58s/it] 36%|███▋ | 247/681 [10:28<18:29, 2.56s/it] {'loss': 0.9995, 'grad_norm': 27.967557907104492, 'learning_rate': 4.0371303213004814e-07, 'fcm_dpo/beta': 0.003036319278180599, 'fcm_dpo/q_t': 0.3771999478340149, 'fcm_dpo/delta': -0.14767590165138245, 'fcm_dpo/margin': 177.672119140625, 'margin_dpo/margin_mean': 177.672119140625, 'margin_dpo/margin_std': 196.26101684570312, 'logps/chosen': -267.948974609375, 'logps/rejected': -495.30902099609375, 'logps/ref_chosen': -56.6053466796875, 'logps/ref_rejected': -106.29326629638672, 'KL/chosen_KL_mean': -211.3436279296875, 'KL/rejected_KL_mean': -389.0157470703125, 'KL/mean': -300.1796875, 'KL/std': 181.46612548828125, 'logits/chosen': -0.3680022656917572, 'logits/rejected': -0.36837178468704224, 'epoch': 0.36} + 36%|███▋ | 247/681 [10:28<18:29, 2.56s/it] 36%|███▋ | 248/681 [10:31<18:26, 2.55s/it] {'loss': 1.0241, 'grad_norm': 25.113601684570312, 'learning_rate': 4.0269892260067197e-07, 'fcm_dpo/beta': 0.002979197073727846, 'fcm_dpo/q_t': 0.39190369844436646, 'fcm_dpo/delta': -0.05982068181037903, 'fcm_dpo/margin': 153.2515411376953, 'margin_dpo/margin_mean': 153.25155639648438, 'margin_dpo/margin_std': 141.49179077148438, 'logps/chosen': -227.80104064941406, 'logps/rejected': -428.86627197265625, 'logps/ref_chosen': -44.043216705322266, 'logps/ref_rejected': -91.85687255859375, 'KL/chosen_KL_mean': -183.7578125, 'KL/rejected_KL_mean': -337.0093994140625, 'KL/mean': -260.38360595703125, 'KL/std': 146.3836669921875, 'logits/chosen': -0.33221954107284546, 'logits/rejected': -0.3516564965248108, 'epoch': 0.36} + 36%|███▋ | 248/681 [10:31<18:26, 2.55s/it] 37%|███▋ | 249/681 [10:33<18:00, 2.50s/it] {'loss': 1.2226, 'grad_norm': 34.834327697753906, 'learning_rate': 4.0168078931267426e-07, 'fcm_dpo/beta': 0.003046369180083275, 'fcm_dpo/q_t': 0.44190624356269836, 'fcm_dpo/delta': 0.1469813883304596, 'fcm_dpo/margin': 84.31859588623047, 'margin_dpo/margin_mean': 84.31858825683594, 'margin_dpo/margin_std': 186.0101318359375, 'logps/chosen': -297.4047546386719, 'logps/rejected': -399.7490539550781, 'logps/ref_chosen': -62.442352294921875, 'logps/ref_rejected': -80.46806335449219, 'KL/chosen_KL_mean': -234.96240234375, 'KL/rejected_KL_mean': -319.281005859375, 'KL/mean': -277.1217041015625, 'KL/std': 158.48863220214844, 'logits/chosen': -0.3620932698249817, 'logits/rejected': -0.33795762062072754, 'epoch': 0.37} + 37%|███▋ | 249/681 [10:33<18:00, 2.50s/it] 37%|███▋ | 250/681 [10:36<17:41, 2.46s/it] {'loss': 1.0237, 'grad_norm': 35.6130485534668, 'learning_rate': 4.006586590948141e-07, 'fcm_dpo/beta': 0.003046911209821701, 'fcm_dpo/q_t': 0.38924139738082886, 'fcm_dpo/delta': -0.08060043305158615, 'fcm_dpo/margin': 156.46585083007812, 'margin_dpo/margin_mean': 156.4658660888672, 'margin_dpo/margin_std': 162.3227996826172, 'logps/chosen': -272.461669921875, 'logps/rejected': -437.16265869140625, 'logps/ref_chosen': -65.63668823242188, 'logps/ref_rejected': -73.87184143066406, 'KL/chosen_KL_mean': -206.82498168945312, 'KL/rejected_KL_mean': -363.29083251953125, 'KL/mean': -285.0578918457031, 'KL/std': 159.93698120117188, 'logits/chosen': -0.33317434787750244, 'logits/rejected': -0.2766192555427551, 'epoch': 0.37} + 37%|███▋ | 250/681 [10:36<17:41, 2.46s/it] 37%|███▋ | 251/681 [10:38<17:34, 2.45s/it] {'loss': 1.1649, 'grad_norm': 28.103612899780273, 'learning_rate': 3.9963255888117325e-07, 'fcm_dpo/beta': 0.0030482178553938866, 'fcm_dpo/q_t': 0.42633721232414246, 'fcm_dpo/delta': 0.07727696746587753, 'fcm_dpo/margin': 106.71568298339844, 'margin_dpo/margin_mean': 106.71568298339844, 'margin_dpo/margin_std': 182.81895446777344, 'logps/chosen': -275.2982177734375, 'logps/rejected': -402.4945983886719, 'logps/ref_chosen': -57.182716369628906, 'logps/ref_rejected': -77.66343688964844, 'KL/chosen_KL_mean': -218.11549377441406, 'KL/rejected_KL_mean': -324.8311767578125, 'KL/mean': -271.47332763671875, 'KL/std': 169.133056640625, 'logits/chosen': -0.32768577337265015, 'logits/rejected': -0.30002111196517944, 'epoch': 0.37} + 37%|███▋ | 251/681 [10:38<17:34, 2.45s/it] 37%|███▋ | 252/681 [10:41<17:51, 2.50s/it] {'loss': 1.0441, 'grad_norm': 24.940649032592773, 'learning_rate': 3.9860251571044666e-07, 'fcm_dpo/beta': 0.0030416897498071194, 'fcm_dpo/q_t': 0.394910991191864, 'fcm_dpo/delta': -0.04213904216885567, 'fcm_dpo/margin': 144.67434692382812, 'margin_dpo/margin_mean': 144.67434692382812, 'margin_dpo/margin_std': 149.41883850097656, 'logps/chosen': -287.28411865234375, 'logps/rejected': -445.03082275390625, 'logps/ref_chosen': -71.68563842773438, 'logps/ref_rejected': -84.75799560546875, 'KL/chosen_KL_mean': -215.59848022460938, 'KL/rejected_KL_mean': -360.2728271484375, 'KL/mean': -287.9356689453125, 'KL/std': 146.37741088867188, 'logits/chosen': -0.4142671227455139, 'logits/rejected': -0.37881606817245483, 'epoch': 0.37} + 37%|███▋ | 252/681 [10:41<17:51, 2.50s/it] 37%|███▋ | 253/681 [10:43<18:24, 2.58s/it] {'loss': 1.0705, 'grad_norm': 19.748661041259766, 'learning_rate': 3.9756855672522986e-07, 'fcm_dpo/beta': 0.003045113291591406, 'fcm_dpo/q_t': 0.39946746826171875, 'fcm_dpo/delta': -0.03548625111579895, 'fcm_dpo/margin': 142.34326171875, 'margin_dpo/margin_mean': 142.34324645996094, 'margin_dpo/margin_std': 176.71458435058594, 'logps/chosen': -255.62966918945312, 'logps/rejected': -427.54150390625, 'logps/ref_chosen': -69.1339340209961, 'logps/ref_rejected': -98.70252990722656, 'KL/chosen_KL_mean': -186.49574279785156, 'KL/rejected_KL_mean': -328.8389892578125, 'KL/mean': -257.6673583984375, 'KL/std': 157.70240783691406, 'logits/chosen': -0.39257919788360596, 'logits/rejected': -0.3857148289680481, 'epoch': 0.37} + 37%|███▋ | 253/681 [10:43<18:24, 2.58s/it] 37%|███▋ | 254/681 [10:46<18:29, 2.60s/it] {'loss': 1.1421, 'grad_norm': 25.14466094970703, 'learning_rate': 3.965307091713037e-07, 'fcm_dpo/beta': 0.0030348035506904125, 'fcm_dpo/q_t': 0.4199643135070801, 'fcm_dpo/delta': 0.03659197315573692, 'fcm_dpo/margin': 120.19171142578125, 'margin_dpo/margin_mean': 120.19171142578125, 'margin_dpo/margin_std': 206.28839111328125, 'logps/chosen': -233.63986206054688, 'logps/rejected': -389.9842224121094, 'logps/ref_chosen': -54.154998779296875, 'logps/ref_rejected': -90.30764770507812, 'KL/chosen_KL_mean': -179.48486328125, 'KL/rejected_KL_mean': -299.67657470703125, 'KL/mean': -239.58071899414062, 'KL/std': 164.30686950683594, 'logits/chosen': -0.3829053044319153, 'logits/rejected': -0.36821985244750977, 'epoch': 0.37} + 37%|███▋ | 254/681 [10:46<18:29, 2.60s/it] 37%|███▋ | 255/681 [10:48<18:01, 2.54s/it] {'loss': 1.1067, 'grad_norm': 20.29219627380371, 'learning_rate': 3.954890003969163e-07, 'fcm_dpo/beta': 0.0030361046083271503, 'fcm_dpo/q_t': 0.4100106954574585, 'fcm_dpo/delta': 0.009780865162611008, 'fcm_dpo/margin': 128.564697265625, 'margin_dpo/margin_mean': 128.564697265625, 'margin_dpo/margin_std': 180.92298889160156, 'logps/chosen': -240.14614868164062, 'logps/rejected': -401.77777099609375, 'logps/ref_chosen': -57.14167022705078, 'logps/ref_rejected': -90.2085952758789, 'KL/chosen_KL_mean': -183.00448608398438, 'KL/rejected_KL_mean': -311.56915283203125, 'KL/mean': -247.2868194580078, 'KL/std': 143.20118713378906, 'logits/chosen': -0.34874504804611206, 'logits/rejected': -0.338106632232666, 'epoch': 0.37} + 37%|███▋ | 255/681 [10:48<18:01, 2.54s/it] 38%|███▊ | 256/681 [10:51<17:59, 2.54s/it] {'loss': 1.0781, 'grad_norm': 26.907196044921875, 'learning_rate': 3.944434578520628e-07, 'fcm_dpo/beta': 0.003040488576516509, 'fcm_dpo/q_t': 0.40354132652282715, 'fcm_dpo/delta': -0.02052391692996025, 'fcm_dpo/margin': 138.02523803710938, 'margin_dpo/margin_mean': 138.02523803710938, 'margin_dpo/margin_std': 178.26329040527344, 'logps/chosen': -213.6597442626953, 'logps/rejected': -389.08441162109375, 'logps/ref_chosen': -55.163490295410156, 'logps/ref_rejected': -92.56291961669922, 'KL/chosen_KL_mean': -158.49624633789062, 'KL/rejected_KL_mean': -296.521484375, 'KL/mean': -227.50888061523438, 'KL/std': 154.35052490234375, 'logits/chosen': -0.31947365403175354, 'logits/rejected': -0.3274417519569397, 'epoch': 0.38} + 38%|███▊ | 256/681 [10:51<17:59, 2.54s/it] 38%|███▊ | 257/681 [10:54<18:10, 2.57s/it] {'loss': 1.0685, 'grad_norm': 20.602216720581055, 'learning_rate': 3.933941090877615e-07, 'fcm_dpo/beta': 0.0029973145574331284, 'fcm_dpo/q_t': 0.4001276195049286, 'fcm_dpo/delta': -0.042888298630714417, 'fcm_dpo/margin': 146.73841857910156, 'margin_dpo/margin_mean': 146.73841857910156, 'margin_dpo/margin_std': 182.62957763671875, 'logps/chosen': -204.51800537109375, 'logps/rejected': -381.37060546875, 'logps/ref_chosen': -49.42369842529297, 'logps/ref_rejected': -79.53791809082031, 'KL/chosen_KL_mean': -155.09429931640625, 'KL/rejected_KL_mean': -301.83270263671875, 'KL/mean': -228.4635009765625, 'KL/std': 161.17501831054688, 'logits/chosen': -0.35195407271385193, 'logits/rejected': -0.338517963886261, 'epoch': 0.38} + 38%|███▊ | 257/681 [10:54<18:10, 2.57s/it] 38%|███▊ | 258/681 [10:56<17:30, 2.48s/it] {'loss': 1.0867, 'grad_norm': 29.499923706054688, 'learning_rate': 3.923409817553284e-07, 'fcm_dpo/beta': 0.0030030158814042807, 'fcm_dpo/q_t': 0.40110859274864197, 'fcm_dpo/delta': -0.029582539573311806, 'fcm_dpo/margin': 142.62550354003906, 'margin_dpo/margin_mean': 142.62550354003906, 'margin_dpo/margin_std': 199.8113250732422, 'logps/chosen': -264.27740478515625, 'logps/rejected': -443.5089111328125, 'logps/ref_chosen': -59.384124755859375, 'logps/ref_rejected': -95.99010467529297, 'KL/chosen_KL_mean': -204.89328002929688, 'KL/rejected_KL_mean': -347.518798828125, 'KL/mean': -276.2060546875, 'KL/std': 168.318115234375, 'logits/chosen': -0.3328630030155182, 'logits/rejected': -0.33087849617004395, 'epoch': 0.38} + 38%|███▊ | 258/681 [10:56<17:30, 2.48s/it] 38%|███▊ | 259/681 [10:58<17:31, 2.49s/it] {'loss': 1.1286, 'grad_norm': 23.914457321166992, 'learning_rate': 3.9128410360564793e-07, 'fcm_dpo/beta': 0.0030021152924746275, 'fcm_dpo/q_t': 0.41781848669052124, 'fcm_dpo/delta': 0.04111909121274948, 'fcm_dpo/margin': 120.01093292236328, 'margin_dpo/margin_mean': 120.01094055175781, 'margin_dpo/margin_std': 181.0947723388672, 'logps/chosen': -251.35284423828125, 'logps/rejected': -407.72711181640625, 'logps/ref_chosen': -52.828346252441406, 'logps/ref_rejected': -89.191650390625, 'KL/chosen_KL_mean': -198.52450561523438, 'KL/rejected_KL_mean': -318.53546142578125, 'KL/mean': -258.52996826171875, 'KL/std': 160.81214904785156, 'logits/chosen': -0.4086461663246155, 'logits/rejected': -0.4056541323661804, 'epoch': 0.38} + 38%|███▊ | 259/681 [10:58<17:31, 2.49s/it] 38%|███▊ | 260/681 [11:01<17:51, 2.55s/it] {'loss': 1.0208, 'grad_norm': 31.707292556762695, 'learning_rate': 3.9022350248844246e-07, 'fcm_dpo/beta': 0.0029884944669902325, 'fcm_dpo/q_t': 0.3894280791282654, 'fcm_dpo/delta': -0.09151086211204529, 'fcm_dpo/margin': 162.99392700195312, 'margin_dpo/margin_mean': 162.99392700195312, 'margin_dpo/margin_std': 178.20040893554688, 'logps/chosen': -250.7026824951172, 'logps/rejected': -461.36871337890625, 'logps/ref_chosen': -47.41767501831055, 'logps/ref_rejected': -95.08978271484375, 'KL/chosen_KL_mean': -203.28500366210938, 'KL/rejected_KL_mean': -366.2789306640625, 'KL/mean': -284.781982421875, 'KL/std': 167.090087890625, 'logits/chosen': -0.3748651146888733, 'logits/rejected': -0.391143798828125, 'epoch': 0.38} + 38%|███▊ | 260/681 [11:01<17:51, 2.55s/it] 38%|███▊ | 261/681 [11:03<17:07, 2.45s/it] {'loss': 1.0713, 'grad_norm': 21.78121566772461, 'learning_rate': 3.891592063515376e-07, 'fcm_dpo/beta': 0.002936106640845537, 'fcm_dpo/q_t': 0.39958545565605164, 'fcm_dpo/delta': -0.04489829018712044, 'fcm_dpo/margin': 150.76947021484375, 'margin_dpo/margin_mean': 150.76947021484375, 'margin_dpo/margin_std': 200.406005859375, 'logps/chosen': -265.68743896484375, 'logps/rejected': -451.9404602050781, 'logps/ref_chosen': -53.03137969970703, 'logps/ref_rejected': -88.51494598388672, 'KL/chosen_KL_mean': -212.65603637695312, 'KL/rejected_KL_mean': -363.4255065917969, 'KL/mean': -288.040771484375, 'KL/std': 181.0100555419922, 'logits/chosen': -0.3127421438694, 'logits/rejected': -0.31155508756637573, 'epoch': 0.38} + 38%|███▊ | 261/681 [11:03<17:07, 2.45s/it] 38%|███▊ | 262/681 [11:06<16:55, 2.42s/it] {'loss': 1.1117, 'grad_norm': 28.57038688659668, 'learning_rate': 3.880912432401264e-07, 'fcm_dpo/beta': 0.0029631485231220722, 'fcm_dpo/q_t': 0.4153136610984802, 'fcm_dpo/delta': 0.0350569412112236, 'fcm_dpo/margin': 123.46089172363281, 'margin_dpo/margin_mean': 123.46089172363281, 'margin_dpo/margin_std': 167.72061157226562, 'logps/chosen': -311.89776611328125, 'logps/rejected': -462.15704345703125, 'logps/ref_chosen': -59.620140075683594, 'logps/ref_rejected': -86.41853332519531, 'KL/chosen_KL_mean': -252.27764892578125, 'KL/rejected_KL_mean': -375.738525390625, 'KL/mean': -314.0080871582031, 'KL/std': 165.99118041992188, 'logits/chosen': -0.3218010663986206, 'logits/rejected': -0.2904987037181854, 'epoch': 0.38} + 38%|███▊ | 262/681 [11:06<16:55, 2.42s/it] 39%|███▊ | 263/681 [11:08<16:52, 2.42s/it] {'loss': 1.0194, 'grad_norm': 26.555845260620117, 'learning_rate': 3.870196412960302e-07, 'fcm_dpo/beta': 0.0028930227272212505, 'fcm_dpo/q_t': 0.3831733465194702, 'fcm_dpo/delta': -0.11854880303144455, 'fcm_dpo/margin': 176.84181213378906, 'margin_dpo/margin_mean': 176.84181213378906, 'margin_dpo/margin_std': 203.54107666015625, 'logps/chosen': -286.98931884765625, 'logps/rejected': -501.26739501953125, 'logps/ref_chosen': -59.42094421386719, 'logps/ref_rejected': -96.85720825195312, 'KL/chosen_KL_mean': -227.56837463378906, 'KL/rejected_KL_mean': -404.4101867675781, 'KL/mean': -315.9892883300781, 'KL/std': 192.87933349609375, 'logits/chosen': -0.3501706123352051, 'logits/rejected': -0.3262799084186554, 'epoch': 0.39} + 39%|███▊ | 263/681 [11:08<16:52, 2.42s/it] 39%|███▉ | 264/681 [11:11<17:27, 2.51s/it] {'loss': 1.0785, 'grad_norm': 29.80936622619629, 'learning_rate': 3.8594442875695665e-07, 'fcm_dpo/beta': 0.0028530117124319077, 'fcm_dpo/q_t': 0.40188103914260864, 'fcm_dpo/delta': -0.034962985664606094, 'fcm_dpo/margin': 151.5025634765625, 'margin_dpo/margin_mean': 151.50254821777344, 'margin_dpo/margin_std': 198.95477294921875, 'logps/chosen': -297.45343017578125, 'logps/rejected': -480.09014892578125, 'logps/ref_chosen': -62.722084045410156, 'logps/ref_rejected': -93.85620880126953, 'KL/chosen_KL_mean': -234.73135375976562, 'KL/rejected_KL_mean': -386.2339172363281, 'KL/mean': -310.48260498046875, 'KL/std': 176.64036560058594, 'logits/chosen': -0.38784724473953247, 'logits/rejected': -0.377646803855896, 'epoch': 0.39} + 39%|███▉ | 264/681 [11:11<17:27, 2.51s/it] 39%|███▉ | 265/681 [11:13<17:39, 2.55s/it] {'loss': 1.1186, 'grad_norm': 29.3031005859375, 'learning_rate': 3.848656339557562e-07, 'fcm_dpo/beta': 0.0028611307498067617, 'fcm_dpo/q_t': 0.4080343246459961, 'fcm_dpo/delta': -0.014629107899963856, 'fcm_dpo/margin': 144.70462036132812, 'margin_dpo/margin_mean': 144.70462036132812, 'margin_dpo/margin_std': 237.73236083984375, 'logps/chosen': -317.12762451171875, 'logps/rejected': -487.88134765625, 'logps/ref_chosen': -61.971466064453125, 'logps/ref_rejected': -88.02059936523438, 'KL/chosen_KL_mean': -255.1561279296875, 'KL/rejected_KL_mean': -399.8607482910156, 'KL/mean': -327.5084533691406, 'KL/std': 199.5748291015625, 'logits/chosen': -0.3426782488822937, 'logits/rejected': -0.32741084694862366, 'epoch': 0.39} + 39%|███▉ | 265/681 [11:13<17:39, 2.55s/it] 39%|███▉ | 266/681 [11:16<17:28, 2.53s/it] {'loss': 1.146, 'grad_norm': 43.39963912963867, 'learning_rate': 3.8378328531967507e-07, 'fcm_dpo/beta': 0.002886436879634857, 'fcm_dpo/q_t': 0.42259740829467773, 'fcm_dpo/delta': 0.061003364622592926, 'fcm_dpo/margin': 118.16098022460938, 'margin_dpo/margin_mean': 118.16098022460938, 'margin_dpo/margin_std': 192.56309509277344, 'logps/chosen': -319.6777648925781, 'logps/rejected': -438.7103271484375, 'logps/ref_chosen': -67.09967041015625, 'logps/ref_rejected': -67.97122192382812, 'KL/chosen_KL_mean': -252.57809448242188, 'KL/rejected_KL_mean': -370.73907470703125, 'KL/mean': -311.6585693359375, 'KL/std': 166.17433166503906, 'logits/chosen': -0.37320268154144287, 'logits/rejected': -0.3327832818031311, 'epoch': 0.39} + 39%|███▉ | 266/681 [11:16<17:28, 2.53s/it] 39%|███▉ | 267/681 [11:18<17:29, 2.54s/it] {'loss': 1.0906, 'grad_norm': 31.87999153137207, 'learning_rate': 3.8269741136960646e-07, 'fcm_dpo/beta': 0.002876041457056999, 'fcm_dpo/q_t': 0.40417009592056274, 'fcm_dpo/delta': -0.01955413445830345, 'fcm_dpo/margin': 145.48333740234375, 'margin_dpo/margin_mean': 145.48333740234375, 'margin_dpo/margin_std': 203.5807342529297, 'logps/chosen': -295.96441650390625, 'logps/rejected': -462.64544677734375, 'logps/ref_chosen': -68.97075653076172, 'logps/ref_rejected': -90.16844940185547, 'KL/chosen_KL_mean': -226.99365234375, 'KL/rejected_KL_mean': -372.47698974609375, 'KL/mean': -299.7353515625, 'KL/std': 180.788818359375, 'logits/chosen': -0.39031726121902466, 'logits/rejected': -0.3596029281616211, 'epoch': 0.39} + 39%|███▉ | 267/681 [11:19<17:29, 2.54s/it] 39%|███▉ | 268/681 [11:21<17:28, 2.54s/it] {'loss': 1.1069, 'grad_norm': 29.13970184326172, 'learning_rate': 3.8160804071933894e-07, 'fcm_dpo/beta': 0.0028773611411452293, 'fcm_dpo/q_t': 0.4103718400001526, 'fcm_dpo/delta': 0.004575518891215324, 'fcm_dpo/margin': 137.43919372558594, 'margin_dpo/margin_mean': 137.439208984375, 'margin_dpo/margin_std': 202.26385498046875, 'logps/chosen': -288.2640686035156, 'logps/rejected': -471.4505920410156, 'logps/ref_chosen': -55.90031051635742, 'logps/ref_rejected': -101.64763641357422, 'KL/chosen_KL_mean': -232.36375427246094, 'KL/rejected_KL_mean': -369.802978515625, 'KL/mean': -301.0833740234375, 'KL/std': 167.608154296875, 'logits/chosen': -0.3367459774017334, 'logits/rejected': -0.34343862533569336, 'epoch': 0.39} + 39%|███▉ | 268/681 [11:21<17:28, 2.54s/it] 40%|███▉ | 269/681 [11:23<17:14, 2.51s/it] {'loss': 1.0466, 'grad_norm': 24.988513946533203, 'learning_rate': 3.8051520207480204e-07, 'fcm_dpo/beta': 0.002847407478839159, 'fcm_dpo/q_t': 0.38866060972213745, 'fcm_dpo/delta': -0.09410010278224945, 'fcm_dpo/margin': 171.92185974121094, 'margin_dpo/margin_mean': 171.92185974121094, 'margin_dpo/margin_std': 220.83987426757812, 'logps/chosen': -317.26605224609375, 'logps/rejected': -526.4976806640625, 'logps/ref_chosen': -70.03955841064453, 'logps/ref_rejected': -107.34937286376953, 'KL/chosen_KL_mean': -247.22647094726562, 'KL/rejected_KL_mean': -419.1483154296875, 'KL/mean': -333.1874084472656, 'KL/std': 174.40249633789062, 'logits/chosen': -0.3828558027744293, 'logits/rejected': -0.363941490650177, 'epoch': 0.4} + 40%|███▉ | 269/681 [11:24<17:14, 2.51s/it] 40%|███▉ | 270/681 [11:26<17:31, 2.56s/it] {'loss': 1.1275, 'grad_norm': 27.773122787475586, 'learning_rate': 3.794189242333106e-07, 'fcm_dpo/beta': 0.002855871804058552, 'fcm_dpo/q_t': 0.41616952419281006, 'fcm_dpo/delta': 0.035725079476833344, 'fcm_dpo/margin': 127.91445922851562, 'margin_dpo/margin_mean': 127.91445922851562, 'margin_dpo/margin_std': 195.2829132080078, 'logps/chosen': -283.6026916503906, 'logps/rejected': -451.9123229980469, 'logps/ref_chosen': -69.53347778320312, 'logps/ref_rejected': -109.92864990234375, 'KL/chosen_KL_mean': -214.0692138671875, 'KL/rejected_KL_mean': -341.98370361328125, 'KL/mean': -278.02642822265625, 'KL/std': 153.86318969726562, 'logits/chosen': -0.44005024433135986, 'logits/rejected': -0.43610844016075134, 'epoch': 0.4} + 40%|███▉ | 270/681 [11:26<17:31, 2.56s/it] 40%|███▉ | 271/681 [11:29<17:08, 2.51s/it] {'loss': 1.0517, 'grad_norm': 24.570371627807617, 'learning_rate': 3.7831923608280514e-07, 'fcm_dpo/beta': 0.002827045973390341, 'fcm_dpo/q_t': 0.3988415598869324, 'fcm_dpo/delta': -0.04044891148805618, 'fcm_dpo/margin': 155.06826782226562, 'margin_dpo/margin_mean': 155.06826782226562, 'margin_dpo/margin_std': 173.97885131835938, 'logps/chosen': -255.21405029296875, 'logps/rejected': -446.03155517578125, 'logps/ref_chosen': -56.76456832885742, 'logps/ref_rejected': -92.51383972167969, 'KL/chosen_KL_mean': -198.44947814941406, 'KL/rejected_KL_mean': -353.5177001953125, 'KL/mean': -275.9836120605469, 'KL/std': 153.60189819335938, 'logits/chosen': -0.36762213706970215, 'logits/rejected': -0.35115593671798706, 'epoch': 0.4} + 40%|███▉ | 271/681 [11:29<17:08, 2.51s/it] 40%|███▉ | 272/681 [11:31<17:32, 2.57s/it] {'loss': 0.9804, 'grad_norm': 35.40150451660156, 'learning_rate': 3.772161666010912e-07, 'fcm_dpo/beta': 0.002780818846076727, 'fcm_dpo/q_t': 0.3746742010116577, 'fcm_dpo/delta': -0.1463950276374817, 'fcm_dpo/margin': 193.7360076904297, 'margin_dpo/margin_mean': 193.7360076904297, 'margin_dpo/margin_std': 182.89303588867188, 'logps/chosen': -237.64764404296875, 'logps/rejected': -487.4293212890625, 'logps/ref_chosen': -49.497154235839844, 'logps/ref_rejected': -105.54279327392578, 'KL/chosen_KL_mean': -188.15049743652344, 'KL/rejected_KL_mean': -381.8865051269531, 'KL/mean': -285.01849365234375, 'KL/std': 175.69546508789062, 'logits/chosen': -0.3044808804988861, 'logits/rejected': -0.3166738450527191, 'epoch': 0.4} + 40%|███▉ | 272/681 [11:31<17:32, 2.57s/it] 40%|████ | 273/681 [11:34<17:07, 2.52s/it] {'loss': 1.0069, 'grad_norm': 26.675121307373047, 'learning_rate': 3.761097448550755e-07, 'fcm_dpo/beta': 0.002692791633307934, 'fcm_dpo/q_t': 0.3825136423110962, 'fcm_dpo/delta': -0.11455152183771133, 'fcm_dpo/margin': 188.71389770507812, 'margin_dpo/margin_mean': 188.71389770507812, 'margin_dpo/margin_std': 197.89047241210938, 'logps/chosen': -282.19036865234375, 'logps/rejected': -500.427490234375, 'logps/ref_chosen': -62.97539520263672, 'logps/ref_rejected': -92.49858093261719, 'KL/chosen_KL_mean': -219.2149658203125, 'KL/rejected_KL_mean': -407.92889404296875, 'KL/mean': -313.57196044921875, 'KL/std': 178.0384063720703, 'logits/chosen': -0.32241398096084595, 'logits/rejected': -0.30522340536117554, 'epoch': 0.4} + 40%|████ | 273/681 [11:34<17:07, 2.52s/it] 40%|████ | 274/681 [11:36<16:47, 2.48s/it] {'loss': 1.0939, 'grad_norm': 25.53626823425293, 'learning_rate': 3.75e-07, 'fcm_dpo/beta': 0.0026972047053277493, 'fcm_dpo/q_t': 0.4096784293651581, 'fcm_dpo/delta': 0.00932791456580162, 'fcm_dpo/margin': 144.9096221923828, 'margin_dpo/margin_mean': 144.9096221923828, 'margin_dpo/margin_std': 187.6047821044922, 'logps/chosen': -320.48095703125, 'logps/rejected': -487.055908203125, 'logps/ref_chosen': -55.66770935058594, 'logps/ref_rejected': -77.33308410644531, 'KL/chosen_KL_mean': -264.813232421875, 'KL/rejected_KL_mean': -409.72283935546875, 'KL/mean': -337.2680358886719, 'KL/std': 165.03564453125, 'logits/chosen': -0.30063068866729736, 'logits/rejected': -0.2833949625492096, 'epoch': 0.4} + 40%|████ | 274/681 [11:36<16:47, 2.48s/it] 40%|████ | 275/681 [11:39<17:09, 2.53s/it] {'loss': 1.0768, 'grad_norm': 24.385211944580078, 'learning_rate': 3.738869612786737e-07, 'fcm_dpo/beta': 0.0026927865110337734, 'fcm_dpo/q_t': 0.4042346179485321, 'fcm_dpo/delta': -0.0143581572920084, 'fcm_dpo/margin': 153.6110076904297, 'margin_dpo/margin_mean': 153.61099243164062, 'margin_dpo/margin_std': 189.52232360839844, 'logps/chosen': -255.0521240234375, 'logps/rejected': -453.3720703125, 'logps/ref_chosen': -48.594703674316406, 'logps/ref_rejected': -93.30369567871094, 'KL/chosen_KL_mean': -206.4573974609375, 'KL/rejected_KL_mean': -360.0683898925781, 'KL/mean': -283.26287841796875, 'KL/std': 167.62567138671875, 'logits/chosen': -0.32019296288490295, 'logits/rejected': -0.32504212856292725, 'epoch': 0.4} + 40%|████ | 275/681 [11:39<17:09, 2.53s/it] 41%|████ | 276/681 [11:41<17:00, 2.52s/it] {'loss': 1.0868, 'grad_norm': 26.473548889160156, 'learning_rate': 3.7277065802070204e-07, 'fcm_dpo/beta': 0.002671858761459589, 'fcm_dpo/q_t': 0.40615737438201904, 'fcm_dpo/delta': -0.011981412768363953, 'fcm_dpo/margin': 153.95571899414062, 'margin_dpo/margin_mean': 153.9557342529297, 'margin_dpo/margin_std': 205.688232421875, 'logps/chosen': -281.83819580078125, 'logps/rejected': -449.58221435546875, 'logps/ref_chosen': -56.57740783691406, 'logps/ref_rejected': -70.36566925048828, 'KL/chosen_KL_mean': -225.26080322265625, 'KL/rejected_KL_mean': -379.216552734375, 'KL/mean': -302.2386779785156, 'KL/std': 172.80694580078125, 'logits/chosen': -0.3291136622428894, 'logits/rejected': -0.30698275566101074, 'epoch': 0.41} + 41%|████ | 276/681 [11:41<17:00, 2.52s/it] 41%|████ | 277/681 [11:44<16:39, 2.47s/it] {'loss': 1.0794, 'grad_norm': 30.444353103637695, 'learning_rate': 3.71651119641714e-07, 'fcm_dpo/beta': 0.002672237576916814, 'fcm_dpo/q_t': 0.4026916027069092, 'fcm_dpo/delta': -0.02400265261530876, 'fcm_dpo/margin': 158.27906799316406, 'margin_dpo/margin_mean': 158.27908325195312, 'margin_dpo/margin_std': 206.87417602539062, 'logps/chosen': -304.4332275390625, 'logps/rejected': -499.322021484375, 'logps/ref_chosen': -56.27156066894531, 'logps/ref_rejected': -92.88127136230469, 'KL/chosen_KL_mean': -248.16168212890625, 'KL/rejected_KL_mean': -406.44073486328125, 'KL/mean': -327.30120849609375, 'KL/std': 174.60836791992188, 'logits/chosen': -0.32184985280036926, 'logits/rejected': -0.30650681257247925, 'epoch': 0.41} + 41%|████ | 277/681 [11:44<16:39, 2.47s/it] 41%|████ | 278/681 [11:46<17:05, 2.54s/it] {'loss': 1.0289, 'grad_norm': 26.94320297241211, 'learning_rate': 3.705283756425872e-07, 'fcm_dpo/beta': 0.0026234271936118603, 'fcm_dpo/q_t': 0.389728844165802, 'fcm_dpo/delta': -0.08697425574064255, 'fcm_dpo/margin': 183.9108123779297, 'margin_dpo/margin_mean': 183.9108123779297, 'margin_dpo/margin_std': 208.0062255859375, 'logps/chosen': -275.33709716796875, 'logps/rejected': -497.5595397949219, 'logps/ref_chosen': -52.94194030761719, 'logps/ref_rejected': -91.25357818603516, 'KL/chosen_KL_mean': -222.39517211914062, 'KL/rejected_KL_mean': -406.30596923828125, 'KL/mean': -314.3505859375, 'KL/std': 186.62579345703125, 'logits/chosen': -0.34066635370254517, 'logits/rejected': -0.3471217155456543, 'epoch': 0.41} + 41%|████ | 278/681 [11:46<17:05, 2.54s/it] 41%|████ | 279/681 [11:49<17:00, 2.54s/it] {'loss': 1.061, 'grad_norm': 29.17497444152832, 'learning_rate': 3.6940245560867e-07, 'fcm_dpo/beta': 0.0025754275266081095, 'fcm_dpo/q_t': 0.39524978399276733, 'fcm_dpo/delta': -0.06957367807626724, 'fcm_dpo/margin': 180.64385986328125, 'margin_dpo/margin_mean': 180.6438751220703, 'margin_dpo/margin_std': 237.0701141357422, 'logps/chosen': -303.2684326171875, 'logps/rejected': -523.1224365234375, 'logps/ref_chosen': -48.641319274902344, 'logps/ref_rejected': -87.8514404296875, 'KL/chosen_KL_mean': -254.62709045410156, 'KL/rejected_KL_mean': -435.2709655761719, 'KL/mean': -344.94903564453125, 'KL/std': 193.30276489257812, 'logits/chosen': -0.26812469959259033, 'logits/rejected': -0.26797914505004883, 'epoch': 0.41} + 41%|████ | 279/681 [11:49<17:00, 2.54s/it] 41%|████ | 280/681 [11:51<17:10, 2.57s/it] {'loss': 1.0245, 'grad_norm': 33.25874710083008, 'learning_rate': 3.6827338920900253e-07, 'fcm_dpo/beta': 0.002550060860812664, 'fcm_dpo/q_t': 0.3887876272201538, 'fcm_dpo/delta': -0.07875210046768188, 'fcm_dpo/margin': 186.25331115722656, 'margin_dpo/margin_mean': 186.25331115722656, 'margin_dpo/margin_std': 194.61471557617188, 'logps/chosen': -308.4758605957031, 'logps/rejected': -534.5509033203125, 'logps/ref_chosen': -58.797122955322266, 'logps/ref_rejected': -98.61885070800781, 'KL/chosen_KL_mean': -249.67874145507812, 'KL/rejected_KL_mean': -435.93206787109375, 'KL/mean': -342.8053894042969, 'KL/std': 178.5480499267578, 'logits/chosen': -0.29288673400878906, 'logits/rejected': -0.29508256912231445, 'epoch': 0.41} + 41%|████ | 280/681 [11:51<17:10, 2.57s/it] 41%|████▏ | 281/681 [11:54<17:07, 2.57s/it] {'loss': 1.0583, 'grad_norm': 21.142444610595703, 'learning_rate': 3.6714120619553435e-07, 'fcm_dpo/beta': 0.0025381785817444324, 'fcm_dpo/q_t': 0.3983193635940552, 'fcm_dpo/delta': -0.031121131032705307, 'fcm_dpo/margin': 169.27987670898438, 'margin_dpo/margin_mean': 169.27987670898438, 'margin_dpo/margin_std': 189.16592407226562, 'logps/chosen': -280.2163391113281, 'logps/rejected': -474.8902587890625, 'logps/ref_chosen': -55.488521575927734, 'logps/ref_rejected': -80.88258361816406, 'KL/chosen_KL_mean': -224.7278289794922, 'KL/rejected_KL_mean': -394.0076904296875, 'KL/mean': -309.36773681640625, 'KL/std': 175.15994262695312, 'logits/chosen': -0.33014634251594543, 'logits/rejected': -0.30275779962539673, 'epoch': 0.41} + 41%|████▏ | 281/681 [11:54<17:07, 2.57s/it] 41%|████▏ | 282/681 [11:57<16:58, 2.55s/it] {'loss': 1.1459, 'grad_norm': 26.15049934387207, 'learning_rate': 3.660059364023408e-07, 'fcm_dpo/beta': 0.002561165951192379, 'fcm_dpo/q_t': 0.42569971084594727, 'fcm_dpo/delta': 0.06942006200551987, 'fcm_dpo/margin': 129.77859497070312, 'margin_dpo/margin_mean': 129.77859497070312, 'margin_dpo/margin_std': 209.7483673095703, 'logps/chosen': -323.91802978515625, 'logps/rejected': -475.9774475097656, 'logps/ref_chosen': -73.07014465332031, 'logps/ref_rejected': -95.35098266601562, 'KL/chosen_KL_mean': -250.84788513183594, 'KL/rejected_KL_mean': -380.62646484375, 'KL/mean': -315.7371826171875, 'KL/std': 179.92788696289062, 'logits/chosen': -0.41560858488082886, 'logits/rejected': -0.3941164016723633, 'epoch': 0.41} + 41%|████▏ | 282/681 [11:57<16:58, 2.55s/it] 42%|████▏ | 283/681 [11:59<16:51, 2.54s/it] {'loss': 1.0228, 'grad_norm': 35.29081726074219, 'learning_rate': 3.6486760974483685e-07, 'fcm_dpo/beta': 0.002537979045882821, 'fcm_dpo/q_t': 0.3873726427555084, 'fcm_dpo/delta': -0.08652851730585098, 'fcm_dpo/margin': 190.0657196044922, 'margin_dpo/margin_mean': 190.0657196044922, 'margin_dpo/margin_std': 199.068115234375, 'logps/chosen': -322.21588134765625, 'logps/rejected': -547.3697509765625, 'logps/ref_chosen': -61.89844512939453, 'logps/ref_rejected': -96.98655700683594, 'KL/chosen_KL_mean': -260.31744384765625, 'KL/rejected_KL_mean': -450.3831787109375, 'KL/mean': -355.35028076171875, 'KL/std': 210.8460693359375, 'logits/chosen': -0.3637465834617615, 'logits/rejected': -0.3678331673145294, 'epoch': 0.42} + 42%|████▏ | 283/681 [11:59<16:51, 2.54s/it] 42%|████▏ | 284/681 [12:02<17:10, 2.60s/it] {'loss': 1.0346, 'grad_norm': 27.652767181396484, 'learning_rate': 3.6372625621898863e-07, 'fcm_dpo/beta': 0.002489683451130986, 'fcm_dpo/q_t': 0.39381855726242065, 'fcm_dpo/delta': -0.06252136826515198, 'fcm_dpo/margin': 184.56295776367188, 'margin_dpo/margin_mean': 184.56295776367188, 'margin_dpo/margin_std': 199.2593994140625, 'logps/chosen': -305.9761962890625, 'logps/rejected': -525.5728759765625, 'logps/ref_chosen': -58.4355354309082, 'logps/ref_rejected': -93.46926879882812, 'KL/chosen_KL_mean': -247.5406494140625, 'KL/rejected_KL_mean': -432.1036071777344, 'KL/mean': -339.8221435546875, 'KL/std': 199.65481567382812, 'logits/chosen': -0.3889576494693756, 'logits/rejected': -0.37424755096435547, 'epoch': 0.42} + 42%|████▏ | 284/681 [12:02<17:10, 2.60s/it] 42%|████▏ | 285/681 [12:04<17:06, 2.59s/it] {'loss': 1.0729, 'grad_norm': 25.144207000732422, 'learning_rate': 3.625819059005228e-07, 'fcm_dpo/beta': 0.0024931158404797316, 'fcm_dpo/q_t': 0.4023195803165436, 'fcm_dpo/delta': -0.014689784497022629, 'fcm_dpo/margin': 165.91085815429688, 'margin_dpo/margin_mean': 165.91085815429688, 'margin_dpo/margin_std': 193.71629333496094, 'logps/chosen': -353.661865234375, 'logps/rejected': -552.4674072265625, 'logps/ref_chosen': -66.23219299316406, 'logps/ref_rejected': -99.1268310546875, 'KL/chosen_KL_mean': -287.4296875, 'KL/rejected_KL_mean': -453.340576171875, 'KL/mean': -370.3851318359375, 'KL/std': 177.46621704101562, 'logits/chosen': -0.35408467054367065, 'logits/rejected': -0.3383770287036896, 'epoch': 0.42} + 42%|████▏ | 285/681 [12:04<17:06, 2.59s/it] 42%|████▏ | 286/681 [12:07<16:56, 2.57s/it] {'loss': 1.0588, 'grad_norm': 25.498445510864258, 'learning_rate': 3.614345889441346e-07, 'fcm_dpo/beta': 0.00246500875800848, 'fcm_dpo/q_t': 0.39727091789245605, 'fcm_dpo/delta': -0.04596859961748123, 'fcm_dpo/margin': 180.1011199951172, 'margin_dpo/margin_mean': 180.10110473632812, 'margin_dpo/margin_std': 218.45489501953125, 'logps/chosen': -370.49884033203125, 'logps/rejected': -566.2373657226562, 'logps/ref_chosen': -72.95100402832031, 'logps/ref_rejected': -88.58845520019531, 'KL/chosen_KL_mean': -297.5478210449219, 'KL/rejected_KL_mean': -477.64892578125, 'KL/mean': -387.598388671875, 'KL/std': 204.80935668945312, 'logits/chosen': -0.37891730666160583, 'logits/rejected': -0.36352336406707764, 'epoch': 0.42} + 42%|████▏ | 286/681 [12:07<16:56, 2.57s/it] 42%|████▏ | 287/681 [12:09<16:17, 2.48s/it] {'loss': 1.1114, 'grad_norm': 28.178863525390625, 'learning_rate': 3.6028433558269275e-07, 'fcm_dpo/beta': 0.0024740160442888737, 'fcm_dpo/q_t': 0.41632279753685, 'fcm_dpo/delta': 0.04098087176680565, 'fcm_dpo/margin': 145.67831420898438, 'margin_dpo/margin_mean': 145.67831420898438, 'margin_dpo/margin_std': 192.67111206054688, 'logps/chosen': -340.5473937988281, 'logps/rejected': -502.380615234375, 'logps/ref_chosen': -61.54115295410156, 'logps/ref_rejected': -77.69607543945312, 'KL/chosen_KL_mean': -279.0062255859375, 'KL/rejected_KL_mean': -424.6845703125, 'KL/mean': -351.84539794921875, 'KL/std': 178.66867065429688, 'logits/chosen': -0.37850940227508545, 'logits/rejected': -0.35931217670440674, 'epoch': 0.42} + 42%|████▏ | 287/681 [12:09<16:17, 2.48s/it] 42%|████▏ | 288/681 [12:12<16:24, 2.50s/it] {'loss': 1.0367, 'grad_norm': 25.88406753540039, 'learning_rate': 3.5913117612644327e-07, 'fcm_dpo/beta': 0.0024337535724043846, 'fcm_dpo/q_t': 0.3927251994609833, 'fcm_dpo/delta': -0.0626014918088913, 'fcm_dpo/margin': 188.20782470703125, 'margin_dpo/margin_mean': 188.20785522460938, 'margin_dpo/margin_std': 193.08023071289062, 'logps/chosen': -332.9190673828125, 'logps/rejected': -551.8013916015625, 'logps/ref_chosen': -56.661224365234375, 'logps/ref_rejected': -87.33570098876953, 'KL/chosen_KL_mean': -276.2578430175781, 'KL/rejected_KL_mean': -464.46563720703125, 'KL/mean': -370.36175537109375, 'KL/std': 186.77584838867188, 'logits/chosen': -0.34253329038619995, 'logits/rejected': -0.3305118680000305, 'epoch': 0.42} + 42%|████▏ | 288/681 [12:12<16:24, 2.50s/it] 42%|████▏ | 289/681 [12:14<16:20, 2.50s/it] {'loss': 1.0157, 'grad_norm': 38.1318473815918, 'learning_rate': 3.5797514096221024e-07, 'fcm_dpo/beta': 0.0024142626207321882, 'fcm_dpo/q_t': 0.38501453399658203, 'fcm_dpo/delta': -0.10661280155181885, 'fcm_dpo/margin': 207.67982482910156, 'margin_dpo/margin_mean': 207.6798095703125, 'margin_dpo/margin_std': 225.15972900390625, 'logps/chosen': -314.76324462890625, 'logps/rejected': -564.8553466796875, 'logps/ref_chosen': -45.23039245605469, 'logps/ref_rejected': -87.64266967773438, 'KL/chosen_KL_mean': -269.5328369140625, 'KL/rejected_KL_mean': -477.2126770019531, 'KL/mean': -373.37274169921875, 'KL/std': 201.24783325195312, 'logits/chosen': -0.2764891982078552, 'logits/rejected': -0.27845776081085205, 'epoch': 0.42} + 42%|████▏ | 289/681 [12:14<16:20, 2.50s/it] 43%|████▎ | 290/681 [12:17<16:32, 2.54s/it] {'loss': 1.0308, 'grad_norm': 25.646024703979492, 'learning_rate': 3.568162605525952e-07, 'fcm_dpo/beta': 0.0023515745997428894, 'fcm_dpo/q_t': 0.38738417625427246, 'fcm_dpo/delta': -0.10482804477214813, 'fcm_dpo/margin': 212.32504272460938, 'margin_dpo/margin_mean': 212.3250274658203, 'margin_dpo/margin_std': 258.3596496582031, 'logps/chosen': -334.8027038574219, 'logps/rejected': -608.3648071289062, 'logps/ref_chosen': -55.47149658203125, 'logps/ref_rejected': -116.70857238769531, 'KL/chosen_KL_mean': -279.3312072753906, 'KL/rejected_KL_mean': -491.65625, 'KL/mean': -385.49371337890625, 'KL/std': 216.3263397216797, 'logits/chosen': -0.2992396950721741, 'logits/rejected': -0.318649560213089, 'epoch': 0.43} + 43%|████▎ | 290/681 [12:17<16:32, 2.54s/it] 43%|████▎ | 291/681 [12:19<16:36, 2.56s/it] {'loss': 1.0598, 'grad_norm': 23.715065002441406, 'learning_rate': 3.5565456543517485e-07, 'fcm_dpo/beta': 0.002339608035981655, 'fcm_dpo/q_t': 0.3976435661315918, 'fcm_dpo/delta': -0.04106954485177994, 'fcm_dpo/margin': 187.67138671875, 'margin_dpo/margin_mean': 187.67138671875, 'margin_dpo/margin_std': 221.575927734375, 'logps/chosen': -296.69622802734375, 'logps/rejected': -510.404296875, 'logps/ref_chosen': -63.26036834716797, 'logps/ref_rejected': -89.29708862304688, 'KL/chosen_KL_mean': -233.43585205078125, 'KL/rejected_KL_mean': -421.1072082519531, 'KL/mean': -327.27154541015625, 'KL/std': 182.66287231445312, 'logits/chosen': -0.3613763451576233, 'logits/rejected': -0.3522465229034424, 'epoch': 0.43} + 43%|████▎ | 291/681 [12:19<16:36, 2.56s/it] 43%|████▎ | 292/681 [12:22<16:13, 2.50s/it] {'loss': 1.0516, 'grad_norm': 20.776294708251953, 'learning_rate': 3.5449008622169583e-07, 'fcm_dpo/beta': 0.0023100622929632664, 'fcm_dpo/q_t': 0.39310041069984436, 'fcm_dpo/delta': -0.06708824634552002, 'fcm_dpo/margin': 200.83868408203125, 'margin_dpo/margin_mean': 200.83868408203125, 'margin_dpo/margin_std': 247.34228515625, 'logps/chosen': -299.351806640625, 'logps/rejected': -536.2332763671875, 'logps/ref_chosen': -53.91852951049805, 'logps/ref_rejected': -89.96138000488281, 'KL/chosen_KL_mean': -245.43325805664062, 'KL/rejected_KL_mean': -446.27191162109375, 'KL/mean': -345.85260009765625, 'KL/std': 213.15567016601562, 'logits/chosen': -0.3369968831539154, 'logits/rejected': -0.32491156458854675, 'epoch': 0.43} + 43%|████▎ | 292/681 [12:22<16:13, 2.50s/it] 43%|████▎ | 293/681 [12:24<16:24, 2.54s/it] {'loss': 1.1325, 'grad_norm': 49.972896575927734, 'learning_rate': 3.5332285359726846e-07, 'fcm_dpo/beta': 0.0023124441504478455, 'fcm_dpo/q_t': 0.4190768599510193, 'fcm_dpo/delta': 0.043067529797554016, 'fcm_dpo/margin': 154.9648895263672, 'margin_dpo/margin_mean': 154.9648895263672, 'margin_dpo/margin_std': 242.87945556640625, 'logps/chosen': -305.7259521484375, 'logps/rejected': -478.167236328125, 'logps/ref_chosen': -60.376033782958984, 'logps/ref_rejected': -77.85244750976562, 'KL/chosen_KL_mean': -245.34991455078125, 'KL/rejected_KL_mean': -400.3148193359375, 'KL/mean': -322.8323669433594, 'KL/std': 209.0297088623047, 'logits/chosen': -0.3593342900276184, 'logits/rejected': -0.35283225774765015, 'epoch': 0.43} + 43%|████▎ | 293/681 [12:24<16:24, 2.54s/it] 43%|████▎ | 294/681 [12:27<16:07, 2.50s/it] {'loss': 1.0954, 'grad_norm': 26.523639678955078, 'learning_rate': 3.5215289831955786e-07, 'fcm_dpo/beta': 0.0023088366724550724, 'fcm_dpo/q_t': 0.4094581604003906, 'fcm_dpo/delta': 0.002053305506706238, 'fcm_dpo/margin': 172.348388671875, 'margin_dpo/margin_mean': 172.348388671875, 'margin_dpo/margin_std': 234.5027618408203, 'logps/chosen': -275.269287109375, 'logps/rejected': -481.4271240234375, 'logps/ref_chosen': -48.0875358581543, 'logps/ref_rejected': -81.89698791503906, 'KL/chosen_KL_mean': -227.1817626953125, 'KL/rejected_KL_mean': -399.5301513671875, 'KL/mean': -313.35595703125, 'KL/std': 188.7653045654297, 'logits/chosen': -0.34593725204467773, 'logits/rejected': -0.3513278663158417, 'epoch': 0.43} + 43%|████▎ | 294/681 [12:27<16:07, 2.50s/it] 43%|████▎ | 295/681 [12:29<16:00, 2.49s/it] {'loss': 1.0729, 'grad_norm': 33.26468276977539, 'learning_rate': 3.509802512179737e-07, 'fcm_dpo/beta': 0.0023033185862004757, 'fcm_dpo/q_t': 0.39998045563697815, 'fcm_dpo/delta': -0.041762471199035645, 'fcm_dpo/margin': 191.0041046142578, 'margin_dpo/margin_mean': 191.0041046142578, 'margin_dpo/margin_std': 251.3482208251953, 'logps/chosen': -328.86077880859375, 'logps/rejected': -557.396484375, 'logps/ref_chosen': -49.92467498779297, 'logps/ref_rejected': -87.45632934570312, 'KL/chosen_KL_mean': -278.93609619140625, 'KL/rejected_KL_mean': -469.940185546875, 'KL/mean': -374.4381103515625, 'KL/std': 214.35740661621094, 'logits/chosen': -0.3557334244251251, 'logits/rejected': -0.3590019941329956, 'epoch': 0.43} + 43%|████▎ | 295/681 [12:29<16:00, 2.49s/it] 43%|████▎ | 296/681 [12:32<15:52, 2.48s/it] {'loss': 1.2009, 'grad_norm': 39.7788200378418, 'learning_rate': 3.498049431928577e-07, 'fcm_dpo/beta': 0.0022890730760991573, 'fcm_dpo/q_t': 0.4310154318809509, 'fcm_dpo/delta': -0.013507579453289509, 'fcm_dpo/margin': 131.2469024658203, 'margin_dpo/margin_mean': 131.2469024658203, 'margin_dpo/margin_std': 270.313232421875, 'logps/chosen': -421.4886474609375, 'logps/rejected': -580.3333740234375, 'logps/ref_chosen': -65.49124145507812, 'logps/ref_rejected': -93.08908081054688, 'KL/chosen_KL_mean': -355.9974060058594, 'KL/rejected_KL_mean': -487.2442932128906, 'KL/mean': -421.620849609375, 'KL/std': 209.83924865722656, 'logits/chosen': -0.380574107170105, 'logits/rejected': -0.36485421657562256, 'epoch': 0.43} + 43%|████▎ | 296/681 [12:32<15:52, 2.48s/it] 44%|████▎ | 297/681 [12:34<16:03, 2.51s/it] {'loss': 1.1089, 'grad_norm': 35.391021728515625, 'learning_rate': 3.486270052146694e-07, 'fcm_dpo/beta': 0.0022979602217674255, 'fcm_dpo/q_t': 0.41503405570983887, 'fcm_dpo/delta': 0.03410791605710983, 'fcm_dpo/margin': 159.7744598388672, 'margin_dpo/margin_mean': 159.77444458007812, 'margin_dpo/margin_std': 212.7035369873047, 'logps/chosen': -374.9660339355469, 'logps/rejected': -573.402099609375, 'logps/ref_chosen': -56.476951599121094, 'logps/ref_rejected': -95.1385498046875, 'KL/chosen_KL_mean': -318.48907470703125, 'KL/rejected_KL_mean': -478.2635192871094, 'KL/mean': -398.3763122558594, 'KL/std': 206.62014770507812, 'logits/chosen': -0.37360668182373047, 'logits/rejected': -0.3786901831626892, 'epoch': 0.44} + 44%|████▎ | 297/681 [12:34<16:03, 2.51s/it] 44%|████▍ | 298/681 [12:37<16:08, 2.53s/it] {'loss': 1.0737, 'grad_norm': 28.887287139892578, 'learning_rate': 3.474464683231698e-07, 'fcm_dpo/beta': 0.0022850334644317627, 'fcm_dpo/q_t': 0.3950398564338684, 'fcm_dpo/delta': -0.08811478316783905, 'fcm_dpo/margin': 211.7411346435547, 'margin_dpo/margin_mean': 211.7411346435547, 'margin_dpo/margin_std': 316.9564514160156, 'logps/chosen': -416.14276123046875, 'logps/rejected': -677.2208862304688, 'logps/ref_chosen': -67.32516479492188, 'logps/ref_rejected': -116.66217041015625, 'KL/chosen_KL_mean': -348.817626953125, 'KL/rejected_KL_mean': -560.5587158203125, 'KL/mean': -454.68817138671875, 'KL/std': 270.39697265625, 'logits/chosen': -0.3834819495677948, 'logits/rejected': -0.3983224630355835, 'epoch': 0.44} + 44%|████▍ | 298/681 [12:37<16:08, 2.53s/it] 44%|████▍ | 299/681 [12:39<16:06, 2.53s/it] {'loss': 1.1071, 'grad_norm': 41.97990417480469, 'learning_rate': 3.462633636266041e-07, 'fcm_dpo/beta': 0.0022771679796278477, 'fcm_dpo/q_t': 0.4095514416694641, 'fcm_dpo/delta': -0.0039763785898685455, 'fcm_dpo/margin': 177.01644897460938, 'margin_dpo/margin_mean': 177.01646423339844, 'margin_dpo/margin_std': 260.2517395019531, 'logps/chosen': -335.4031066894531, 'logps/rejected': -547.7857666015625, 'logps/ref_chosen': -48.96209716796875, 'logps/ref_rejected': -84.32823944091797, 'KL/chosen_KL_mean': -286.4410095214844, 'KL/rejected_KL_mean': -463.45751953125, 'KL/mean': -374.9492492675781, 'KL/std': 203.78659057617188, 'logits/chosen': -0.36679205298423767, 'logits/rejected': -0.37225013971328735, 'epoch': 0.44} + 44%|████▍ | 299/681 [12:39<16:06, 2.53s/it] 44%|████▍ | 300/681 [12:42<16:20, 2.57s/it] {'loss': 1.0565, 'grad_norm': 29.504758834838867, 'learning_rate': 3.4507772230088147e-07, 'fcm_dpo/beta': 0.002236669883131981, 'fcm_dpo/q_t': 0.38846272230148315, 'fcm_dpo/delta': -0.09869952499866486, 'fcm_dpo/margin': 220.83203125, 'margin_dpo/margin_mean': 220.83203125, 'margin_dpo/margin_std': 298.01202392578125, 'logps/chosen': -413.30255126953125, 'logps/rejected': -671.02734375, 'logps/ref_chosen': -59.07371139526367, 'logps/ref_rejected': -95.9664535522461, 'KL/chosen_KL_mean': -354.22882080078125, 'KL/rejected_KL_mean': -575.0608520507812, 'KL/mean': -464.64483642578125, 'KL/std': 244.3948974609375, 'logits/chosen': -0.35466793179512024, 'logits/rejected': -0.35978570580482483, 'epoch': 0.44} + 44%|████▍ | 300/681 [12:42<16:20, 2.57s/it] 44%|████▍ | 301/681 [12:44<15:44, 2.48s/it] {'loss': 1.0522, 'grad_norm': 24.30299949645996, 'learning_rate': 3.4388957558875316e-07, 'fcm_dpo/beta': 0.0021925170440226793, 'fcm_dpo/q_t': 0.39620107412338257, 'fcm_dpo/delta': -0.06446747481822968, 'fcm_dpo/margin': 210.38323974609375, 'margin_dpo/margin_mean': 210.38323974609375, 'margin_dpo/margin_std': 263.6629333496094, 'logps/chosen': -354.27423095703125, 'logps/rejected': -599.7616577148438, 'logps/ref_chosen': -57.249366760253906, 'logps/ref_rejected': -92.35354614257812, 'KL/chosen_KL_mean': -297.02484130859375, 'KL/rejected_KL_mean': -507.4081115722656, 'KL/mean': -402.21649169921875, 'KL/std': 221.8513641357422, 'logits/chosen': -0.319614976644516, 'logits/rejected': -0.31558164954185486, 'epoch': 0.44} + 44%|████▍ | 301/681 [12:44<15:44, 2.48s/it] 44%|████▍ | 302/681 [12:47<16:06, 2.55s/it] {'loss': 1.1013, 'grad_norm': 22.057884216308594, 'learning_rate': 3.426989547989902e-07, 'fcm_dpo/beta': 0.0021865563467144966, 'fcm_dpo/q_t': 0.41217830777168274, 'fcm_dpo/delta': 0.017055466771125793, 'fcm_dpo/margin': 175.31198120117188, 'margin_dpo/margin_mean': 175.31198120117188, 'margin_dpo/margin_std': 233.5333251953125, 'logps/chosen': -309.5210876464844, 'logps/rejected': -530.8614501953125, 'logps/ref_chosen': -51.197994232177734, 'logps/ref_rejected': -97.22636413574219, 'KL/chosen_KL_mean': -258.3230895996094, 'KL/rejected_KL_mean': -433.63507080078125, 'KL/mean': -345.97906494140625, 'KL/std': 195.16726684570312, 'logits/chosen': -0.3739083409309387, 'logits/rejected': -0.3783670663833618, 'epoch': 0.44} + 44%|████▍ | 302/681 [12:47<16:06, 2.55s/it] 44%|████▍ | 303/681 [12:50<16:29, 2.62s/it] {'loss': 1.1241, 'grad_norm': 21.407352447509766, 'learning_rate': 3.4150589130555773e-07, 'fcm_dpo/beta': 0.002201956696808338, 'fcm_dpo/q_t': 0.4157490134239197, 'fcm_dpo/delta': 0.031198769807815552, 'fcm_dpo/margin': 167.98590087890625, 'margin_dpo/margin_mean': 167.98590087890625, 'margin_dpo/margin_std': 252.49813842773438, 'logps/chosen': -335.2654113769531, 'logps/rejected': -523.4827880859375, 'logps/ref_chosen': -66.71394348144531, 'logps/ref_rejected': -86.94542694091797, 'KL/chosen_KL_mean': -268.55145263671875, 'KL/rejected_KL_mean': -436.537353515625, 'KL/mean': -352.54443359375, 'KL/std': 213.00778198242188, 'logits/chosen': -0.3980519771575928, 'logits/rejected': -0.38533222675323486, 'epoch': 0.44} + 44%|████▍ | 303/681 [12:50<16:29, 2.62s/it] 45%|████▍ | 304/681 [12:53<16:38, 2.65s/it] {'loss': 1.0529, 'grad_norm': 33.168792724609375, 'learning_rate': 3.403104165467883e-07, 'fcm_dpo/beta': 0.002216983586549759, 'fcm_dpo/q_t': 0.40225833654403687, 'fcm_dpo/delta': -0.009714346379041672, 'fcm_dpo/margin': 184.502197265625, 'margin_dpo/margin_mean': 184.50221252441406, 'margin_dpo/margin_std': 165.41900634765625, 'logps/chosen': -307.55694580078125, 'logps/rejected': -510.58050537109375, 'logps/ref_chosen': -71.95069885253906, 'logps/ref_rejected': -90.47203063964844, 'KL/chosen_KL_mean': -235.60621643066406, 'KL/rejected_KL_mean': -420.10845947265625, 'KL/mean': -327.85736083984375, 'KL/std': 188.72259521484375, 'logits/chosen': -0.42254841327667236, 'logits/rejected': -0.41290074586868286, 'epoch': 0.45} + 45%|████▍ | 304/681 [12:53<16:38, 2.65s/it] 45%|████▍ | 305/681 [12:55<16:34, 2.65s/it] {'loss': 1.1211, 'grad_norm': 26.381446838378906, 'learning_rate': 3.391125620245535e-07, 'fcm_dpo/beta': 0.002209282945841551, 'fcm_dpo/q_t': 0.4180421531200409, 'fcm_dpo/delta': 0.04157250002026558, 'fcm_dpo/margin': 162.76901245117188, 'margin_dpo/margin_mean': 162.76901245117188, 'margin_dpo/margin_std': 229.144775390625, 'logps/chosen': -313.06787109375, 'logps/rejected': -501.7962341308594, 'logps/ref_chosen': -66.79523468017578, 'logps/ref_rejected': -92.75459289550781, 'KL/chosen_KL_mean': -246.27264404296875, 'KL/rejected_KL_mean': -409.0416259765625, 'KL/mean': -327.6571350097656, 'KL/std': 209.47964477539062, 'logits/chosen': -0.4123404622077942, 'logits/rejected': -0.39488470554351807, 'epoch': 0.45} + 45%|████▍ | 305/681 [12:55<16:34, 2.65s/it] 45%|████▍ | 306/681 [12:58<16:29, 2.64s/it] {'loss': 1.1087, 'grad_norm': 32.627220153808594, 'learning_rate': 3.3791235930343417e-07, 'fcm_dpo/beta': 0.0022436161525547504, 'fcm_dpo/q_t': 0.417450875043869, 'fcm_dpo/delta': 0.0480995737016201, 'fcm_dpo/margin': 157.5745849609375, 'margin_dpo/margin_mean': 157.5745849609375, 'margin_dpo/margin_std': 195.82992553710938, 'logps/chosen': -307.6580810546875, 'logps/rejected': -480.70794677734375, 'logps/ref_chosen': -69.68389892578125, 'logps/ref_rejected': -85.15919494628906, 'KL/chosen_KL_mean': -237.97418212890625, 'KL/rejected_KL_mean': -395.54876708984375, 'KL/mean': -316.761474609375, 'KL/std': 183.5909881591797, 'logits/chosen': -0.41048943996429443, 'logits/rejected': -0.3866746425628662, 'epoch': 0.45} + 45%|████▍ | 306/681 [12:58<16:29, 2.64s/it] 45%|████▌ | 307/681 [13:01<16:38, 2.67s/it] {'loss': 1.1073, 'grad_norm': 28.697053909301758, 'learning_rate': 3.367098400098881e-07, 'fcm_dpo/beta': 0.002255768049508333, 'fcm_dpo/q_t': 0.415368914604187, 'fcm_dpo/delta': 0.03941379487514496, 'fcm_dpo/margin': 160.48297119140625, 'margin_dpo/margin_mean': 160.48297119140625, 'margin_dpo/margin_std': 204.71377563476562, 'logps/chosen': -287.4091491699219, 'logps/rejected': -464.69903564453125, 'logps/ref_chosen': -70.16542053222656, 'logps/ref_rejected': -86.97230529785156, 'KL/chosen_KL_mean': -217.2437286376953, 'KL/rejected_KL_mean': -377.72674560546875, 'KL/mean': -297.4852294921875, 'KL/std': 166.21441650390625, 'logits/chosen': -0.4190334677696228, 'logits/rejected': -0.400789737701416, 'epoch': 0.45} + 45%|████▌ | 307/681 [13:01<16:38, 2.67s/it] 45%|████▌ | 308/681 [13:03<16:26, 2.64s/it] {'loss': 1.0594, 'grad_norm': 32.00320053100586, 'learning_rate': 3.355050358314172e-07, 'fcm_dpo/beta': 0.002262132242321968, 'fcm_dpo/q_t': 0.40270644426345825, 'fcm_dpo/delta': -0.012053810060024261, 'fcm_dpo/margin': 181.93313598632812, 'margin_dpo/margin_mean': 181.93313598632812, 'margin_dpo/margin_std': 186.53610229492188, 'logps/chosen': -282.2489929199219, 'logps/rejected': -488.3094177246094, 'logps/ref_chosen': -55.2449951171875, 'logps/ref_rejected': -79.37226104736328, 'KL/chosen_KL_mean': -227.00399780273438, 'KL/rejected_KL_mean': -408.9371337890625, 'KL/mean': -317.9705810546875, 'KL/std': 191.77737426757812, 'logits/chosen': -0.40174388885498047, 'logits/rejected': -0.3911542594432831, 'epoch': 0.45} + 45%|████▌ | 308/681 [13:03<16:26, 2.64s/it] 45%|████▌ | 309/681 [13:06<16:29, 2.66s/it] {'loss': 1.067, 'grad_norm': 36.268280029296875, 'learning_rate': 3.3429797851573183e-07, 'fcm_dpo/beta': 0.0022613410837948322, 'fcm_dpo/q_t': 0.40044116973876953, 'fcm_dpo/delta': -0.025947626680135727, 'fcm_dpo/margin': 187.72882080078125, 'margin_dpo/margin_mean': 187.72882080078125, 'margin_dpo/margin_std': 219.98861694335938, 'logps/chosen': -283.98138427734375, 'logps/rejected': -505.09185791015625, 'logps/ref_chosen': -48.959083557128906, 'logps/ref_rejected': -82.34072875976562, 'KL/chosen_KL_mean': -235.02227783203125, 'KL/rejected_KL_mean': -422.7511291503906, 'KL/mean': -328.88671875, 'KL/std': 208.54153442382812, 'logits/chosen': -0.3440445065498352, 'logits/rejected': -0.3352274000644684, 'epoch': 0.45} + 45%|████▌ | 309/681 [13:06<16:29, 2.66s/it] 46%|████▌ | 310/681 [13:08<16:26, 2.66s/it] {'loss': 1.0746, 'grad_norm': 27.633638381958008, 'learning_rate': 3.3308869986991487e-07, 'fcm_dpo/beta': 0.002258842345327139, 'fcm_dpo/q_t': 0.40748023986816406, 'fcm_dpo/delta': 0.008499890565872192, 'fcm_dpo/margin': 173.33135986328125, 'margin_dpo/margin_mean': 173.33135986328125, 'margin_dpo/margin_std': 180.37374877929688, 'logps/chosen': -337.8306579589844, 'logps/rejected': -528.3505249023438, 'logps/ref_chosen': -62.74177932739258, 'logps/ref_rejected': -79.9302978515625, 'KL/chosen_KL_mean': -275.0888671875, 'KL/rejected_KL_mean': -448.42022705078125, 'KL/mean': -361.7545471191406, 'KL/std': 183.4505157470703, 'logits/chosen': -0.3811958432197571, 'logits/rejected': -0.36551085114479065, 'epoch': 0.46} + 46%|████▌ | 310/681 [13:09<16:26, 2.66s/it] 46%|████▌ | 311/681 [13:11<15:56, 2.58s/it] {'loss': 1.0617, 'grad_norm': 27.40664291381836, 'learning_rate': 3.3187723175958346e-07, 'fcm_dpo/beta': 0.0022331131622195244, 'fcm_dpo/q_t': 0.3981863260269165, 'fcm_dpo/delta': -0.05178193002939224, 'fcm_dpo/margin': 201.25680541992188, 'margin_dpo/margin_mean': 201.25680541992188, 'margin_dpo/margin_std': 256.835693359375, 'logps/chosen': -340.7347717285156, 'logps/rejected': -566.40185546875, 'logps/ref_chosen': -53.02798080444336, 'logps/ref_rejected': -77.43820190429688, 'KL/chosen_KL_mean': -287.706787109375, 'KL/rejected_KL_mean': -488.963623046875, 'KL/mean': -388.335205078125, 'KL/std': 243.3779296875, 'logits/chosen': -0.3802347779273987, 'logits/rejected': -0.35250845551490784, 'epoch': 0.46} + 46%|████▌ | 311/681 [13:11<15:56, 2.58s/it] 46%|████▌ | 312/681 [13:13<15:22, 2.50s/it] {'loss': 1.0944, 'grad_norm': 25.603744506835938, 'learning_rate': 3.306636061080487e-07, 'fcm_dpo/beta': 0.002234598621726036, 'fcm_dpo/q_t': 0.4089137613773346, 'fcm_dpo/delta': -0.004756327718496323, 'fcm_dpo/margin': 180.94058227539062, 'margin_dpo/margin_mean': 180.94058227539062, 'margin_dpo/margin_std': 252.2486114501953, 'logps/chosen': -326.34814453125, 'logps/rejected': -533.6893310546875, 'logps/ref_chosen': -49.39221954345703, 'logps/ref_rejected': -75.79280853271484, 'KL/chosen_KL_mean': -276.9559326171875, 'KL/rejected_KL_mean': -457.896484375, 'KL/mean': -367.42620849609375, 'KL/std': 211.4532470703125, 'logits/chosen': -0.3543195128440857, 'logits/rejected': -0.34367918968200684, 'epoch': 0.46} + 46%|████▌ | 312/681 [13:13<15:22, 2.50s/it] 46%|████▌ | 313/681 [13:16<15:02, 2.45s/it] {'loss': 1.0913, 'grad_norm': 25.151771545410156, 'learning_rate': 3.2944785489547537e-07, 'fcm_dpo/beta': 0.0022380563896149397, 'fcm_dpo/q_t': 0.4060678482055664, 'fcm_dpo/delta': -0.009237736463546753, 'fcm_dpo/margin': 182.31651306152344, 'margin_dpo/margin_mean': 182.31651306152344, 'margin_dpo/margin_std': 241.20245361328125, 'logps/chosen': -308.8540954589844, 'logps/rejected': -527.424072265625, 'logps/ref_chosen': -50.152740478515625, 'logps/ref_rejected': -86.40620422363281, 'KL/chosen_KL_mean': -258.70135498046875, 'KL/rejected_KL_mean': -441.01788330078125, 'KL/mean': -349.859619140625, 'KL/std': 218.23260498046875, 'logits/chosen': -0.40875959396362305, 'logits/rejected': -0.40443694591522217, 'epoch': 0.46} + 46%|████▌ | 313/681 [13:16<15:02, 2.45s/it] 46%|████▌ | 314/681 [13:18<15:25, 2.52s/it] {'loss': 1.1216, 'grad_norm': 21.262128829956055, 'learning_rate': 3.2823001015803857e-07, 'fcm_dpo/beta': 0.002218043664470315, 'fcm_dpo/q_t': 0.4140198826789856, 'fcm_dpo/delta': 0.01605740562081337, 'fcm_dpo/margin': 173.25335693359375, 'margin_dpo/margin_mean': 173.25335693359375, 'margin_dpo/margin_std': 268.62567138671875, 'logps/chosen': -330.53955078125, 'logps/rejected': -544.15185546875, 'logps/ref_chosen': -57.237579345703125, 'logps/ref_rejected': -97.5965347290039, 'KL/chosen_KL_mean': -273.3019714355469, 'KL/rejected_KL_mean': -446.5552978515625, 'KL/mean': -359.92864990234375, 'KL/std': 209.75840759277344, 'logits/chosen': -0.4432973861694336, 'logits/rejected': -0.44494277238845825, 'epoch': 0.46} + 46%|████▌ | 314/681 [13:18<15:25, 2.52s/it] 46%|████▋ | 315/681 [13:21<15:28, 2.54s/it] {'loss': 1.1129, 'grad_norm': 21.051586151123047, 'learning_rate': 3.270101039870797e-07, 'fcm_dpo/beta': 0.0022414117120206356, 'fcm_dpo/q_t': 0.41757166385650635, 'fcm_dpo/delta': 0.04551296681165695, 'fcm_dpo/margin': 158.88729858398438, 'margin_dpo/margin_mean': 158.88729858398438, 'margin_dpo/margin_std': 208.52919006347656, 'logps/chosen': -296.01580810546875, 'logps/rejected': -491.5143737792969, 'logps/ref_chosen': -49.06958770751953, 'logps/ref_rejected': -85.68087768554688, 'KL/chosen_KL_mean': -246.94622802734375, 'KL/rejected_KL_mean': -405.83349609375, 'KL/mean': -326.3898620605469, 'KL/std': 184.21778869628906, 'logits/chosen': -0.3383270502090454, 'logits/rejected': -0.33996307849884033, 'epoch': 0.46} + 46%|████▋ | 315/681 [13:21<15:28, 2.54s/it] 46%|████▋ | 316/681 [13:23<15:25, 2.54s/it] {'loss': 1.0184, 'grad_norm': 26.301368713378906, 'learning_rate': 3.2578816852826086e-07, 'fcm_dpo/beta': 0.002204576972872019, 'fcm_dpo/q_t': 0.386934369802475, 'fcm_dpo/delta': -0.10198242217302322, 'fcm_dpo/margin': 224.96533203125, 'margin_dpo/margin_mean': 224.96531677246094, 'margin_dpo/margin_std': 245.899169921875, 'logps/chosen': -301.6004638671875, 'logps/rejected': -573.5865478515625, 'logps/ref_chosen': -54.26074981689453, 'logps/ref_rejected': -101.2814712524414, 'KL/chosen_KL_mean': -247.3397216796875, 'KL/rejected_KL_mean': -472.3050537109375, 'KL/mean': -359.8223876953125, 'KL/std': 221.7631378173828, 'logits/chosen': -0.37936335802078247, 'logits/rejected': -0.38319075107574463, 'epoch': 0.46} + 46%|████▋ | 316/681 [13:23<15:25, 2.54s/it] 47%|████▋ | 317/681 [13:26<15:33, 2.56s/it] {'loss': 0.9823, 'grad_norm': 27.939563751220703, 'learning_rate': 3.2456423598071783e-07, 'fcm_dpo/beta': 0.0021673087030649185, 'fcm_dpo/q_t': 0.3760732412338257, 'fcm_dpo/delta': -0.1395837366580963, 'fcm_dpo/margin': 245.63772583007812, 'margin_dpo/margin_mean': 245.6377410888672, 'margin_dpo/margin_std': 226.56988525390625, 'logps/chosen': -310.66094970703125, 'logps/rejected': -600.9035034179688, 'logps/ref_chosen': -56.094207763671875, 'logps/ref_rejected': -100.69905090332031, 'KL/chosen_KL_mean': -254.56671142578125, 'KL/rejected_KL_mean': -500.2044372558594, 'KL/mean': -377.3856201171875, 'KL/std': 204.3238525390625, 'logits/chosen': -0.36965489387512207, 'logits/rejected': -0.3580781817436218, 'epoch': 0.47} + 47%|████▋ | 317/681 [13:26<15:33, 2.56s/it] 47%|████▋ | 318/681 [13:28<15:14, 2.52s/it] {'loss': 1.0842, 'grad_norm': 25.99791717529297, 'learning_rate': 3.233383385962115e-07, 'fcm_dpo/beta': 0.002148838248103857, 'fcm_dpo/q_t': 0.4076169729232788, 'fcm_dpo/delta': -0.0003134552389383316, 'fcm_dpo/margin': 186.2266387939453, 'margin_dpo/margin_mean': 186.2266387939453, 'margin_dpo/margin_std': 232.8768310546875, 'logps/chosen': -344.34295654296875, 'logps/rejected': -548.6881103515625, 'logps/ref_chosen': -64.64569854736328, 'logps/ref_rejected': -82.76425170898438, 'KL/chosen_KL_mean': -279.697265625, 'KL/rejected_KL_mean': -465.92388916015625, 'KL/mean': -372.810546875, 'KL/std': 213.03033447265625, 'logits/chosen': -0.4427594542503357, 'logits/rejected': -0.4159233570098877, 'epoch': 0.47} + 47%|████▋ | 318/681 [13:28<15:14, 2.52s/it] 47%|████▋ | 319/681 [13:31<15:32, 2.57s/it] {'loss': 1.04, 'grad_norm': 22.634004592895508, 'learning_rate': 3.2211050867827805e-07, 'fcm_dpo/beta': 0.002122014295309782, 'fcm_dpo/q_t': 0.3920608162879944, 'fcm_dpo/delta': -0.06920456886291504, 'fcm_dpo/margin': 219.60238647460938, 'margin_dpo/margin_mean': 219.60238647460938, 'margin_dpo/margin_std': 253.1864013671875, 'logps/chosen': -303.81646728515625, 'logps/rejected': -587.9415893554688, 'logps/ref_chosen': -49.383758544921875, 'logps/ref_rejected': -113.90650939941406, 'KL/chosen_KL_mean': -254.43267822265625, 'KL/rejected_KL_mean': -474.03509521484375, 'KL/mean': -364.23388671875, 'KL/std': 231.52847290039062, 'logits/chosen': -0.40253257751464844, 'logits/rejected': -0.41706568002700806, 'epoch': 0.47} + 47%|████▋ | 319/681 [13:31<15:32, 2.57s/it] 47%|████▋ | 320/681 [13:34<15:36, 2.59s/it] {'loss': 1.0125, 'grad_norm': 26.852354049682617, 'learning_rate': 3.208807785813777e-07, 'fcm_dpo/beta': 0.002073149662464857, 'fcm_dpo/q_t': 0.38467395305633545, 'fcm_dpo/delta': -0.10854032635688782, 'fcm_dpo/margin': 242.403564453125, 'margin_dpo/margin_mean': 242.403564453125, 'margin_dpo/margin_std': 261.4935607910156, 'logps/chosen': -329.9727783203125, 'logps/rejected': -610.5386352539062, 'logps/ref_chosen': -59.50489044189453, 'logps/ref_rejected': -97.66717529296875, 'KL/chosen_KL_mean': -270.4678955078125, 'KL/rejected_KL_mean': -512.8714599609375, 'KL/mean': -391.669677734375, 'KL/std': 244.3293914794922, 'logits/chosen': -0.37782442569732666, 'logits/rejected': -0.3842761516571045, 'epoch': 0.47} + 47%|████▋ | 320/681 [13:34<15:36, 2.59s/it] 47%|████▋ | 321/681 [13:36<15:20, 2.56s/it] {'loss': 1.0823, 'grad_norm': 25.74936294555664, 'learning_rate': 3.1964918071004217e-07, 'fcm_dpo/beta': 0.002050905954092741, 'fcm_dpo/q_t': 0.4041670560836792, 'fcm_dpo/delta': -0.019337691366672516, 'fcm_dpo/margin': 203.71778869628906, 'margin_dpo/margin_mean': 203.71778869628906, 'margin_dpo/margin_std': 262.6986083984375, 'logps/chosen': -397.091552734375, 'logps/rejected': -630.9017333984375, 'logps/ref_chosen': -61.548683166503906, 'logps/ref_rejected': -91.64103698730469, 'KL/chosen_KL_mean': -335.5428771972656, 'KL/rejected_KL_mean': -539.2606201171875, 'KL/mean': -437.40179443359375, 'KL/std': 246.63409423828125, 'logits/chosen': -0.3634049594402313, 'logits/rejected': -0.35541000962257385, 'epoch': 0.47} + 47%|████▋ | 321/681 [13:36<15:20, 2.56s/it] 47%|████▋ | 322/681 [13:39<15:24, 2.57s/it] {'loss': 1.0333, 'grad_norm': 21.486406326293945, 'learning_rate': 3.184157475180207e-07, 'fcm_dpo/beta': 0.0020338515751063824, 'fcm_dpo/q_t': 0.39247822761535645, 'fcm_dpo/delta': -0.0633477047085762, 'fcm_dpo/margin': 226.15872192382812, 'margin_dpo/margin_mean': 226.15872192382812, 'margin_dpo/margin_std': 235.21531677246094, 'logps/chosen': -350.89593505859375, 'logps/rejected': -615.5145263671875, 'logps/ref_chosen': -57.29003143310547, 'logps/ref_rejected': -95.74992370605469, 'KL/chosen_KL_mean': -293.60589599609375, 'KL/rejected_KL_mean': -519.7646484375, 'KL/mean': -406.685302734375, 'KL/std': 222.54376220703125, 'logits/chosen': -0.3742543160915375, 'logits/rejected': -0.37500399351119995, 'epoch': 0.47} + 47%|████▋ | 322/681 [13:39<15:24, 2.57s/it] 47%|████▋ | 323/681 [13:41<15:27, 2.59s/it] {'loss': 1.094, 'grad_norm': 40.52562713623047, 'learning_rate': 3.171805115074251e-07, 'fcm_dpo/beta': 0.002034769393503666, 'fcm_dpo/q_t': 0.4092080295085907, 'fcm_dpo/delta': 0.009797626174986362, 'fcm_dpo/margin': 191.94479370117188, 'margin_dpo/margin_mean': 191.94479370117188, 'margin_dpo/margin_std': 242.95655822753906, 'logps/chosen': -365.85089111328125, 'logps/rejected': -581.6236572265625, 'logps/ref_chosen': -51.23395919799805, 'logps/ref_rejected': -75.06192016601562, 'KL/chosen_KL_mean': -314.616943359375, 'KL/rejected_KL_mean': -506.56170654296875, 'KL/mean': -410.58935546875, 'KL/std': 222.89964294433594, 'logits/chosen': -0.39556217193603516, 'logits/rejected': -0.3936944603919983, 'epoch': 0.47} + 47%|████▋ | 323/681 [13:41<15:27, 2.59s/it] 48%|████▊ | 324/681 [13:44<15:09, 2.55s/it] {'loss': 1.1314, 'grad_norm': 45.35006332397461, 'learning_rate': 3.1594350522787295e-07, 'fcm_dpo/beta': 0.0020614464301615953, 'fcm_dpo/q_t': 0.41647300124168396, 'fcm_dpo/delta': 0.0281895250082016, 'fcm_dpo/margin': 180.3145294189453, 'margin_dpo/margin_mean': 180.3145294189453, 'margin_dpo/margin_std': 275.48095703125, 'logps/chosen': -429.6033630371094, 'logps/rejected': -631.26025390625, 'logps/ref_chosen': -65.13516998291016, 'logps/ref_rejected': -86.47750854492188, 'KL/chosen_KL_mean': -364.46820068359375, 'KL/rejected_KL_mean': -544.78271484375, 'KL/mean': -454.6254577636719, 'KL/std': 239.51873779296875, 'logits/chosen': -0.3872758746147156, 'logits/rejected': -0.3733510971069336, 'epoch': 0.48} + 48%|████▊ | 324/681 [13:44<15:09, 2.55s/it] 48%|████▊ | 325/681 [13:47<15:38, 2.64s/it] {'loss': 1.1354, 'grad_norm': 30.042072296142578, 'learning_rate': 3.147047612756302e-07, 'fcm_dpo/beta': 0.0020753461867570877, 'fcm_dpo/q_t': 0.42378658056259155, 'fcm_dpo/delta': 0.076748326420784, 'fcm_dpo/margin': 156.92562866210938, 'margin_dpo/margin_mean': 156.92564392089844, 'margin_dpo/margin_std': 211.30374145507812, 'logps/chosen': -338.57427978515625, 'logps/rejected': -509.37030029296875, 'logps/ref_chosen': -56.215599060058594, 'logps/ref_rejected': -70.08592987060547, 'KL/chosen_KL_mean': -282.35870361328125, 'KL/rejected_KL_mean': -439.28436279296875, 'KL/mean': -360.821533203125, 'KL/std': 208.74551391601562, 'logits/chosen': -0.44832050800323486, 'logits/rejected': -0.43317437171936035, 'epoch': 0.48} + 48%|████▊ | 325/681 [13:47<15:38, 2.64s/it] 48%|████▊ | 326/681 [13:50<15:52, 2.68s/it] {'loss': 1.1383, 'grad_norm': 45.18388748168945, 'learning_rate': 3.134643122927519e-07, 'fcm_dpo/beta': 0.002105048391968012, 'fcm_dpo/q_t': 0.4256167709827423, 'fcm_dpo/delta': 0.08887322247028351, 'fcm_dpo/margin': 149.17068481445312, 'margin_dpo/margin_mean': 149.17068481445312, 'margin_dpo/margin_std': 190.10052490234375, 'logps/chosen': -385.3875732421875, 'logps/rejected': -541.6800537109375, 'logps/ref_chosen': -72.72496032714844, 'logps/ref_rejected': -79.8467788696289, 'KL/chosen_KL_mean': -312.6626281738281, 'KL/rejected_KL_mean': -461.83331298828125, 'KL/mean': -387.2479553222656, 'KL/std': 196.3777618408203, 'logits/chosen': -0.4697904884815216, 'logits/rejected': -0.45466452836990356, 'epoch': 0.48} + 48%|████▊ | 326/681 [13:50<15:52, 2.68s/it] 48%|████▊ | 327/681 [13:52<15:50, 2.69s/it] {'loss': 1.0185, 'grad_norm': 40.068824768066406, 'learning_rate': 3.1222219096622264e-07, 'fcm_dpo/beta': 0.0020871213637292385, 'fcm_dpo/q_t': 0.38719016313552856, 'fcm_dpo/delta': -0.08413384854793549, 'fcm_dpo/margin': 229.8604278564453, 'margin_dpo/margin_mean': 229.86044311523438, 'margin_dpo/margin_std': 231.85646057128906, 'logps/chosen': -337.015625, 'logps/rejected': -609.6754150390625, 'logps/ref_chosen': -69.13441467285156, 'logps/ref_rejected': -111.93377685546875, 'KL/chosen_KL_mean': -267.8811950683594, 'KL/rejected_KL_mean': -497.74163818359375, 'KL/mean': -382.8114013671875, 'KL/std': 212.30575561523438, 'logits/chosen': -0.419431209564209, 'logits/rejected': -0.40632164478302, 'epoch': 0.48} + 48%|████▊ | 327/681 [13:52<15:50, 2.69s/it] 48%|████▊ | 328/681 [13:55<15:41, 2.67s/it] {'loss': 1.0631, 'grad_norm': 26.695911407470703, 'learning_rate': 3.1097843002709427e-07, 'fcm_dpo/beta': 0.002073537092655897, 'fcm_dpo/q_t': 0.4000805914402008, 'fcm_dpo/delta': -0.03960520401597023, 'fcm_dpo/margin': 211.17982482910156, 'margin_dpo/margin_mean': 211.17984008789062, 'margin_dpo/margin_std': 262.86199951171875, 'logps/chosen': -343.0738525390625, 'logps/rejected': -585.4215087890625, 'logps/ref_chosen': -59.68719482421875, 'logps/ref_rejected': -90.85499572753906, 'KL/chosen_KL_mean': -283.38665771484375, 'KL/rejected_KL_mean': -494.5665283203125, 'KL/mean': -388.9765930175781, 'KL/std': 235.5018310546875, 'logits/chosen': -0.40584391355514526, 'logits/rejected': -0.4096938371658325, 'epoch': 0.48} + 48%|████▊ | 328/681 [13:55<15:41, 2.67s/it] 48%|████▊ | 329/681 [13:57<15:17, 2.61s/it] {'loss': 1.0669, 'grad_norm': 27.266719818115234, 'learning_rate': 3.0973306224962437e-07, 'fcm_dpo/beta': 0.0020416276529431343, 'fcm_dpo/q_t': 0.3970402479171753, 'fcm_dpo/delta': -0.05077539384365082, 'fcm_dpo/margin': 219.33106994628906, 'margin_dpo/margin_mean': 219.33106994628906, 'margin_dpo/margin_std': 276.76806640625, 'logps/chosen': -381.0254211425781, 'logps/rejected': -635.8079833984375, 'logps/ref_chosen': -65.2461929321289, 'logps/ref_rejected': -100.69770812988281, 'KL/chosen_KL_mean': -315.77923583984375, 'KL/rejected_KL_mean': -535.1102294921875, 'KL/mean': -425.44476318359375, 'KL/std': 248.7440185546875, 'logits/chosen': -0.39109185338020325, 'logits/rejected': -0.37896549701690674, 'epoch': 0.48} + 48%|████▊ | 329/681 [13:57<15:17, 2.61s/it] 48%|████▊ | 330/681 [14:00<15:17, 2.62s/it] {'loss': 1.0401, 'grad_norm': 22.988527297973633, 'learning_rate': 3.084861204504122e-07, 'fcm_dpo/beta': 0.002023911103606224, 'fcm_dpo/q_t': 0.39241012930870056, 'fcm_dpo/delta': -0.07296737283468246, 'fcm_dpo/margin': 231.99075317382812, 'margin_dpo/margin_mean': 231.99075317382812, 'margin_dpo/margin_std': 271.9549560546875, 'logps/chosen': -318.92950439453125, 'logps/rejected': -590.7987060546875, 'logps/ref_chosen': -46.998348236083984, 'logps/ref_rejected': -86.87684631347656, 'KL/chosen_KL_mean': -271.9311828613281, 'KL/rejected_KL_mean': -503.921875, 'KL/mean': -387.9265441894531, 'KL/std': 245.21937561035156, 'logits/chosen': -0.38984963297843933, 'logits/rejected': -0.3909507393836975, 'epoch': 0.48} + 48%|████▊ | 330/681 [14:00<15:17, 2.62s/it] 49%|████▊ | 331/681 [14:02<15:00, 2.57s/it] {'loss': 1.0166, 'grad_norm': 29.254379272460938, 'learning_rate': 3.072376374875335e-07, 'fcm_dpo/beta': 0.001998601946979761, 'fcm_dpo/q_t': 0.391870379447937, 'fcm_dpo/delta': -0.058225952088832855, 'fcm_dpo/margin': 227.9350128173828, 'margin_dpo/margin_mean': 227.9350128173828, 'margin_dpo/margin_std': 194.46043395996094, 'logps/chosen': -342.7950744628906, 'logps/rejected': -609.2212524414062, 'logps/ref_chosen': -50.52424621582031, 'logps/ref_rejected': -89.01544189453125, 'KL/chosen_KL_mean': -292.27081298828125, 'KL/rejected_KL_mean': -520.205810546875, 'KL/mean': -406.2383117675781, 'KL/std': 200.16488647460938, 'logits/chosen': -0.4170438051223755, 'logits/rejected': -0.41196513175964355, 'epoch': 0.49} + 49%|████▊ | 331/681 [14:02<15:00, 2.57s/it] 49%|████▉ | 332/681 [14:05<14:39, 2.52s/it] {'loss': 1.1333, 'grad_norm': 23.76082420349121, 'learning_rate': 3.059876462596758e-07, 'fcm_dpo/beta': 0.002016157377511263, 'fcm_dpo/q_t': 0.4236387014389038, 'fcm_dpo/delta': 0.06822776794433594, 'fcm_dpo/margin': 165.63372802734375, 'margin_dpo/margin_mean': 165.63372802734375, 'margin_dpo/margin_std': 237.92779541015625, 'logps/chosen': -343.43145751953125, 'logps/rejected': -536.3699951171875, 'logps/ref_chosen': -49.18028259277344, 'logps/ref_rejected': -76.48515319824219, 'KL/chosen_KL_mean': -294.25115966796875, 'KL/rejected_KL_mean': -459.8848876953125, 'KL/mean': -377.06805419921875, 'KL/std': 212.61550903320312, 'logits/chosen': -0.4166898727416992, 'logits/rejected': -0.39659583568573, 'epoch': 0.49} + 49%|████▉ | 332/681 [14:05<14:39, 2.52s/it] 49%|████▉ | 333/681 [14:07<14:21, 2.48s/it] {'loss': 1.0872, 'grad_norm': 21.290910720825195, 'learning_rate': 3.0473617970527015e-07, 'fcm_dpo/beta': 0.0019987255800515413, 'fcm_dpo/q_t': 0.40265339612960815, 'fcm_dpo/delta': -0.030487176030874252, 'fcm_dpo/margin': 214.22947692871094, 'margin_dpo/margin_mean': 214.22946166992188, 'margin_dpo/margin_std': 293.54949951171875, 'logps/chosen': -377.7965087890625, 'logps/rejected': -623.3143310546875, 'logps/ref_chosen': -63.75574493408203, 'logps/ref_rejected': -95.04411315917969, 'KL/chosen_KL_mean': -314.040771484375, 'KL/rejected_KL_mean': -528.270263671875, 'KL/mean': -421.1554870605469, 'KL/std': 245.66607666015625, 'logits/chosen': -0.414547860622406, 'logits/rejected': -0.40775951743125916, 'epoch': 0.49} + 49%|████▉ | 333/681 [14:07<14:21, 2.48s/it] 49%|████▉ | 334/681 [14:10<14:18, 2.47s/it] {'loss': 1.1255, 'grad_norm': 25.199310302734375, 'learning_rate': 3.034832708016243e-07, 'fcm_dpo/beta': 0.0020114602521061897, 'fcm_dpo/q_t': 0.4135018587112427, 'fcm_dpo/delta': 0.016515735536813736, 'fcm_dpo/margin': 190.96356201171875, 'margin_dpo/margin_mean': 190.9635467529297, 'margin_dpo/margin_std': 301.6986389160156, 'logps/chosen': -368.91595458984375, 'logps/rejected': -588.2166748046875, 'logps/ref_chosen': -66.97975158691406, 'logps/ref_rejected': -95.31692504882812, 'KL/chosen_KL_mean': -301.93621826171875, 'KL/rejected_KL_mean': -492.8997802734375, 'KL/mean': -397.41796875, 'KL/std': 265.2902526855469, 'logits/chosen': -0.4625867009162903, 'logits/rejected': -0.4604346752166748, 'epoch': 0.49} + 49%|████▉ | 334/681 [14:10<14:18, 2.47s/it] 49%|████▉ | 335/681 [14:12<14:25, 2.50s/it] {'loss': 1.1788, 'grad_norm': 32.44525146484375, 'learning_rate': 3.022289525640531e-07, 'fcm_dpo/beta': 0.0020141825079917908, 'fcm_dpo/q_t': 0.4313260614871979, 'fcm_dpo/delta': -0.004568679258227348, 'fcm_dpo/margin': 150.0985870361328, 'margin_dpo/margin_mean': 150.0985870361328, 'margin_dpo/margin_std': 268.7898254394531, 'logps/chosen': -394.66583251953125, 'logps/rejected': -569.8396606445312, 'logps/ref_chosen': -62.54248046875, 'logps/ref_rejected': -87.61770629882812, 'KL/chosen_KL_mean': -332.12335205078125, 'KL/rejected_KL_mean': -482.2219543457031, 'KL/mean': -407.17266845703125, 'KL/std': 248.314453125, 'logits/chosen': -0.43714985251426697, 'logits/rejected': -0.4139357805252075, 'epoch': 0.49} + 49%|████▉ | 335/681 [14:12<14:25, 2.50s/it] 49%|████▉ | 336/681 [14:15<14:31, 2.53s/it] {'loss': 1.0672, 'grad_norm': 29.619176864624023, 'learning_rate': 3.009732580450086e-07, 'fcm_dpo/beta': 0.0019955080933868885, 'fcm_dpo/q_t': 0.39297786355018616, 'fcm_dpo/delta': -0.08617211878299713, 'fcm_dpo/margin': 241.56228637695312, 'margin_dpo/margin_mean': 241.56228637695312, 'margin_dpo/margin_std': 339.20538330078125, 'logps/chosen': -388.9188232421875, 'logps/rejected': -680.3541870117188, 'logps/ref_chosen': -54.53115463256836, 'logps/ref_rejected': -104.40424346923828, 'KL/chosen_KL_mean': -334.3876647949219, 'KL/rejected_KL_mean': -575.949951171875, 'KL/mean': -455.1687927246094, 'KL/std': 287.1412353515625, 'logits/chosen': -0.399771511554718, 'logits/rejected': -0.3994015157222748, 'epoch': 0.49} + 49%|████▉ | 336/681 [14:15<14:31, 2.53s/it] 49%|████▉ | 337/681 [14:17<14:27, 2.52s/it] {'loss': 1.0365, 'grad_norm': 31.691265106201172, 'learning_rate': 2.9971622033320914e-07, 'fcm_dpo/beta': 0.0019532032310962677, 'fcm_dpo/q_t': 0.3924025893211365, 'fcm_dpo/delta': -0.07179627567529678, 'fcm_dpo/margin': 239.69003295898438, 'margin_dpo/margin_mean': 239.69003295898438, 'margin_dpo/margin_std': 272.0997314453125, 'logps/chosen': -364.8409729003906, 'logps/rejected': -641.1293334960938, 'logps/ref_chosen': -65.12869262695312, 'logps/ref_rejected': -101.72701263427734, 'KL/chosen_KL_mean': -299.7122802734375, 'KL/rejected_KL_mean': -539.40234375, 'KL/mean': -419.55731201171875, 'KL/std': 230.1120147705078, 'logits/chosen': -0.44581082463264465, 'logits/rejected': -0.4338313341140747, 'epoch': 0.49} + 49%|████▉ | 337/681 [14:17<14:27, 2.52s/it] 50%|████▉ | 338/681 [14:20<14:31, 2.54s/it] {'loss': 1.0325, 'grad_norm': 33.03255081176758, 'learning_rate': 2.984578725527675e-07, 'fcm_dpo/beta': 0.0019332608208060265, 'fcm_dpo/q_t': 0.3942795991897583, 'fcm_dpo/delta': -0.052127446979284286, 'fcm_dpo/margin': 232.6099853515625, 'margin_dpo/margin_mean': 232.6099853515625, 'margin_dpo/margin_std': 229.81480407714844, 'logps/chosen': -323.6939697265625, 'logps/rejected': -586.9498291015625, 'logps/ref_chosen': -58.422706604003906, 'logps/ref_rejected': -89.06854248046875, 'KL/chosen_KL_mean': -265.2712707519531, 'KL/rejected_KL_mean': -497.88128662109375, 'KL/mean': -381.5762634277344, 'KL/std': 221.42172241210938, 'logits/chosen': -0.44652998447418213, 'logits/rejected': -0.4394975006580353, 'epoch': 0.5} + 50%|████▉ | 338/681 [14:20<14:31, 2.54s/it] 50%|████▉ | 339/681 [14:22<14:02, 2.46s/it] {'loss': 1.0459, 'grad_norm': 26.379383087158203, 'learning_rate': 2.9719824786231796e-07, 'fcm_dpo/beta': 0.0019332109950482845, 'fcm_dpo/q_t': 0.3964860439300537, 'fcm_dpo/delta': -0.045980703085660934, 'fcm_dpo/margin': 229.14202880859375, 'margin_dpo/margin_mean': 229.14202880859375, 'margin_dpo/margin_std': 232.25975036621094, 'logps/chosen': -348.03900146484375, 'logps/rejected': -621.0966186523438, 'logps/ref_chosen': -59.99531555175781, 'logps/ref_rejected': -103.9109115600586, 'KL/chosen_KL_mean': -288.043701171875, 'KL/rejected_KL_mean': -517.1857299804688, 'KL/mean': -402.6147155761719, 'KL/std': 232.51951599121094, 'logits/chosen': -0.4995361864566803, 'logits/rejected': -0.486974835395813, 'epoch': 0.5} + 50%|████▉ | 339/681 [14:22<14:02, 2.46s/it] 50%|████▉ | 340/681 [14:25<14:12, 2.50s/it] {'loss': 1.1118, 'grad_norm': 24.018463134765625, 'learning_rate': 2.959373794541426e-07, 'fcm_dpo/beta': 0.001913035404868424, 'fcm_dpo/q_t': 0.4138457477092743, 'fcm_dpo/delta': 0.026591314002871513, 'fcm_dpo/margin': 195.61285400390625, 'margin_dpo/margin_mean': 195.61285400390625, 'margin_dpo/margin_std': 273.03375244140625, 'logps/chosen': -358.63323974609375, 'logps/rejected': -574.5231323242188, 'logps/ref_chosen': -52.83022689819336, 'logps/ref_rejected': -73.10723114013672, 'KL/chosen_KL_mean': -305.80303955078125, 'KL/rejected_KL_mean': -501.4158935546875, 'KL/mean': -403.6094665527344, 'KL/std': 221.99676513671875, 'logits/chosen': -0.41224145889282227, 'logits/rejected': -0.3901046812534332, 'epoch': 0.5} + 50%|████▉ | 340/681 [14:25<14:12, 2.50s/it] 50%|█████ | 341/681 [14:27<13:45, 2.43s/it] {'loss': 1.0601, 'grad_norm': 25.791650772094727, 'learning_rate': 2.946753005532965e-07, 'fcm_dpo/beta': 0.0019069017143920064, 'fcm_dpo/q_t': 0.4006652235984802, 'fcm_dpo/delta': -0.03159831091761589, 'fcm_dpo/margin': 225.42550659179688, 'margin_dpo/margin_mean': 225.42550659179688, 'margin_dpo/margin_std': 256.30413818359375, 'logps/chosen': -347.8078308105469, 'logps/rejected': -627.1434326171875, 'logps/ref_chosen': -47.899803161621094, 'logps/ref_rejected': -101.80987548828125, 'KL/chosen_KL_mean': -299.90802001953125, 'KL/rejected_KL_mean': -525.3335571289062, 'KL/mean': -412.6207580566406, 'KL/std': 242.2279510498047, 'logits/chosen': -0.40798383951187134, 'logits/rejected': -0.4071720838546753, 'epoch': 0.5} + 50%|█████ | 341/681 [14:27<13:45, 2.43s/it] 50%|█████ | 342/681 [14:30<14:06, 2.50s/it] {'loss': 1.1027, 'grad_norm': 25.002784729003906, 'learning_rate': 2.934120444167326e-07, 'fcm_dpo/beta': 0.001922906143590808, 'fcm_dpo/q_t': 0.4113759696483612, 'fcm_dpo/delta': 0.01581621915102005, 'fcm_dpo/margin': 199.9203338623047, 'margin_dpo/margin_mean': 199.92034912109375, 'margin_dpo/margin_std': 271.4499206542969, 'logps/chosen': -380.83563232421875, 'logps/rejected': -601.348876953125, 'logps/ref_chosen': -71.99664306640625, 'logps/ref_rejected': -92.58959197998047, 'KL/chosen_KL_mean': -308.8389892578125, 'KL/rejected_KL_mean': -508.7593078613281, 'KL/mean': -408.7991638183594, 'KL/std': 230.74411010742188, 'logits/chosen': -0.4692263603210449, 'logits/rejected': -0.44338518381118774, 'epoch': 0.5} + 50%|█████ | 342/681 [14:30<14:06, 2.50s/it] 50%|█████ | 343/681 [14:32<13:48, 2.45s/it] {'loss': 1.0162, 'grad_norm': 23.37291145324707, 'learning_rate': 2.9214764433242476e-07, 'fcm_dpo/beta': 0.001898743212223053, 'fcm_dpo/q_t': 0.39045825600624084, 'fcm_dpo/delta': -0.06434239447116852, 'fcm_dpo/margin': 242.99090576171875, 'margin_dpo/margin_mean': 242.99090576171875, 'margin_dpo/margin_std': 209.3662109375, 'logps/chosen': -350.59490966796875, 'logps/rejected': -650.2216186523438, 'logps/ref_chosen': -54.405616760253906, 'logps/ref_rejected': -111.04142761230469, 'KL/chosen_KL_mean': -296.18927001953125, 'KL/rejected_KL_mean': -539.18017578125, 'KL/mean': -417.68475341796875, 'KL/std': 236.11985778808594, 'logits/chosen': -0.4716584086418152, 'logits/rejected': -0.4760972261428833, 'epoch': 0.5} + 50%|█████ | 343/681 [14:32<13:48, 2.45s/it] 51%|█████ | 344/681 [14:34<13:46, 2.45s/it] {'loss': 1.0951, 'grad_norm': 31.523855209350586, 'learning_rate': 2.9088213361849126e-07, 'fcm_dpo/beta': 0.0019109161803498864, 'fcm_dpo/q_t': 0.41091352701187134, 'fcm_dpo/delta': 0.0036756470799446106, 'fcm_dpo/margin': 206.59829711914062, 'margin_dpo/margin_mean': 206.59829711914062, 'margin_dpo/margin_std': 264.9103698730469, 'logps/chosen': -357.40850830078125, 'logps/rejected': -600.66552734375, 'logps/ref_chosen': -53.96466827392578, 'logps/ref_rejected': -90.62336730957031, 'KL/chosen_KL_mean': -303.44384765625, 'KL/rejected_KL_mean': -510.04217529296875, 'KL/mean': -406.74298095703125, 'KL/std': 256.48602294921875, 'logits/chosen': -0.4483766555786133, 'logits/rejected': -0.45180875062942505, 'epoch': 0.51} + 51%|█████ | 344/681 [14:35<13:46, 2.45s/it] 51%|█████ | 345/681 [14:37<14:08, 2.52s/it] {'loss': 1.0557, 'grad_norm': 22.779315948486328, 'learning_rate': 2.896155456223163e-07, 'fcm_dpo/beta': 0.0018856715178117156, 'fcm_dpo/q_t': 0.3976198434829712, 'fcm_dpo/delta': -0.04471251741051674, 'fcm_dpo/margin': 234.78369140625, 'margin_dpo/margin_mean': 234.78372192382812, 'margin_dpo/margin_std': 277.4552307128906, 'logps/chosen': -422.91015625, 'logps/rejected': -695.4985961914062, 'logps/ref_chosen': -61.685699462890625, 'logps/ref_rejected': -99.49041748046875, 'KL/chosen_KL_mean': -361.2244567871094, 'KL/rejected_KL_mean': -596.0081787109375, 'KL/mean': -478.6163024902344, 'KL/std': 251.1361083984375, 'logits/chosen': -0.4647873342037201, 'logits/rejected': -0.4615704417228699, 'epoch': 0.51} + 51%|█████ | 345/681 [14:37<14:08, 2.52s/it] 51%|█████ | 346/681 [14:40<14:01, 2.51s/it] {'loss': 1.0682, 'grad_norm': 22.303312301635742, 'learning_rate': 2.883479137196714e-07, 'fcm_dpo/beta': 0.0018714326433837414, 'fcm_dpo/q_t': 0.4019482135772705, 'fcm_dpo/delta': -0.018616080284118652, 'fcm_dpo/margin': 223.2701873779297, 'margin_dpo/margin_mean': 223.27017211914062, 'margin_dpo/margin_std': 262.06787109375, 'logps/chosen': -414.02667236328125, 'logps/rejected': -659.4559326171875, 'logps/ref_chosen': -55.256263732910156, 'logps/ref_rejected': -77.41532135009766, 'KL/chosen_KL_mean': -358.7704162597656, 'KL/rejected_KL_mean': -582.0405883789062, 'KL/mean': -470.405517578125, 'KL/std': 246.00839233398438, 'logits/chosen': -0.42917925119400024, 'logits/rejected': -0.41765835881233215, 'epoch': 0.51} + 51%|█████ | 346/681 [14:40<14:01, 2.51s/it] 51%|█████ | 347/681 [14:42<13:25, 2.41s/it] {'loss': 1.0814, 'grad_norm': 26.2041015625, 'learning_rate': 2.8707927131383614e-07, 'fcm_dpo/beta': 0.001866616541519761, 'fcm_dpo/q_t': 0.4048117995262146, 'fcm_dpo/delta': -0.015192577615380287, 'fcm_dpo/margin': 222.09446716308594, 'margin_dpo/margin_mean': 222.094482421875, 'margin_dpo/margin_std': 288.15325927734375, 'logps/chosen': -422.106689453125, 'logps/rejected': -678.989990234375, 'logps/ref_chosen': -57.56623840332031, 'logps/ref_rejected': -92.35509490966797, 'KL/chosen_KL_mean': -364.54046630859375, 'KL/rejected_KL_mean': -586.6348876953125, 'KL/mean': -475.5876770019531, 'KL/std': 260.4476318359375, 'logits/chosen': -0.4826762080192566, 'logits/rejected': -0.47840413451194763, 'epoch': 0.51} + 51%|█████ | 347/681 [14:42<13:25, 2.41s/it] 51%|█████ | 348/681 [14:44<13:12, 2.38s/it] {'loss': 1.1255, 'grad_norm': 27.389596939086914, 'learning_rate': 2.858096518347179e-07, 'fcm_dpo/beta': 0.0018670517019927502, 'fcm_dpo/q_t': 0.41940367221832275, 'fcm_dpo/delta': 0.05069158226251602, 'fcm_dpo/margin': 187.81101989746094, 'margin_dpo/margin_mean': 187.81101989746094, 'margin_dpo/margin_std': 261.6010437011719, 'logps/chosen': -384.727294921875, 'logps/rejected': -605.3590087890625, 'logps/ref_chosen': -56.31770324707031, 'logps/ref_rejected': -89.13836669921875, 'KL/chosen_KL_mean': -328.40960693359375, 'KL/rejected_KL_mean': -516.2205810546875, 'KL/mean': -422.31512451171875, 'KL/std': 226.09664916992188, 'logits/chosen': -0.47834187746047974, 'logits/rejected': -0.47976285219192505, 'epoch': 0.51} + 51%|█████ | 348/681 [14:44<13:12, 2.38s/it] 51%|█████ | 349/681 [14:47<13:42, 2.48s/it] {'loss': 1.0928, 'grad_norm': 21.567838668823242, 'learning_rate': 2.845390887379706e-07, 'fcm_dpo/beta': 0.0018880900461226702, 'fcm_dpo/q_t': 0.4071810245513916, 'fcm_dpo/delta': -0.011443812400102615, 'fcm_dpo/margin': 217.50335693359375, 'margin_dpo/margin_mean': 217.50335693359375, 'margin_dpo/margin_std': 300.6847839355469, 'logps/chosen': -367.06610107421875, 'logps/rejected': -624.0491333007812, 'logps/ref_chosen': -58.025516510009766, 'logps/ref_rejected': -97.50515747070312, 'KL/chosen_KL_mean': -309.04058837890625, 'KL/rejected_KL_mean': -526.5439453125, 'KL/mean': -417.7922668457031, 'KL/std': 246.7399139404297, 'logits/chosen': -0.48450082540512085, 'logits/rejected': -0.4876035153865814, 'epoch': 0.51} + 51%|█████ | 349/681 [14:47<13:42, 2.48s/it] 51%|█████▏ | 350/681 [14:50<14:06, 2.56s/it] {'loss': 1.0998, 'grad_norm': 34.51575469970703, 'learning_rate': 2.8326761550411346e-07, 'fcm_dpo/beta': 0.0018717560451477766, 'fcm_dpo/q_t': 0.40805143117904663, 'fcm_dpo/delta': 0.00548534095287323, 'fcm_dpo/margin': 210.67298889160156, 'margin_dpo/margin_mean': 210.67300415039062, 'margin_dpo/margin_std': 285.7344055175781, 'logps/chosen': -390.2320861816406, 'logps/rejected': -626.4462890625, 'logps/ref_chosen': -64.33049011230469, 'logps/ref_rejected': -89.87164306640625, 'KL/chosen_KL_mean': -325.901611328125, 'KL/rejected_KL_mean': -536.5746459960938, 'KL/mean': -431.23809814453125, 'KL/std': 234.06008911132812, 'logits/chosen': -0.5076397657394409, 'logits/rejected': -0.510471761226654, 'epoch': 0.51} + 51%|█████▏ | 350/681 [14:50<14:06, 2.56s/it] 52%|█████▏ | 351/681 [14:52<14:07, 2.57s/it] {'loss': 1.0536, 'grad_norm': 33.712581634521484, 'learning_rate': 2.819952656376487e-07, 'fcm_dpo/beta': 0.0018583099590614438, 'fcm_dpo/q_t': 0.39529091119766235, 'fcm_dpo/delta': -0.06623544543981552, 'fcm_dpo/margin': 249.109375, 'margin_dpo/margin_mean': 249.109375, 'margin_dpo/margin_std': 312.37847900390625, 'logps/chosen': -366.968017578125, 'logps/rejected': -656.970703125, 'logps/ref_chosen': -60.6721305847168, 'logps/ref_rejected': -101.5654296875, 'KL/chosen_KL_mean': -306.2958984375, 'KL/rejected_KL_mean': -555.4052734375, 'KL/mean': -430.8505859375, 'KL/std': 270.62652587890625, 'logits/chosen': -0.5161840915679932, 'logits/rejected': -0.5157532095909119, 'epoch': 0.52} + 52%|█████▏ | 351/681 [14:52<14:07, 2.57s/it] 52%|█████▏ | 352/681 [14:55<14:06, 2.57s/it] {'loss': 1.1643, 'grad_norm': 34.96100997924805, 'learning_rate': 2.8072207266617854e-07, 'fcm_dpo/beta': 0.0018763558473438025, 'fcm_dpo/q_t': 0.429553359746933, 'fcm_dpo/delta': 0.09677629917860031, 'fcm_dpo/margin': 163.22299194335938, 'margin_dpo/margin_mean': 163.22300720214844, 'margin_dpo/margin_std': 265.39306640625, 'logps/chosen': -424.18267822265625, 'logps/rejected': -593.104248046875, 'logps/ref_chosen': -70.9434585571289, 'logps/ref_rejected': -76.6419677734375, 'KL/chosen_KL_mean': -353.2392578125, 'KL/rejected_KL_mean': -516.4622802734375, 'KL/mean': -434.8507385253906, 'KL/std': 244.72021484375, 'logits/chosen': -0.470248281955719, 'logits/rejected': -0.4367384910583496, 'epoch': 0.52} + 52%|█████▏ | 352/681 [14:55<14:06, 2.57s/it] 52%|█████▏ | 353/681 [14:57<13:59, 2.56s/it] {'loss': 1.0991, 'grad_norm': 26.08613395690918, 'learning_rate': 2.794480701395219e-07, 'fcm_dpo/beta': 0.0018868569750338793, 'fcm_dpo/q_t': 0.4107271432876587, 'fcm_dpo/delta': 0.01154874637722969, 'fcm_dpo/margin': 205.90402221679688, 'margin_dpo/margin_mean': 205.90402221679688, 'margin_dpo/margin_std': 272.3714294433594, 'logps/chosen': -380.8548583984375, 'logps/rejected': -608.6990966796875, 'logps/ref_chosen': -58.39533996582031, 'logps/ref_rejected': -80.33553314208984, 'KL/chosen_KL_mean': -322.45953369140625, 'KL/rejected_KL_mean': -528.363525390625, 'KL/mean': -425.41156005859375, 'KL/std': 244.48275756835938, 'logits/chosen': -0.5393311977386475, 'logits/rejected': -0.5296196937561035, 'epoch': 0.52} + 52%|█████▏ | 353/681 [14:57<13:59, 2.56s/it] 52%|█████▏ | 354/681 [15:00<14:08, 2.59s/it] {'loss': 1.0442, 'grad_norm': 40.203250885009766, 'learning_rate': 2.781732916288303e-07, 'fcm_dpo/beta': 0.0018875201931223273, 'fcm_dpo/q_t': 0.39921072125434875, 'fcm_dpo/delta': -0.02746821939945221, 'fcm_dpo/margin': 225.830322265625, 'margin_dpo/margin_mean': 225.83033752441406, 'margin_dpo/margin_std': 216.87001037597656, 'logps/chosen': -327.97381591796875, 'logps/rejected': -582.7586669921875, 'logps/ref_chosen': -59.80299377441406, 'logps/ref_rejected': -88.75750732421875, 'KL/chosen_KL_mean': -268.17083740234375, 'KL/rejected_KL_mean': -494.00115966796875, 'KL/mean': -381.08599853515625, 'KL/std': 217.59808349609375, 'logits/chosen': -0.49349963665008545, 'logits/rejected': -0.4861293137073517, 'epoch': 0.52} + 52%|█████▏ | 354/681 [15:00<14:08, 2.59s/it] 52%|█████▏ | 355/681 [15:02<13:46, 2.54s/it] {'loss': 1.0634, 'grad_norm': 44.118072509765625, 'learning_rate': 2.7689777072570284e-07, 'fcm_dpo/beta': 0.0018801202531903982, 'fcm_dpo/q_t': 0.40371406078338623, 'fcm_dpo/delta': -0.00877285934984684, 'fcm_dpo/margin': 217.17283630371094, 'margin_dpo/margin_mean': 217.17282104492188, 'margin_dpo/margin_std': 228.13082885742188, 'logps/chosen': -329.806396484375, 'logps/rejected': -575.2567749023438, 'logps/ref_chosen': -54.12849807739258, 'logps/ref_rejected': -82.40606689453125, 'KL/chosen_KL_mean': -275.6778564453125, 'KL/rejected_KL_mean': -492.8507080078125, 'KL/mean': -384.2642822265625, 'KL/std': 227.14535522460938, 'logits/chosen': -0.5707640647888184, 'logits/rejected': -0.5644550323486328, 'epoch': 0.52} + 52%|█████▏ | 355/681 [15:02<13:46, 2.54s/it] 52%|█████▏ | 356/681 [15:05<14:03, 2.60s/it] {'loss': 1.2434, 'grad_norm': 34.34591293334961, 'learning_rate': 2.7562154104130176e-07, 'fcm_dpo/beta': 0.0018893997184932232, 'fcm_dpo/q_t': 0.44711410999298096, 'fcm_dpo/delta': 0.03272160887718201, 'fcm_dpo/margin': 123.2698974609375, 'margin_dpo/margin_mean': 123.2698974609375, 'margin_dpo/margin_std': 296.36102294921875, 'logps/chosen': -405.62225341796875, 'logps/rejected': -540.11767578125, 'logps/ref_chosen': -64.6738052368164, 'logps/ref_rejected': -75.89926147460938, 'KL/chosen_KL_mean': -340.9484558105469, 'KL/rejected_KL_mean': -464.2183837890625, 'KL/mean': -402.58343505859375, 'KL/std': 240.2225341796875, 'logits/chosen': -0.47087016701698303, 'logits/rejected': -0.4466116726398468, 'epoch': 0.52} + 52%|█████▏ | 356/681 [15:05<14:03, 2.60s/it] 52%|█████▏ | 357/681 [15:08<14:04, 2.61s/it] {'loss': 1.0905, 'grad_norm': 30.455440521240234, 'learning_rate': 2.7434463620546594e-07, 'fcm_dpo/beta': 0.0018900984432548285, 'fcm_dpo/q_t': 0.40922728180885315, 'fcm_dpo/delta': 0.012749601155519485, 'fcm_dpo/margin': 204.93594360351562, 'margin_dpo/margin_mean': 204.93594360351562, 'margin_dpo/margin_std': 249.49462890625, 'logps/chosen': -362.5935974121094, 'logps/rejected': -601.6448974609375, 'logps/ref_chosen': -52.725799560546875, 'logps/ref_rejected': -86.84115600585938, 'KL/chosen_KL_mean': -309.8677978515625, 'KL/rejected_KL_mean': -514.8037109375, 'KL/mean': -412.3357849121094, 'KL/std': 240.03021240234375, 'logits/chosen': -0.5195204019546509, 'logits/rejected': -0.5092127323150635, 'epoch': 0.52} + 52%|█████▏ | 357/681 [15:08<14:04, 2.61s/it] 53%|█████▎ | 358/681 [15:11<14:18, 2.66s/it] {'loss': 1.1049, 'grad_norm': 24.4883975982666, 'learning_rate': 2.730670898658255e-07, 'fcm_dpo/beta': 0.0019095418974757195, 'fcm_dpo/q_t': 0.4149158000946045, 'fcm_dpo/delta': 0.032218970358371735, 'fcm_dpo/margin': 193.22451782226562, 'margin_dpo/margin_mean': 193.22451782226562, 'margin_dpo/margin_std': 251.6090087890625, 'logps/chosen': -346.0395812988281, 'logps/rejected': -564.4319458007812, 'logps/ref_chosen': -63.20543670654297, 'logps/ref_rejected': -88.373291015625, 'KL/chosen_KL_mean': -282.8341369628906, 'KL/rejected_KL_mean': -476.05865478515625, 'KL/mean': -379.4464111328125, 'KL/std': 233.26222229003906, 'logits/chosen': -0.5249518156051636, 'logits/rejected': -0.5085197687149048, 'epoch': 0.53} + 53%|█████▎ | 358/681 [15:11<14:18, 2.66s/it] 53%|█████▎ | 359/681 [15:13<14:06, 2.63s/it] {'loss': 1.0712, 'grad_norm': 27.346670150756836, 'learning_rate': 2.717889356869146e-07, 'fcm_dpo/beta': 0.0019002794288098812, 'fcm_dpo/q_t': 0.4016547203063965, 'fcm_dpo/delta': -0.03259321302175522, 'fcm_dpo/margin': 226.85186767578125, 'margin_dpo/margin_mean': 226.85186767578125, 'margin_dpo/margin_std': 285.25946044921875, 'logps/chosen': -376.261962890625, 'logps/rejected': -628.9173583984375, 'logps/ref_chosen': -56.370216369628906, 'logps/ref_rejected': -82.17375183105469, 'KL/chosen_KL_mean': -319.8917541503906, 'KL/rejected_KL_mean': -546.74365234375, 'KL/mean': -433.31768798828125, 'KL/std': 235.4507293701172, 'logits/chosen': -0.4491726756095886, 'logits/rejected': -0.43380558490753174, 'epoch': 0.53} + 53%|█████▎ | 359/681 [15:13<14:06, 2.63s/it] 53%|█████▎ | 360/681 [15:16<13:51, 2.59s/it] {'loss': 1.1224, 'grad_norm': 37.464805603027344, 'learning_rate': 2.7051020734928443e-07, 'fcm_dpo/beta': 0.0019250914920121431, 'fcm_dpo/q_t': 0.4223863184452057, 'fcm_dpo/delta': 0.07284688949584961, 'fcm_dpo/margin': 171.06541442871094, 'margin_dpo/margin_mean': 171.06541442871094, 'margin_dpo/margin_std': 206.2929229736328, 'logps/chosen': -374.1743469238281, 'logps/rejected': -563.6182861328125, 'logps/ref_chosen': -51.460384368896484, 'logps/ref_rejected': -69.83892059326172, 'KL/chosen_KL_mean': -322.7139587402344, 'KL/rejected_KL_mean': -493.77935791015625, 'KL/mean': -408.2466735839844, 'KL/std': 200.92642211914062, 'logits/chosen': -0.43620049953460693, 'logits/rejected': -0.4205591678619385, 'epoch': 0.53} + 53%|█████▎ | 360/681 [15:16<13:51, 2.59s/it] 53%|█████▎ | 361/681 [15:18<14:00, 2.63s/it] {'loss': 1.1377, 'grad_norm': 30.787109375, 'learning_rate': 2.6923093854861593e-07, 'fcm_dpo/beta': 0.0019522447837516665, 'fcm_dpo/q_t': 0.4221458435058594, 'fcm_dpo/delta': 0.06582384556531906, 'fcm_dpo/margin': 172.15640258789062, 'margin_dpo/margin_mean': 172.15640258789062, 'margin_dpo/margin_std': 255.71853637695312, 'logps/chosen': -391.72393798828125, 'logps/rejected': -600.7801513671875, 'logps/ref_chosen': -53.86951446533203, 'logps/ref_rejected': -90.7692642211914, 'KL/chosen_KL_mean': -337.85443115234375, 'KL/rejected_KL_mean': -510.0108337402344, 'KL/mean': -423.9326477050781, 'KL/std': 233.32354736328125, 'logits/chosen': -0.474801242351532, 'logits/rejected': -0.47210389375686646, 'epoch': 0.53} + 53%|█████▎ | 361/681 [15:18<14:00, 2.63s/it] 53%|█████▎ | 362/681 [15:21<13:46, 2.59s/it] {'loss': 0.9921, 'grad_norm': 25.072677612304688, 'learning_rate': 2.679511629948319e-07, 'fcm_dpo/beta': 0.0019115547183901072, 'fcm_dpo/q_t': 0.3806150555610657, 'fcm_dpo/delta': -0.12366791069507599, 'fcm_dpo/margin': 270.275146484375, 'margin_dpo/margin_mean': 270.275146484375, 'margin_dpo/margin_std': 262.97430419921875, 'logps/chosen': -351.6600341796875, 'logps/rejected': -668.8780517578125, 'logps/ref_chosen': -58.639060974121094, 'logps/ref_rejected': -105.58195495605469, 'KL/chosen_KL_mean': -293.02099609375, 'KL/rejected_KL_mean': -563.296142578125, 'KL/mean': -428.1585693359375, 'KL/std': 246.54940795898438, 'logits/chosen': -0.461614191532135, 'logits/rejected': -0.471387654542923, 'epoch': 0.53} + 53%|█████▎ | 362/681 [15:21<13:46, 2.59s/it] 53%|█████▎ | 363/681 [15:23<13:16, 2.50s/it] {'loss': 0.9901, 'grad_norm': 25.539962768554688, 'learning_rate': 2.6667091441120816e-07, 'fcm_dpo/beta': 0.0018741564126685262, 'fcm_dpo/q_t': 0.37928664684295654, 'fcm_dpo/delta': -0.12923146784305573, 'fcm_dpo/margin': 278.8757629394531, 'margin_dpo/margin_mean': 278.87579345703125, 'margin_dpo/margin_std': 272.97113037109375, 'logps/chosen': -314.65380859375, 'logps/rejected': -623.6661376953125, 'logps/ref_chosen': -44.558380126953125, 'logps/ref_rejected': -74.69496154785156, 'KL/chosen_KL_mean': -270.0954284667969, 'KL/rejected_KL_mean': -548.97119140625, 'KL/mean': -409.5333251953125, 'KL/std': 249.90145874023438, 'logits/chosen': -0.45062029361724854, 'logits/rejected': -0.445356547832489, 'epoch': 0.53} + 53%|█████▎ | 363/681 [15:23<13:16, 2.50s/it] 53%|█████▎ | 364/681 [15:26<13:20, 2.53s/it] {'loss': 1.1024, 'grad_norm': 24.92216682434082, 'learning_rate': 2.6539022653348575e-07, 'fcm_dpo/beta': 0.0018688710406422615, 'fcm_dpo/q_t': 0.4103432595729828, 'fcm_dpo/delta': 0.013297256082296371, 'fcm_dpo/margin': 206.97998046875, 'margin_dpo/margin_mean': 206.97999572753906, 'margin_dpo/margin_std': 280.64617919921875, 'logps/chosen': -350.4951171875, 'logps/rejected': -599.9762573242188, 'logps/ref_chosen': -48.894622802734375, 'logps/ref_rejected': -91.395751953125, 'KL/chosen_KL_mean': -301.60052490234375, 'KL/rejected_KL_mean': -508.58050537109375, 'KL/mean': -405.09051513671875, 'KL/std': 244.21636962890625, 'logits/chosen': -0.49542540311813354, 'logits/rejected': -0.5060637593269348, 'epoch': 0.53} + 53%|█████▎ | 364/681 [15:26<13:20, 2.53s/it] 54%|█████▎ | 365/681 [15:28<13:26, 2.55s/it] {'loss': 1.0716, 'grad_norm': 25.717348098754883, 'learning_rate': 2.641091331089811e-07, 'fcm_dpo/beta': 0.0018601326737552881, 'fcm_dpo/q_t': 0.4056779444217682, 'fcm_dpo/delta': -0.011300592683255672, 'fcm_dpo/margin': 220.8641815185547, 'margin_dpo/margin_mean': 220.8641815185547, 'margin_dpo/margin_std': 261.02288818359375, 'logps/chosen': -336.53826904296875, 'logps/rejected': -598.6113891601562, 'logps/ref_chosen': -51.49274444580078, 'logps/ref_rejected': -92.70166778564453, 'KL/chosen_KL_mean': -285.0455322265625, 'KL/rejected_KL_mean': -505.90972900390625, 'KL/mean': -395.47760009765625, 'KL/std': 253.05226135253906, 'logits/chosen': -0.44014662504196167, 'logits/rejected': -0.45165252685546875, 'epoch': 0.54} + 54%|█████▎ | 365/681 [15:28<13:26, 2.55s/it] 54%|█████▎ | 366/681 [15:31<13:25, 2.56s/it] {'loss': 1.0809, 'grad_norm': 22.693359375, 'learning_rate': 2.6282766789569736e-07, 'fcm_dpo/beta': 0.0018460990395396948, 'fcm_dpo/q_t': 0.40484321117401123, 'fcm_dpo/delta': -0.015634853392839432, 'fcm_dpo/margin': 224.57406616210938, 'margin_dpo/margin_mean': 224.57406616210938, 'margin_dpo/margin_std': 287.28973388671875, 'logps/chosen': -311.67181396484375, 'logps/rejected': -574.835693359375, 'logps/ref_chosen': -44.7205696105957, 'logps/ref_rejected': -83.31040954589844, 'KL/chosen_KL_mean': -266.95123291015625, 'KL/rejected_KL_mean': -491.52532958984375, 'KL/mean': -379.23828125, 'KL/std': 242.17172241210938, 'logits/chosen': -0.4734732210636139, 'logits/rejected': -0.4902943968772888, 'epoch': 0.54} + 54%|█████▎ | 366/681 [15:31<13:25, 2.56s/it] 54%|█████▍ | 367/681 [15:34<13:41, 2.62s/it] {'loss': 1.1149, 'grad_norm': 22.13631820678711, 'learning_rate': 2.615458646614349e-07, 'fcm_dpo/beta': 0.0018712931778281927, 'fcm_dpo/q_t': 0.4180119037628174, 'fcm_dpo/delta': 0.05387837439775467, 'fcm_dpo/margin': 185.85562133789062, 'margin_dpo/margin_mean': 185.85562133789062, 'margin_dpo/margin_std': 234.53326416015625, 'logps/chosen': -337.56787109375, 'logps/rejected': -541.7694091796875, 'logps/ref_chosen': -58.405418395996094, 'logps/ref_rejected': -76.75132751464844, 'KL/chosen_KL_mean': -279.1624450683594, 'KL/rejected_KL_mean': -465.01806640625, 'KL/mean': -372.0902404785156, 'KL/std': 217.56224060058594, 'logits/chosen': -0.47421082854270935, 'logits/rejected': -0.45852982997894287, 'epoch': 0.54} + 54%|█████▍ | 367/681 [15:34<13:41, 2.62s/it] 54%|█████▍ | 368/681 [15:36<13:37, 2.61s/it] {'loss': 0.9603, 'grad_norm': 36.742767333984375, 'learning_rate': 2.6026375718290083e-07, 'fcm_dpo/beta': 0.0018334980122745037, 'fcm_dpo/q_t': 0.37282127141952515, 'fcm_dpo/delta': -0.14245912432670593, 'fcm_dpo/margin': 291.716552734375, 'margin_dpo/margin_mean': 291.7165222167969, 'margin_dpo/margin_std': 222.715087890625, 'logps/chosen': -305.1435546875, 'logps/rejected': -650.9627685546875, 'logps/ref_chosen': -44.452518463134766, 'logps/ref_rejected': -98.55526733398438, 'KL/chosen_KL_mean': -260.6910400390625, 'KL/rejected_KL_mean': -552.4075317382812, 'KL/mean': -406.54931640625, 'KL/std': 245.8842010498047, 'logits/chosen': -0.48603498935699463, 'logits/rejected': -0.4929526448249817, 'epoch': 0.54} + 54%|█████▍ | 368/681 [15:36<13:37, 2.61s/it] 54%|█████▍ | 369/681 [15:39<13:40, 2.63s/it] {'loss': 1.1922, 'grad_norm': 26.54815673828125, 'learning_rate': 2.589813792448196e-07, 'fcm_dpo/beta': 0.0018251673318445683, 'fcm_dpo/q_t': 0.43544578552246094, 'fcm_dpo/delta': 0.02955966256558895, 'fcm_dpo/margin': 150.8870086669922, 'margin_dpo/margin_mean': 150.88702392578125, 'margin_dpo/margin_std': 279.23529052734375, 'logps/chosen': -424.5224609375, 'logps/rejected': -595.32373046875, 'logps/ref_chosen': -71.38150024414062, 'logps/ref_rejected': -91.29582214355469, 'KL/chosen_KL_mean': -353.1409606933594, 'KL/rejected_KL_mean': -504.0279235839844, 'KL/mean': -428.58447265625, 'KL/std': 253.20993041992188, 'logits/chosen': -0.49702537059783936, 'logits/rejected': -0.47865352034568787, 'epoch': 0.54} + 54%|█████▍ | 369/681 [15:39<13:40, 2.63s/it] 54%|█████▍ | 370/681 [15:42<13:46, 2.66s/it] {'loss': 1.1989, 'grad_norm': 31.06038475036621, 'learning_rate': 2.5769876463904263e-07, 'fcm_dpo/beta': 0.0018681371584534645, 'fcm_dpo/q_t': 0.43673175573349, 'fcm_dpo/delta': 0.1253683865070343, 'fcm_dpo/margin': 148.7425537109375, 'margin_dpo/margin_mean': 148.74256896972656, 'margin_dpo/margin_std': 292.08331298828125, 'logps/chosen': -441.69873046875, 'logps/rejected': -616.093505859375, 'logps/ref_chosen': -71.60749816894531, 'logps/ref_rejected': -97.25978088378906, 'KL/chosen_KL_mean': -370.0912170410156, 'KL/rejected_KL_mean': -518.833740234375, 'KL/mean': -444.4624938964844, 'KL/std': 263.0101318359375, 'logits/chosen': -0.5078903436660767, 'logits/rejected': -0.5006515979766846, 'epoch': 0.54} + 54%|█████▍ | 370/681 [15:42<13:46, 2.66s/it] 54%|█████▍ | 371/681 [15:44<13:46, 2.67s/it] {'loss': 1.0989, 'grad_norm': 33.49449920654297, 'learning_rate': 2.5641594716365744e-07, 'fcm_dpo/beta': 0.0018815153744071722, 'fcm_dpo/q_t': 0.40850624442100525, 'fcm_dpo/delta': -0.0019350722432136536, 'fcm_dpo/margin': 213.51040649414062, 'margin_dpo/margin_mean': 213.51040649414062, 'margin_dpo/margin_std': 299.0671081542969, 'logps/chosen': -427.32537841796875, 'logps/rejected': -670.593505859375, 'logps/ref_chosen': -69.41448974609375, 'logps/ref_rejected': -99.17217254638672, 'KL/chosen_KL_mean': -357.910888671875, 'KL/rejected_KL_mean': -571.4212646484375, 'KL/mean': -464.66607666015625, 'KL/std': 263.6308288574219, 'logits/chosen': -0.557892918586731, 'logits/rejected': -0.5437754392623901, 'epoch': 0.54} + 54%|█████▍ | 371/681 [15:44<13:46, 2.67s/it] 55%|█████▍ | 372/681 [15:47<13:07, 2.55s/it] {'loss': 1.0575, 'grad_norm': 28.089242935180664, 'learning_rate': 2.551329606220976e-07, 'fcm_dpo/beta': 0.0018529519438743591, 'fcm_dpo/q_t': 0.395152747631073, 'fcm_dpo/delta': -0.07631818950176239, 'fcm_dpo/margin': 255.05174255371094, 'margin_dpo/margin_mean': 255.0517578125, 'margin_dpo/margin_std': 344.2037353515625, 'logps/chosen': -414.96771240234375, 'logps/rejected': -686.740966796875, 'logps/ref_chosen': -61.8179931640625, 'logps/ref_rejected': -78.53948974609375, 'KL/chosen_KL_mean': -353.14971923828125, 'KL/rejected_KL_mean': -608.201416015625, 'KL/mean': -480.6755676269531, 'KL/std': 304.527099609375, 'logits/chosen': -0.5017907619476318, 'logits/rejected': -0.4822356402873993, 'epoch': 0.55} + 55%|█████▍ | 372/681 [15:47<13:07, 2.55s/it] 55%|█████▍ | 373/681 [15:49<13:00, 2.53s/it] {'loss': 1.0544, 'grad_norm': 25.535263061523438, 'learning_rate': 2.538498388222517e-07, 'fcm_dpo/beta': 0.001842833822593093, 'fcm_dpo/q_t': 0.39565837383270264, 'fcm_dpo/delta': -0.0538918599486351, 'fcm_dpo/margin': 244.8360137939453, 'margin_dpo/margin_mean': 244.83599853515625, 'margin_dpo/margin_std': 285.15032958984375, 'logps/chosen': -445.0198974609375, 'logps/rejected': -711.598388671875, 'logps/ref_chosen': -64.21713256835938, 'logps/ref_rejected': -85.95960998535156, 'KL/chosen_KL_mean': -380.80279541015625, 'KL/rejected_KL_mean': -625.6387939453125, 'KL/mean': -503.22076416015625, 'KL/std': 292.56231689453125, 'logits/chosen': -0.4881356954574585, 'logits/rejected': -0.46633967757225037, 'epoch': 0.55} + 55%|█████▍ | 373/681 [15:49<13:00, 2.53s/it] 55%|█████▍ | 374/681 [15:52<13:18, 2.60s/it] {'loss': 1.1193, 'grad_norm': 27.090904235839844, 'learning_rate': 2.525666155755725e-07, 'fcm_dpo/beta': 0.0018177898600697517, 'fcm_dpo/q_t': 0.4120628535747528, 'fcm_dpo/delta': -0.0016709566116333008, 'fcm_dpo/margin': 220.6092529296875, 'margin_dpo/margin_mean': 220.60926818847656, 'margin_dpo/margin_std': 354.3055419921875, 'logps/chosen': -415.596923828125, 'logps/rejected': -659.1961669921875, 'logps/ref_chosen': -70.65018463134766, 'logps/ref_rejected': -93.64016723632812, 'KL/chosen_KL_mean': -344.94671630859375, 'KL/rejected_KL_mean': -565.5560302734375, 'KL/mean': -455.2513427734375, 'KL/std': 316.259765625, 'logits/chosen': -0.5761805772781372, 'logits/rejected': -0.5577331781387329, 'epoch': 0.55} + 55%|█████▍ | 374/681 [15:52<13:18, 2.60s/it] 55%|█████▌ | 375/681 [15:55<13:25, 2.63s/it] {'loss': 1.1045, 'grad_norm': 36.25385665893555, 'learning_rate': 2.512833246961859e-07, 'fcm_dpo/beta': 0.00181809242349118, 'fcm_dpo/q_t': 0.4092579185962677, 'fcm_dpo/delta': 0.005695123225450516, 'fcm_dpo/margin': 216.62973022460938, 'margin_dpo/margin_mean': 216.62973022460938, 'margin_dpo/margin_std': 301.60943603515625, 'logps/chosen': -409.48876953125, 'logps/rejected': -654.9766235351562, 'logps/ref_chosen': -60.080223083496094, 'logps/ref_rejected': -88.93830871582031, 'KL/chosen_KL_mean': -349.4085693359375, 'KL/rejected_KL_mean': -566.038330078125, 'KL/mean': -457.72344970703125, 'KL/std': 255.31455993652344, 'logits/chosen': -0.5407253503799438, 'logits/rejected': -0.5411194562911987, 'epoch': 0.55} + 55%|█████▌ | 375/681 [15:55<13:25, 2.63s/it] 55%|█████▌ | 376/681 [15:57<13:27, 2.65s/it] {'loss': 1.0426, 'grad_norm': 23.30925750732422, 'learning_rate': 2.5e-07, 'fcm_dpo/beta': 0.001806009327992797, 'fcm_dpo/q_t': 0.3911965489387512, 'fcm_dpo/delta': -0.0772535428404808, 'fcm_dpo/margin': 262.1507568359375, 'margin_dpo/margin_mean': 262.1507568359375, 'margin_dpo/margin_std': 314.1611328125, 'logps/chosen': -405.50262451171875, 'logps/rejected': -710.5196533203125, 'logps/ref_chosen': -62.660308837890625, 'logps/ref_rejected': -105.52660369873047, 'KL/chosen_KL_mean': -342.84234619140625, 'KL/rejected_KL_mean': -604.9931030273438, 'KL/mean': -473.917724609375, 'KL/std': 271.80767822265625, 'logits/chosen': -0.5112703442573547, 'logits/rejected': -0.5029022693634033, 'epoch': 0.55} + 55%|█████▌ | 376/681 [15:57<13:27, 2.65s/it] 55%|█████▌ | 377/681 [16:00<13:11, 2.60s/it] {'loss': 1.0577, 'grad_norm': 29.571670532226562, 'learning_rate': 2.487166753038141e-07, 'fcm_dpo/beta': 0.0017937154043465853, 'fcm_dpo/q_t': 0.3983774781227112, 'fcm_dpo/delta': -0.043446458876132965, 'fcm_dpo/margin': 246.15281677246094, 'margin_dpo/margin_mean': 246.15280151367188, 'margin_dpo/margin_std': 293.776123046875, 'logps/chosen': -398.8645324707031, 'logps/rejected': -689.241943359375, 'logps/ref_chosen': -54.478736877441406, 'logps/ref_rejected': -98.70335388183594, 'KL/chosen_KL_mean': -344.38580322265625, 'KL/rejected_KL_mean': -590.5386352539062, 'KL/mean': -467.46221923828125, 'KL/std': 280.3377685546875, 'logits/chosen': -0.4719467759132385, 'logits/rejected': -0.47322678565979004, 'epoch': 0.55} + 55%|█████▌ | 377/681 [16:00<13:11, 2.60s/it] 56%|█████▌ | 378/681 [16:02<12:28, 2.47s/it] {'loss': 1.0324, 'grad_norm': 30.750337600708008, 'learning_rate': 2.4743338442442754e-07, 'fcm_dpo/beta': 0.0017670897068455815, 'fcm_dpo/q_t': 0.3910645842552185, 'fcm_dpo/delta': -0.0676431879401207, 'fcm_dpo/margin': 262.839599609375, 'margin_dpo/margin_mean': 262.8395690917969, 'margin_dpo/margin_std': 280.932861328125, 'logps/chosen': -372.546630859375, 'logps/rejected': -678.41259765625, 'logps/ref_chosen': -45.02053451538086, 'logps/ref_rejected': -88.0469741821289, 'KL/chosen_KL_mean': -327.5260925292969, 'KL/rejected_KL_mean': -590.3656005859375, 'KL/mean': -458.94586181640625, 'KL/std': 256.42523193359375, 'logits/chosen': -0.48483866453170776, 'logits/rejected': -0.505121648311615, 'epoch': 0.56} + 56%|█████▌ | 378/681 [16:02<12:28, 2.47s/it] 56%|█████▌ | 379/681 [16:04<12:23, 2.46s/it] {'loss': 1.0587, 'grad_norm': 28.86778450012207, 'learning_rate': 2.461501611777483e-07, 'fcm_dpo/beta': 0.0017432118766009808, 'fcm_dpo/q_t': 0.39824381470680237, 'fcm_dpo/delta': -0.04486480727791786, 'fcm_dpo/margin': 253.78221130371094, 'margin_dpo/margin_mean': 253.78219604492188, 'margin_dpo/margin_std': 305.1265563964844, 'logps/chosen': -409.1395568847656, 'logps/rejected': -724.0399169921875, 'logps/ref_chosen': -53.182098388671875, 'logps/ref_rejected': -114.3001708984375, 'KL/chosen_KL_mean': -355.95745849609375, 'KL/rejected_KL_mean': -609.73974609375, 'KL/mean': -482.84857177734375, 'KL/std': 260.3935546875, 'logits/chosen': -0.4996095299720764, 'logits/rejected': -0.5224326848983765, 'epoch': 0.56} + 56%|█████▌ | 379/681 [16:04<12:23, 2.46s/it] 56%|█████▌ | 380/681 [16:07<12:09, 2.42s/it] {'loss': 1.027, 'grad_norm': 27.250988006591797, 'learning_rate': 2.4486703937790243e-07, 'fcm_dpo/beta': 0.001726464950479567, 'fcm_dpo/q_t': 0.38729774951934814, 'fcm_dpo/delta': -0.08805520087480545, 'fcm_dpo/margin': 280.27264404296875, 'margin_dpo/margin_mean': 280.27264404296875, 'margin_dpo/margin_std': 311.29791259765625, 'logps/chosen': -392.4548034667969, 'logps/rejected': -725.5660400390625, 'logps/ref_chosen': -51.3530387878418, 'logps/ref_rejected': -104.19169616699219, 'KL/chosen_KL_mean': -341.1017761230469, 'KL/rejected_KL_mean': -621.3743896484375, 'KL/mean': -481.23809814453125, 'KL/std': 289.7486267089844, 'logits/chosen': -0.47446852922439575, 'logits/rejected': -0.5016424655914307, 'epoch': 0.56} + 56%|█████▌ | 380/681 [16:07<12:09, 2.42s/it] 56%|█████▌ | 381/681 [16:09<12:12, 2.44s/it] {'loss': 1.1465, 'grad_norm': 25.71771812438965, 'learning_rate': 2.435840528363426e-07, 'fcm_dpo/beta': 0.001726742135360837, 'fcm_dpo/q_t': 0.4205264449119568, 'fcm_dpo/delta': 0.05530242994427681, 'fcm_dpo/margin': 200.75161743164062, 'margin_dpo/margin_mean': 200.75160217285156, 'margin_dpo/margin_std': 330.10223388671875, 'logps/chosen': -415.838623046875, 'logps/rejected': -638.006591796875, 'logps/ref_chosen': -57.80306625366211, 'logps/ref_rejected': -79.21940612792969, 'KL/chosen_KL_mean': -358.0355529785156, 'KL/rejected_KL_mean': -558.7872314453125, 'KL/mean': -458.411376953125, 'KL/std': 245.9998016357422, 'logits/chosen': -0.45564746856689453, 'logits/rejected': -0.43560299277305603, 'epoch': 0.56} + 56%|█████▌ | 381/681 [16:09<12:12, 2.44s/it] 56%|█████▌ | 382/681 [16:12<12:23, 2.49s/it] {'loss': 1.0551, 'grad_norm': 29.356277465820312, 'learning_rate': 2.4230123536095745e-07, 'fcm_dpo/beta': 0.0017278792802244425, 'fcm_dpo/q_t': 0.4007778763771057, 'fcm_dpo/delta': -0.024810172617435455, 'fcm_dpo/margin': 245.24298095703125, 'margin_dpo/margin_mean': 245.24298095703125, 'margin_dpo/margin_std': 262.1793518066406, 'logps/chosen': -405.11962890625, 'logps/rejected': -695.052490234375, 'logps/ref_chosen': -66.02030181884766, 'logps/ref_rejected': -110.71016693115234, 'KL/chosen_KL_mean': -339.0993347167969, 'KL/rejected_KL_mean': -584.34228515625, 'KL/mean': -461.7208557128906, 'KL/std': 235.19894409179688, 'logits/chosen': -0.4832066297531128, 'logits/rejected': -0.4879419803619385, 'epoch': 0.56} + 56%|█████▌ | 382/681 [16:12<12:23, 2.49s/it] 56%|█████▌ | 383/681 [16:14<12:29, 2.51s/it] {'loss': 1.0874, 'grad_norm': 25.48250961303711, 'learning_rate': 2.4101862075518037e-07, 'fcm_dpo/beta': 0.0017200370784848928, 'fcm_dpo/q_t': 0.4029914140701294, 'fcm_dpo/delta': -0.022270262241363525, 'fcm_dpo/margin': 244.95123291015625, 'margin_dpo/margin_mean': 244.95123291015625, 'margin_dpo/margin_std': 338.13385009765625, 'logps/chosen': -396.48223876953125, 'logps/rejected': -684.7578735351562, 'logps/ref_chosen': -50.39148712158203, 'logps/ref_rejected': -93.71589660644531, 'KL/chosen_KL_mean': -346.09075927734375, 'KL/rejected_KL_mean': -591.0419921875, 'KL/mean': -468.5663757324219, 'KL/std': 261.1944274902344, 'logits/chosen': -0.46931326389312744, 'logits/rejected': -0.47857019305229187, 'epoch': 0.56} + 56%|█████▌ | 383/681 [16:14<12:29, 2.51s/it] 56%|█████▋ | 384/681 [16:17<12:31, 2.53s/it] {'loss': 1.1164, 'grad_norm': 25.28335952758789, 'learning_rate': 2.397362428170992e-07, 'fcm_dpo/beta': 0.001742619788274169, 'fcm_dpo/q_t': 0.4206535220146179, 'fcm_dpo/delta': 0.06796430051326752, 'fcm_dpo/margin': 191.53521728515625, 'margin_dpo/margin_mean': 191.53521728515625, 'margin_dpo/margin_std': 219.35858154296875, 'logps/chosen': -400.5798034667969, 'logps/rejected': -625.829833984375, 'logps/ref_chosen': -52.046104431152344, 'logps/ref_rejected': -85.76089477539062, 'KL/chosen_KL_mean': -348.53369140625, 'KL/rejected_KL_mean': -540.0689697265625, 'KL/mean': -444.30133056640625, 'KL/std': 233.04791259765625, 'logits/chosen': -0.5027947425842285, 'logits/rejected': -0.4957225024700165, 'epoch': 0.56} + 56%|█████▋ | 384/681 [16:17<12:31, 2.53s/it] 57%|█████▋ | 385/681 [16:19<12:24, 2.52s/it] {'loss': 1.0601, 'grad_norm': 30.516326904296875, 'learning_rate': 2.3845413533856514e-07, 'fcm_dpo/beta': 0.0017409389838576317, 'fcm_dpo/q_t': 0.4035521149635315, 'fcm_dpo/delta': -0.006193262059241533, 'fcm_dpo/margin': 233.17413330078125, 'margin_dpo/margin_mean': 233.17413330078125, 'margin_dpo/margin_std': 232.32888793945312, 'logps/chosen': -382.04083251953125, 'logps/rejected': -627.49072265625, 'logps/ref_chosen': -65.55215454101562, 'logps/ref_rejected': -77.82792663574219, 'KL/chosen_KL_mean': -316.4886474609375, 'KL/rejected_KL_mean': -549.6627807617188, 'KL/mean': -433.07574462890625, 'KL/std': 208.34341430664062, 'logits/chosen': -0.5098952054977417, 'logits/rejected': -0.4792426824569702, 'epoch': 0.57} + 57%|█████▋ | 385/681 [16:19<12:24, 2.52s/it] 57%|█████▋ | 386/681 [16:22<12:30, 2.54s/it] {'loss': 1.0541, 'grad_norm': 22.08116912841797, 'learning_rate': 2.3717233210430254e-07, 'fcm_dpo/beta': 0.0017288768431171775, 'fcm_dpo/q_t': 0.3971378803253174, 'fcm_dpo/delta': -0.047393690794706345, 'fcm_dpo/margin': 257.5691833496094, 'margin_dpo/margin_mean': 257.5691833496094, 'margin_dpo/margin_std': 305.404052734375, 'logps/chosen': -390.5174865722656, 'logps/rejected': -682.1922607421875, 'logps/ref_chosen': -58.22185516357422, 'logps/ref_rejected': -92.32742309570312, 'KL/chosen_KL_mean': -332.295654296875, 'KL/rejected_KL_mean': -589.8648071289062, 'KL/mean': -461.0802307128906, 'KL/std': 266.24664306640625, 'logits/chosen': -0.5011097192764282, 'logits/rejected': -0.49594956636428833, 'epoch': 0.57} + 57%|█████▋ | 386/681 [16:22<12:30, 2.54s/it] 57%|█████▋ | 387/681 [16:25<12:46, 2.61s/it] {'loss': 1.1016, 'grad_norm': 29.83814239501953, 'learning_rate': 2.3589086689101889e-07, 'fcm_dpo/beta': 0.0017271433025598526, 'fcm_dpo/q_t': 0.41534334421157837, 'fcm_dpo/delta': 0.042273543775081635, 'fcm_dpo/margin': 207.89810180664062, 'margin_dpo/margin_mean': 207.89810180664062, 'margin_dpo/margin_std': 238.62474060058594, 'logps/chosen': -420.0334167480469, 'logps/rejected': -653.6812744140625, 'logps/ref_chosen': -66.41944885253906, 'logps/ref_rejected': -92.16915893554688, 'KL/chosen_KL_mean': -353.61395263671875, 'KL/rejected_KL_mean': -561.5120849609375, 'KL/mean': -457.56298828125, 'KL/std': 239.767333984375, 'logits/chosen': -0.5749870538711548, 'logits/rejected': -0.5577224493026733, 'epoch': 0.57} + 57%|█████▋ | 387/681 [16:25<12:46, 2.61s/it] 57%|█████▋ | 388/681 [16:27<12:20, 2.53s/it] {'loss': 1.0286, 'grad_norm': 24.234586715698242, 'learning_rate': 2.3460977346651428e-07, 'fcm_dpo/beta': 0.0017118379473686218, 'fcm_dpo/q_t': 0.3895290791988373, 'fcm_dpo/delta': -0.08915528655052185, 'fcm_dpo/margin': 283.0830993652344, 'margin_dpo/margin_mean': 283.08306884765625, 'margin_dpo/margin_std': 323.8463134765625, 'logps/chosen': -392.931884765625, 'logps/rejected': -730.318603515625, 'logps/ref_chosen': -50.129459381103516, 'logps/ref_rejected': -104.43305969238281, 'KL/chosen_KL_mean': -342.80242919921875, 'KL/rejected_KL_mean': -625.8855590820312, 'KL/mean': -484.3439636230469, 'KL/std': 283.77276611328125, 'logits/chosen': -0.4718579649925232, 'logits/rejected': -0.48248615860939026, 'epoch': 0.57} + 57%|█████▋ | 388/681 [16:27<12:20, 2.53s/it] 57%|█████▋ | 389/681 [16:29<12:07, 2.49s/it] {'loss': 1.0864, 'grad_norm': 30.227901458740234, 'learning_rate': 2.3332908558879177e-07, 'fcm_dpo/beta': 0.0017030881717801094, 'fcm_dpo/q_t': 0.40635746717453003, 'fcm_dpo/delta': -0.005919036455452442, 'fcm_dpo/margin': 238.1655731201172, 'margin_dpo/margin_mean': 238.1655731201172, 'margin_dpo/margin_std': 309.348876953125, 'logps/chosen': -444.28411865234375, 'logps/rejected': -702.4576416015625, 'logps/ref_chosen': -57.906593322753906, 'logps/ref_rejected': -77.91454315185547, 'KL/chosen_KL_mean': -386.37750244140625, 'KL/rejected_KL_mean': -624.5430908203125, 'KL/mean': -505.4602966308594, 'KL/std': 282.40252685546875, 'logits/chosen': -0.5446810722351074, 'logits/rejected': -0.5356224775314331, 'epoch': 0.57} + 57%|█████▋ | 389/681 [16:30<12:07, 2.49s/it] 57%|█████▋ | 390/681 [16:32<12:07, 2.50s/it] {'loss': 1.0979, 'grad_norm': 25.220243453979492, 'learning_rate': 2.320488370051681e-07, 'fcm_dpo/beta': 0.0016950219869613647, 'fcm_dpo/q_t': 0.40823304653167725, 'fcm_dpo/delta': -0.014803212136030197, 'fcm_dpo/margin': 244.15020751953125, 'margin_dpo/margin_mean': 244.15020751953125, 'margin_dpo/margin_std': 357.4918212890625, 'logps/chosen': -426.5779113769531, 'logps/rejected': -707.0303955078125, 'logps/ref_chosen': -49.22591781616211, 'logps/ref_rejected': -85.5281982421875, 'KL/chosen_KL_mean': -377.35198974609375, 'KL/rejected_KL_mean': -621.502197265625, 'KL/mean': -499.4270935058594, 'KL/std': 282.41424560546875, 'logits/chosen': -0.4850386083126068, 'logits/rejected': -0.4768223166465759, 'epoch': 0.57} + 57%|█████▋ | 390/681 [16:32<12:07, 2.50s/it] 57%|█████▋ | 391/681 [16:34<12:03, 2.50s/it] {'loss': 1.2128, 'grad_norm': 46.24735641479492, 'learning_rate': 2.3076906145138405e-07, 'fcm_dpo/beta': 0.0017408509738743305, 'fcm_dpo/q_t': 0.4426559805870056, 'fcm_dpo/delta': 0.15400069952011108, 'fcm_dpo/margin': 143.47076416015625, 'margin_dpo/margin_mean': 143.4707794189453, 'margin_dpo/margin_std': 285.8855285644531, 'logps/chosen': -446.9159240722656, 'logps/rejected': -612.7952270507812, 'logps/ref_chosen': -64.32965087890625, 'logps/ref_rejected': -86.73820495605469, 'KL/chosen_KL_mean': -382.5862731933594, 'KL/rejected_KL_mean': -526.0570068359375, 'KL/mean': -454.3216552734375, 'KL/std': 272.00946044921875, 'logits/chosen': -0.5240955352783203, 'logits/rejected': -0.5136964321136475, 'epoch': 0.57} + 57%|█████▋ | 391/681 [16:35<12:03, 2.50s/it] 58%|█████▊ | 392/681 [16:37<12:15, 2.54s/it] {'loss': 1.0105, 'grad_norm': 26.776010513305664, 'learning_rate': 2.294897926507156e-07, 'fcm_dpo/beta': 0.0017360819038003683, 'fcm_dpo/q_t': 0.3868769109249115, 'fcm_dpo/delta': -0.09214547276496887, 'fcm_dpo/margin': 280.925537109375, 'margin_dpo/margin_mean': 280.9255065917969, 'margin_dpo/margin_std': 274.7219543457031, 'logps/chosen': -381.3224182128906, 'logps/rejected': -711.0897827148438, 'logps/ref_chosen': -53.50397872924805, 'logps/ref_rejected': -102.34584045410156, 'KL/chosen_KL_mean': -327.81842041015625, 'KL/rejected_KL_mean': -608.7439575195312, 'KL/mean': -468.28118896484375, 'KL/std': 280.24822998046875, 'logits/chosen': -0.4836753010749817, 'logits/rejected': -0.47832804918289185, 'epoch': 0.58} + 58%|█████▊ | 392/681 [16:37<12:15, 2.54s/it] 58%|█████▊ | 393/681 [16:39<11:57, 2.49s/it] {'loss': 1.1217, 'grad_norm': 22.34610939025879, 'learning_rate': 2.2821106431308543e-07, 'fcm_dpo/beta': 0.001724720699712634, 'fcm_dpo/q_t': 0.4159342050552368, 'fcm_dpo/delta': 0.022230474278330803, 'fcm_dpo/margin': 219.5145263671875, 'margin_dpo/margin_mean': 219.51451110839844, 'margin_dpo/margin_std': 345.4471740722656, 'logps/chosen': -366.9886474609375, 'logps/rejected': -611.9981689453125, 'logps/ref_chosen': -46.473915100097656, 'logps/ref_rejected': -71.96885681152344, 'KL/chosen_KL_mean': -320.5147399902344, 'KL/rejected_KL_mean': -540.029296875, 'KL/mean': -430.2720031738281, 'KL/std': 273.57257080078125, 'logits/chosen': -0.4929124414920807, 'logits/rejected': -0.4912495017051697, 'epoch': 0.58} + 58%|█████▊ | 393/681 [16:40<11:57, 2.49s/it] 58%|█████▊ | 394/681 [16:42<12:06, 2.53s/it] {'loss': 1.0862, 'grad_norm': 22.500810623168945, 'learning_rate': 2.2693291013417452e-07, 'fcm_dpo/beta': 0.0017247963696718216, 'fcm_dpo/q_t': 0.40792059898376465, 'fcm_dpo/delta': -0.004414796829223633, 'fcm_dpo/margin': 234.32321166992188, 'margin_dpo/margin_mean': 234.3231964111328, 'margin_dpo/margin_std': 305.0433349609375, 'logps/chosen': -412.365966796875, 'logps/rejected': -684.6002197265625, 'logps/ref_chosen': -52.91154861450195, 'logps/ref_rejected': -90.8226318359375, 'KL/chosen_KL_mean': -359.45440673828125, 'KL/rejected_KL_mean': -593.777587890625, 'KL/mean': -476.61602783203125, 'KL/std': 293.0263671875, 'logits/chosen': -0.477075457572937, 'logits/rejected': -0.47646427154541016, 'epoch': 0.58} + 58%|█████▊ | 394/681 [16:42<12:06, 2.53s/it] 58%|█████▊ | 395/681 [16:44<11:48, 2.48s/it] {'loss': 1.0727, 'grad_norm': 24.562576293945312, 'learning_rate': 2.2565536379453404e-07, 'fcm_dpo/beta': 0.0017139037372544408, 'fcm_dpo/q_t': 0.4019749164581299, 'fcm_dpo/delta': -0.03093145042657852, 'fcm_dpo/margin': 250.46954345703125, 'margin_dpo/margin_mean': 250.46954345703125, 'margin_dpo/margin_std': 320.5044250488281, 'logps/chosen': -416.94866943359375, 'logps/rejected': -688.65478515625, 'logps/ref_chosen': -62.546112060546875, 'logps/ref_rejected': -83.78262329101562, 'KL/chosen_KL_mean': -354.402587890625, 'KL/rejected_KL_mean': -604.8721923828125, 'KL/mean': -479.6373596191406, 'KL/std': 283.220458984375, 'logits/chosen': -0.545151948928833, 'logits/rejected': -0.540196418762207, 'epoch': 0.58} + 58%|█████▊ | 395/681 [16:45<11:48, 2.48s/it] 58%|█████▊ | 396/681 [16:47<11:53, 2.50s/it] {'loss': 1.0874, 'grad_norm': 26.06972312927246, 'learning_rate': 2.2437845895869825e-07, 'fcm_dpo/beta': 0.0017187923658639193, 'fcm_dpo/q_t': 0.4109712243080139, 'fcm_dpo/delta': 0.01880822703242302, 'fcm_dpo/margin': 222.18011474609375, 'margin_dpo/margin_mean': 222.1800994873047, 'margin_dpo/margin_std': 261.53839111328125, 'logps/chosen': -441.6429443359375, 'logps/rejected': -683.4737548828125, 'logps/ref_chosen': -68.99594116210938, 'logps/ref_rejected': -88.64665985107422, 'KL/chosen_KL_mean': -372.64703369140625, 'KL/rejected_KL_mean': -594.8271484375, 'KL/mean': -483.7370910644531, 'KL/std': 283.7913818359375, 'logits/chosen': -0.5321019887924194, 'logits/rejected': -0.514782726764679, 'epoch': 0.58} + 58%|█████▊ | 396/681 [16:47<11:53, 2.50s/it] 58%|█████▊ | 397/681 [16:50<11:48, 2.49s/it] {'loss': 1.0122, 'grad_norm': 34.39192581176758, 'learning_rate': 2.2310222927429716e-07, 'fcm_dpo/beta': 0.0016919523477554321, 'fcm_dpo/q_t': 0.3857801556587219, 'fcm_dpo/delta': -0.09789647161960602, 'fcm_dpo/margin': 291.03643798828125, 'margin_dpo/margin_mean': 291.03643798828125, 'margin_dpo/margin_std': 297.6669616699219, 'logps/chosen': -411.83819580078125, 'logps/rejected': -744.713623046875, 'logps/ref_chosen': -61.27716827392578, 'logps/ref_rejected': -103.11612701416016, 'KL/chosen_KL_mean': -350.56103515625, 'KL/rejected_KL_mean': -641.597412109375, 'KL/mean': -496.0792236328125, 'KL/std': 283.2353820800781, 'logits/chosen': -0.4891834259033203, 'logits/rejected': -0.4951217472553253, 'epoch': 0.58} + 58%|█████▊ | 397/681 [16:50<11:48, 2.49s/it] 58%|█████▊ | 398/681 [16:52<11:31, 2.44s/it] {'loss': 1.0711, 'grad_norm': 22.932445526123047, 'learning_rate': 2.2182670837116972e-07, 'fcm_dpo/beta': 0.001679509412497282, 'fcm_dpo/q_t': 0.4011620283126831, 'fcm_dpo/delta': -0.04076296463608742, 'fcm_dpo/margin': 261.36553955078125, 'margin_dpo/margin_mean': 261.36553955078125, 'margin_dpo/margin_std': 347.5543518066406, 'logps/chosen': -450.9375915527344, 'logps/rejected': -752.6752319335938, 'logps/ref_chosen': -68.15155029296875, 'logps/ref_rejected': -108.52360534667969, 'KL/chosen_KL_mean': -382.7860412597656, 'KL/rejected_KL_mean': -644.151611328125, 'KL/mean': -513.4688110351562, 'KL/std': 299.1744384765625, 'logits/chosen': -0.5526921153068542, 'logits/rejected': -0.5512826442718506, 'epoch': 0.58} + 58%|█████▊ | 398/681 [16:52<11:31, 2.44s/it] 59%|█████▊ | 399/681 [16:54<11:34, 2.46s/it] {'loss': 1.1039, 'grad_norm': 32.84918975830078, 'learning_rate': 2.2055192986047804e-07, 'fcm_dpo/beta': 0.0016741682775318623, 'fcm_dpo/q_t': 0.4100215435028076, 'fcm_dpo/delta': 0.0070329029113054276, 'fcm_dpo/margin': 234.8346405029297, 'margin_dpo/margin_mean': 234.8346405029297, 'margin_dpo/margin_std': 332.3253173828125, 'logps/chosen': -398.22894287109375, 'logps/rejected': -650.1392822265625, 'logps/ref_chosen': -60.889801025390625, 'logps/ref_rejected': -77.965576171875, 'KL/chosen_KL_mean': -337.339111328125, 'KL/rejected_KL_mean': -572.1737060546875, 'KL/mean': -454.7564392089844, 'KL/std': 264.2313232421875, 'logits/chosen': -0.5098556280136108, 'logits/rejected': -0.47407281398773193, 'epoch': 0.59} + 59%|█████▊ | 399/681 [16:54<11:34, 2.46s/it] 59%|█████▊ | 400/681 [16:57<11:38, 2.48s/it] {'loss': 0.9719, 'grad_norm': 22.400123596191406, 'learning_rate': 2.192779273338215e-07, 'fcm_dpo/beta': 0.001637051347643137, 'fcm_dpo/q_t': 0.3719968795776367, 'fcm_dpo/delta': -0.15437571704387665, 'fcm_dpo/margin': 333.2471008300781, 'margin_dpo/margin_mean': 333.24713134765625, 'margin_dpo/margin_std': 307.0823974609375, 'logps/chosen': -390.168701171875, 'logps/rejected': -765.02490234375, 'logps/ref_chosen': -63.64359664916992, 'logps/ref_rejected': -105.252685546875, 'KL/chosen_KL_mean': -326.5251159667969, 'KL/rejected_KL_mean': -659.772216796875, 'KL/mean': -493.1486511230469, 'KL/std': 275.30633544921875, 'logits/chosen': -0.523221492767334, 'logits/rejected': -0.5234454870223999, 'epoch': 0.59} + 59%|█████▊ | 400/681 [16:57<11:38, 2.48s/it] 59%|█████▉ | 401/681 [16:59<11:43, 2.51s/it] {'loss': 1.1966, 'grad_norm': 29.05838394165039, 'learning_rate': 2.1800473436235136e-07, 'fcm_dpo/beta': 0.001645284821279347, 'fcm_dpo/q_t': 0.4307780861854553, 'fcm_dpo/delta': 0.0895879864692688, 'fcm_dpo/margin': 190.42982482910156, 'margin_dpo/margin_mean': 190.42984008789062, 'margin_dpo/margin_std': 395.08624267578125, 'logps/chosen': -434.54864501953125, 'logps/rejected': -651.60791015625, 'logps/ref_chosen': -57.16303253173828, 'logps/ref_rejected': -83.79249572753906, 'KL/chosen_KL_mean': -377.3856201171875, 'KL/rejected_KL_mean': -567.8154296875, 'KL/mean': -472.60052490234375, 'KL/std': 292.94207763671875, 'logits/chosen': -0.47050148248672485, 'logits/rejected': -0.4605827033519745, 'epoch': 0.59} + 59%|█████▉ | 401/681 [16:59<11:43, 2.51s/it] 59%|█████▉ | 402/681 [17:02<11:21, 2.44s/it] {'loss': 0.955, 'grad_norm': 31.056983947753906, 'learning_rate': 2.1673238449588665e-07, 'fcm_dpo/beta': 0.0016145255649462342, 'fcm_dpo/q_t': 0.3686904013156891, 'fcm_dpo/delta': -0.17511004209518433, 'fcm_dpo/margin': 350.07647705078125, 'margin_dpo/margin_mean': 350.07647705078125, 'margin_dpo/margin_std': 310.8368225097656, 'logps/chosen': -338.682373046875, 'logps/rejected': -719.0645751953125, 'logps/ref_chosen': -50.74037170410156, 'logps/ref_rejected': -81.0460433959961, 'KL/chosen_KL_mean': -287.9420166015625, 'KL/rejected_KL_mean': -638.0184936523438, 'KL/mean': -462.9802551269531, 'KL/std': 305.24725341796875, 'logits/chosen': -0.5516624450683594, 'logits/rejected': -0.5455505847930908, 'epoch': 0.59} + 59%|█████▉ | 402/681 [17:02<11:21, 2.44s/it] 59%|█████▉ | 403/681 [17:04<11:15, 2.43s/it] {'loss': 1.0561, 'grad_norm': 26.145050048828125, 'learning_rate': 2.154609112620295e-07, 'fcm_dpo/beta': 0.0015922733582556248, 'fcm_dpo/q_t': 0.40002089738845825, 'fcm_dpo/delta': -0.032966844737529755, 'fcm_dpo/margin': 271.0248107910156, 'margin_dpo/margin_mean': 271.0248107910156, 'margin_dpo/margin_std': 303.0567932128906, 'logps/chosen': -374.93505859375, 'logps/rejected': -676.0791015625, 'logps/ref_chosen': -47.14731216430664, 'logps/ref_rejected': -77.2666015625, 'KL/chosen_KL_mean': -327.7877197265625, 'KL/rejected_KL_mean': -598.8125, 'KL/mean': -463.3001403808594, 'KL/std': 292.3427429199219, 'logits/chosen': -0.5232188701629639, 'logits/rejected': -0.525371789932251, 'epoch': 0.59} + 59%|█████▉ | 403/681 [17:04<11:15, 2.43s/it] 59%|█████▉ | 404/681 [17:07<11:18, 2.45s/it] {'loss': 1.0866, 'grad_norm': 32.78266906738281, 'learning_rate': 2.1419034816528218e-07, 'fcm_dpo/beta': 0.0015823390567675233, 'fcm_dpo/q_t': 0.4035566449165344, 'fcm_dpo/delta': -0.025122996419668198, 'fcm_dpo/margin': 267.9746398925781, 'margin_dpo/margin_mean': 267.97467041015625, 'margin_dpo/margin_std': 370.1915283203125, 'logps/chosen': -405.8624267578125, 'logps/rejected': -703.1168212890625, 'logps/ref_chosen': -47.875274658203125, 'logps/ref_rejected': -77.15499877929688, 'KL/chosen_KL_mean': -357.9871520996094, 'KL/rejected_KL_mean': -625.9617919921875, 'KL/mean': -491.9744873046875, 'KL/std': 283.893310546875, 'logits/chosen': -0.5385224223136902, 'logits/rejected': -0.533818244934082, 'epoch': 0.59} + 59%|█████▉ | 404/681 [17:07<11:18, 2.45s/it] 59%|█████▉ | 405/681 [17:09<11:02, 2.40s/it] {'loss': 1.1557, 'grad_norm': 32.378849029541016, 'learning_rate': 2.129207286861638e-07, 'fcm_dpo/beta': 0.0015738653019070625, 'fcm_dpo/q_t': 0.42195454239845276, 'fcm_dpo/delta': -0.04883524030447006, 'fcm_dpo/margin': 220.505615234375, 'margin_dpo/margin_mean': 220.505615234375, 'margin_dpo/margin_std': 375.1750183105469, 'logps/chosen': -467.20526123046875, 'logps/rejected': -709.7347412109375, 'logps/ref_chosen': -65.16290283203125, 'logps/ref_rejected': -87.18678283691406, 'KL/chosen_KL_mean': -402.0423278808594, 'KL/rejected_KL_mean': -622.5479125976562, 'KL/mean': -512.295166015625, 'KL/std': 303.5537414550781, 'logits/chosen': -0.5482072830200195, 'logits/rejected': -0.5404790639877319, 'epoch': 0.59} + 59%|█████▉ | 405/681 [17:09<11:02, 2.40s/it] 60%|█████▉ | 406/681 [17:11<10:57, 2.39s/it] {'loss': 1.0558, 'grad_norm': 26.965667724609375, 'learning_rate': 2.1165208628032861e-07, 'fcm_dpo/beta': 0.001560859032906592, 'fcm_dpo/q_t': 0.3976570963859558, 'fcm_dpo/delta': -0.050896137952804565, 'fcm_dpo/margin': 287.3280334472656, 'margin_dpo/margin_mean': 287.3280334472656, 'margin_dpo/margin_std': 347.114990234375, 'logps/chosen': -411.8919982910156, 'logps/rejected': -741.557861328125, 'logps/ref_chosen': -49.740814208984375, 'logps/ref_rejected': -92.07862854003906, 'KL/chosen_KL_mean': -362.15118408203125, 'KL/rejected_KL_mean': -649.479248046875, 'KL/mean': -505.815185546875, 'KL/std': 306.6311950683594, 'logits/chosen': -0.5607165098190308, 'logits/rejected': -0.5739609599113464, 'epoch': 0.6} + 60%|█████▉ | 406/681 [17:11<10:57, 2.39s/it] 60%|█████▉ | 407/681 [17:14<11:03, 2.42s/it] {'loss': 1.1933, 'grad_norm': 27.866214752197266, 'learning_rate': 2.1038445437768375e-07, 'fcm_dpo/beta': 0.0015518320724368095, 'fcm_dpo/q_t': 0.43519794940948486, 'fcm_dpo/delta': 0.009759590961039066, 'fcm_dpo/margin': 177.0074462890625, 'margin_dpo/margin_mean': 177.0074462890625, 'margin_dpo/margin_std': 323.0165710449219, 'logps/chosen': -446.8701477050781, 'logps/rejected': -645.0589599609375, 'logps/ref_chosen': -56.33069610595703, 'logps/ref_rejected': -77.51209259033203, 'KL/chosen_KL_mean': -390.5394287109375, 'KL/rejected_KL_mean': -567.546875, 'KL/mean': -479.0431823730469, 'KL/std': 242.9478759765625, 'logits/chosen': -0.5981777310371399, 'logits/rejected': -0.5707347393035889, 'epoch': 0.6} + 60%|█████▉ | 407/681 [17:14<11:03, 2.42s/it] 60%|█████▉ | 408/681 [17:16<11:21, 2.50s/it] {'loss': 1.1321, 'grad_norm': 26.215208053588867, 'learning_rate': 2.0911786638150872e-07, 'fcm_dpo/beta': 0.0015722161624580622, 'fcm_dpo/q_t': 0.4239957928657532, 'fcm_dpo/delta': 0.07937593758106232, 'fcm_dpo/margin': 205.55116271972656, 'margin_dpo/margin_mean': 205.55116271972656, 'margin_dpo/margin_std': 269.0951232910156, 'logps/chosen': -460.0039367675781, 'logps/rejected': -685.8627319335938, 'logps/ref_chosen': -69.789306640625, 'logps/ref_rejected': -90.09693908691406, 'KL/chosen_KL_mean': -390.214599609375, 'KL/rejected_KL_mean': -595.7658081054688, 'KL/mean': -492.9902038574219, 'KL/std': 239.10140991210938, 'logits/chosen': -0.5892931222915649, 'logits/rejected': -0.5651265382766724, 'epoch': 0.6} + 60%|█████▉ | 408/681 [17:16<11:21, 2.50s/it] 60%|██████ | 409/681 [17:19<11:33, 2.55s/it] {'loss': 1.1421, 'grad_norm': 37.040836334228516, 'learning_rate': 2.0785235566757517e-07, 'fcm_dpo/beta': 0.00160063779912889, 'fcm_dpo/q_t': 0.4251343607902527, 'fcm_dpo/delta': 0.07980034500360489, 'fcm_dpo/margin': 201.486328125, 'margin_dpo/margin_mean': 201.48635864257812, 'margin_dpo/margin_std': 292.822998046875, 'logps/chosen': -451.42791748046875, 'logps/rejected': -670.5010986328125, 'logps/ref_chosen': -67.31744384765625, 'logps/ref_rejected': -84.904296875, 'KL/chosen_KL_mean': -384.1105041503906, 'KL/rejected_KL_mean': -585.5968017578125, 'KL/mean': -484.8536682128906, 'KL/std': 270.78424072265625, 'logits/chosen': -0.5418749451637268, 'logits/rejected': -0.5251990556716919, 'epoch': 0.6} + 60%|██████ | 409/681 [17:19<11:33, 2.55s/it] 60%|██████ | 410/681 [17:22<11:41, 2.59s/it] {'loss': 1.0854, 'grad_norm': 28.998903274536133, 'learning_rate': 2.065879555832674e-07, 'fcm_dpo/beta': 0.0016031713457778096, 'fcm_dpo/q_t': 0.4068432152271271, 'fcm_dpo/delta': 0.003254479728639126, 'fcm_dpo/margin': 247.55059814453125, 'margin_dpo/margin_mean': 247.5506134033203, 'margin_dpo/margin_std': 306.04541015625, 'logps/chosen': -420.98089599609375, 'logps/rejected': -700.26513671875, 'logps/ref_chosen': -51.465354919433594, 'logps/ref_rejected': -83.198974609375, 'KL/chosen_KL_mean': -369.51556396484375, 'KL/rejected_KL_mean': -617.066162109375, 'KL/mean': -493.2908630371094, 'KL/std': 272.4659423828125, 'logits/chosen': -0.5895723104476929, 'logits/rejected': -0.5948858857154846, 'epoch': 0.6} + 60%|██████ | 410/681 [17:22<11:41, 2.59s/it] 60%|██████ | 411/681 [17:24<11:21, 2.53s/it] {'loss': 1.1204, 'grad_norm': 33.94599533081055, 'learning_rate': 2.0532469944670343e-07, 'fcm_dpo/beta': 0.001580849289894104, 'fcm_dpo/q_t': 0.41524988412857056, 'fcm_dpo/delta': -0.0746782049536705, 'fcm_dpo/margin': 234.19656372070312, 'margin_dpo/margin_mean': 234.19654846191406, 'margin_dpo/margin_std': 333.4945068359375, 'logps/chosen': -437.5541687011719, 'logps/rejected': -700.138427734375, 'logps/ref_chosen': -52.30727005004883, 'logps/ref_rejected': -80.69495391845703, 'KL/chosen_KL_mean': -385.24688720703125, 'KL/rejected_KL_mean': -619.4434204101562, 'KL/mean': -502.3451843261719, 'KL/std': 290.84130859375, 'logits/chosen': -0.5782661437988281, 'logits/rejected': -0.589745819568634, 'epoch': 0.6} + 60%|██████ | 411/681 [17:24<11:21, 2.53s/it] 60%|██████ | 412/681 [17:27<11:10, 2.49s/it] {'loss': 1.0898, 'grad_norm': 30.582490921020508, 'learning_rate': 2.0406262054585738e-07, 'fcm_dpo/beta': 0.001582764321938157, 'fcm_dpo/q_t': 0.4081728160381317, 'fcm_dpo/delta': 0.0037793107330799103, 'fcm_dpo/margin': 250.42257690429688, 'margin_dpo/margin_mean': 250.42259216308594, 'margin_dpo/margin_std': 321.5178527832031, 'logps/chosen': -434.891357421875, 'logps/rejected': -732.2306518554688, 'logps/ref_chosen': -53.144126892089844, 'logps/ref_rejected': -100.0608139038086, 'KL/chosen_KL_mean': -381.74725341796875, 'KL/rejected_KL_mean': -632.1698608398438, 'KL/mean': -506.9585266113281, 'KL/std': 275.69854736328125, 'logits/chosen': -0.6569205522537231, 'logits/rejected': -0.6889303922653198, 'epoch': 0.6} + 60%|██████ | 412/681 [17:27<11:10, 2.49s/it] 61%|██████ | 413/681 [17:29<11:22, 2.55s/it] {'loss': 1.1012, 'grad_norm': 35.33984375, 'learning_rate': 2.0280175213768205e-07, 'fcm_dpo/beta': 0.001589751336723566, 'fcm_dpo/q_t': 0.410112202167511, 'fcm_dpo/delta': 0.01986226998269558, 'fcm_dpo/margin': 239.52615356445312, 'margin_dpo/margin_mean': 239.52615356445312, 'margin_dpo/margin_std': 314.6912841796875, 'logps/chosen': -478.81341552734375, 'logps/rejected': -756.2310180664062, 'logps/ref_chosen': -61.58196258544922, 'logps/ref_rejected': -99.47340393066406, 'KL/chosen_KL_mean': -417.2314453125, 'KL/rejected_KL_mean': -656.7576293945312, 'KL/mean': -536.9945068359375, 'KL/std': 290.6329345703125, 'logits/chosen': -0.6048665046691895, 'logits/rejected': -0.6170832514762878, 'epoch': 0.61} + 61%|██████ | 413/681 [17:29<11:22, 2.55s/it] 61%|██████ | 414/681 [17:32<11:21, 2.55s/it] {'loss': 1.0772, 'grad_norm': 36.57903289794922, 'learning_rate': 2.0154212744723247e-07, 'fcm_dpo/beta': 0.0015965222846716642, 'fcm_dpo/q_t': 0.40425509214401245, 'fcm_dpo/delta': -0.010348714888095856, 'fcm_dpo/margin': 256.2596435546875, 'margin_dpo/margin_mean': 256.2596435546875, 'margin_dpo/margin_std': 301.134033203125, 'logps/chosen': -430.7724609375, 'logps/rejected': -728.047119140625, 'logps/ref_chosen': -46.63148498535156, 'logps/ref_rejected': -87.64653015136719, 'KL/chosen_KL_mean': -384.1409912109375, 'KL/rejected_KL_mean': -640.400634765625, 'KL/mean': -512.2708129882812, 'KL/std': 261.5974426269531, 'logits/chosen': -0.5525113344192505, 'logits/rejected': -0.5497645139694214, 'epoch': 0.61} + 61%|██████ | 414/681 [17:32<11:21, 2.55s/it] 61%|██████ | 415/681 [17:35<11:34, 2.61s/it] {'loss': 1.1493, 'grad_norm': 29.083877563476562, 'learning_rate': 2.002837796667909e-07, 'fcm_dpo/beta': 0.0015998441958799958, 'fcm_dpo/q_t': 0.4255606532096863, 'fcm_dpo/delta': 0.08544344455003738, 'fcm_dpo/margin': 198.34219360351562, 'margin_dpo/margin_mean': 198.34219360351562, 'margin_dpo/margin_std': 297.89312744140625, 'logps/chosen': -475.55975341796875, 'logps/rejected': -695.7611694335938, 'logps/ref_chosen': -78.6182861328125, 'logps/ref_rejected': -100.47752380371094, 'KL/chosen_KL_mean': -396.94146728515625, 'KL/rejected_KL_mean': -595.28369140625, 'KL/mean': -496.112548828125, 'KL/std': 266.48736572265625, 'logits/chosen': -0.5818853974342346, 'logits/rejected': -0.5800877809524536, 'epoch': 0.61} + 61%|██████ | 415/681 [17:35<11:34, 2.61s/it] 61%|██████ | 416/681 [17:37<11:26, 2.59s/it] {'loss': 1.0009, 'grad_norm': 30.425918579101562, 'learning_rate': 1.990267419549914e-07, 'fcm_dpo/beta': 0.0015928513603284955, 'fcm_dpo/q_t': 0.38390302658081055, 'fcm_dpo/delta': -0.10007157921791077, 'fcm_dpo/margin': 310.9083251953125, 'margin_dpo/margin_mean': 310.9082946777344, 'margin_dpo/margin_std': 287.4730224609375, 'logps/chosen': -418.49444580078125, 'logps/rejected': -761.6922607421875, 'logps/ref_chosen': -58.27912521362305, 'logps/ref_rejected': -90.56871795654297, 'KL/chosen_KL_mean': -360.2153015136719, 'KL/rejected_KL_mean': -671.12353515625, 'KL/mean': -515.66943359375, 'KL/std': 292.68878173828125, 'logits/chosen': -0.5869680643081665, 'logits/rejected': -0.5953909754753113, 'epoch': 0.61} + 61%|██████ | 416/681 [17:37<11:26, 2.59s/it] 61%|██████ | 417/681 [17:39<11:06, 2.52s/it] {'loss': 1.0514, 'grad_norm': 36.952362060546875, 'learning_rate': 1.9777104743594686e-07, 'fcm_dpo/beta': 0.0015787691809237003, 'fcm_dpo/q_t': 0.4014412462711334, 'fcm_dpo/delta': -0.016375936567783356, 'fcm_dpo/margin': 263.29327392578125, 'margin_dpo/margin_mean': 263.2933044433594, 'margin_dpo/margin_std': 253.69268798828125, 'logps/chosen': -409.7278747558594, 'logps/rejected': -690.9742431640625, 'logps/ref_chosen': -50.1987190246582, 'logps/ref_rejected': -68.15184020996094, 'KL/chosen_KL_mean': -359.5291748046875, 'KL/rejected_KL_mean': -622.8224487304688, 'KL/mean': -491.1758117675781, 'KL/std': 265.6627197265625, 'logits/chosen': -0.5713890790939331, 'logits/rejected': -0.5573433637619019, 'epoch': 0.61} + 61%|██████ | 417/681 [17:40<11:06, 2.52s/it] 61%|██████▏ | 418/681 [17:42<11:07, 2.54s/it] {'loss': 1.082, 'grad_norm': 22.66263771057129, 'learning_rate': 1.965167291983757e-07, 'fcm_dpo/beta': 0.0015799949178472161, 'fcm_dpo/q_t': 0.40348464250564575, 'fcm_dpo/delta': -0.030489690601825714, 'fcm_dpo/margin': 271.077880859375, 'margin_dpo/margin_mean': 271.077880859375, 'margin_dpo/margin_std': 359.14801025390625, 'logps/chosen': -470.326416015625, 'logps/rejected': -764.1173095703125, 'logps/ref_chosen': -81.97846984863281, 'logps/ref_rejected': -104.69148254394531, 'KL/chosen_KL_mean': -388.34796142578125, 'KL/rejected_KL_mean': -659.4258422851562, 'KL/mean': -523.8869018554688, 'KL/std': 309.9178466796875, 'logits/chosen': -0.6504318714141846, 'logits/rejected': -0.634763240814209, 'epoch': 0.61} + 61%|██████▏ | 418/681 [17:42<11:07, 2.54s/it] 62%|██████▏ | 419/681 [17:45<11:05, 2.54s/it] {'loss': 1.0358, 'grad_norm': 32.60568618774414, 'learning_rate': 1.9526382029472988e-07, 'fcm_dpo/beta': 0.001551083056256175, 'fcm_dpo/q_t': 0.3922021687030792, 'fcm_dpo/delta': -0.06462173163890839, 'fcm_dpo/margin': 297.6322021484375, 'margin_dpo/margin_mean': 297.6322021484375, 'margin_dpo/margin_std': 322.8487854003906, 'logps/chosen': -411.17413330078125, 'logps/rejected': -747.4407958984375, 'logps/ref_chosen': -52.948646545410156, 'logps/ref_rejected': -91.58309936523438, 'KL/chosen_KL_mean': -358.2254638671875, 'KL/rejected_KL_mean': -655.857666015625, 'KL/mean': -507.04156494140625, 'KL/std': 277.0826110839844, 'logits/chosen': -0.5689761638641357, 'logits/rejected': -0.5716375708580017, 'epoch': 0.62} + 62%|██████▏ | 419/681 [17:45<11:05, 2.54s/it] 62%|██████▏ | 420/681 [17:47<11:01, 2.53s/it] {'loss': 1.2113, 'grad_norm': 52.570884704589844, 'learning_rate': 1.9401235374032425e-07, 'fcm_dpo/beta': 0.0015754573978483677, 'fcm_dpo/q_t': 0.43682652711868286, 'fcm_dpo/delta': 0.1201419085264206, 'fcm_dpo/margin': 179.68783569335938, 'margin_dpo/margin_mean': 179.68783569335938, 'margin_dpo/margin_std': 389.99383544921875, 'logps/chosen': -533.4859619140625, 'logps/rejected': -704.7237548828125, 'logps/ref_chosen': -77.7699203491211, 'logps/ref_rejected': -69.31985473632812, 'KL/chosen_KL_mean': -455.716064453125, 'KL/rejected_KL_mean': -635.4039306640625, 'KL/mean': -545.5599365234375, 'KL/std': 290.76470947265625, 'logits/chosen': -0.5963464975357056, 'logits/rejected': -0.5628973245620728, 'epoch': 0.62} + 62%|██████▏ | 420/681 [17:47<11:01, 2.53s/it] 62%|██████▏ | 421/681 [17:50<11:13, 2.59s/it] {'loss': 1.1435, 'grad_norm': 27.965801239013672, 'learning_rate': 1.9276236251246653e-07, 'fcm_dpo/beta': 0.0016072317957878113, 'fcm_dpo/q_t': 0.42217308282852173, 'fcm_dpo/delta': 0.07370726019144058, 'fcm_dpo/margin': 204.13345336914062, 'margin_dpo/margin_mean': 204.13345336914062, 'margin_dpo/margin_std': 298.565185546875, 'logps/chosen': -440.9250793457031, 'logps/rejected': -680.5740966796875, 'logps/ref_chosen': -53.765865325927734, 'logps/ref_rejected': -89.28144836425781, 'KL/chosen_KL_mean': -387.1592102050781, 'KL/rejected_KL_mean': -591.2926025390625, 'KL/mean': -489.2259216308594, 'KL/std': 287.3149719238281, 'logits/chosen': -0.6343262195587158, 'logits/rejected': -0.6225738525390625, 'epoch': 0.62} + 62%|██████▏ | 421/681 [17:50<11:13, 2.59s/it] 62%|██████▏ | 422/681 [17:53<11:28, 2.66s/it] {'loss': 1.1034, 'grad_norm': 36.293704986572266, 'learning_rate': 1.9151387954958792e-07, 'fcm_dpo/beta': 0.0016083747614175081, 'fcm_dpo/q_t': 0.40930503606796265, 'fcm_dpo/delta': 0.010134613141417503, 'fcm_dpo/margin': 242.6432647705078, 'margin_dpo/margin_mean': 242.6432647705078, 'margin_dpo/margin_std': 339.7408142089844, 'logps/chosen': -503.51824951171875, 'logps/rejected': -765.3912353515625, 'logps/ref_chosen': -68.6337661743164, 'logps/ref_rejected': -87.86351013183594, 'KL/chosen_KL_mean': -434.88446044921875, 'KL/rejected_KL_mean': -677.5277099609375, 'KL/mean': -556.2061157226562, 'KL/std': 292.18963623046875, 'logits/chosen': -0.6203078031539917, 'logits/rejected': -0.6245821714401245, 'epoch': 0.62} + 62%|██████▏ | 422/681 [17:53<11:28, 2.66s/it] 62%|██████▏ | 423/681 [17:55<11:05, 2.58s/it] {'loss': 1.0598, 'grad_norm': 34.40102005004883, 'learning_rate': 1.902669377503756e-07, 'fcm_dpo/beta': 0.0016007790109142661, 'fcm_dpo/q_t': 0.40004196763038635, 'fcm_dpo/delta': -0.032924652099609375, 'fcm_dpo/margin': 269.53515625, 'margin_dpo/margin_mean': 269.53515625, 'margin_dpo/margin_std': 315.0151672363281, 'logps/chosen': -480.6947021484375, 'logps/rejected': -781.5460815429688, 'logps/ref_chosen': -54.99030303955078, 'logps/ref_rejected': -86.30654907226562, 'KL/chosen_KL_mean': -425.70440673828125, 'KL/rejected_KL_mean': -695.239501953125, 'KL/mean': -560.471923828125, 'KL/std': 282.6575012207031, 'logits/chosen': -0.613274097442627, 'logits/rejected': -0.6200335025787354, 'epoch': 0.62} + 62%|██████▏ | 423/681 [17:55<11:05, 2.58s/it] 62%|██████▏ | 424/681 [17:58<11:11, 2.61s/it] {'loss': 1.0981, 'grad_norm': 35.320560455322266, 'learning_rate': 1.890215699729057e-07, 'fcm_dpo/beta': 0.0015928398352116346, 'fcm_dpo/q_t': 0.40977025032043457, 'fcm_dpo/delta': 0.0005655810236930847, 'fcm_dpo/margin': 250.56887817382812, 'margin_dpo/margin_mean': 250.56887817382812, 'margin_dpo/margin_std': 348.552978515625, 'logps/chosen': -443.8102722167969, 'logps/rejected': -704.84619140625, 'logps/ref_chosen': -56.01192092895508, 'logps/ref_rejected': -66.47896575927734, 'KL/chosen_KL_mean': -387.79833984375, 'KL/rejected_KL_mean': -638.3672485351562, 'KL/mean': -513.0828247070312, 'KL/std': 285.3108215332031, 'logits/chosen': -0.6114071011543274, 'logits/rejected': -0.5866736769676208, 'epoch': 0.62} + 62%|██████▏ | 424/681 [17:58<11:11, 2.61s/it] 62%|██████▏ | 425/681 [18:00<11:02, 2.59s/it] {'loss': 1.1745, 'grad_norm': 35.080318450927734, 'learning_rate': 1.8777780903377732e-07, 'fcm_dpo/beta': 0.00162741937674582, 'fcm_dpo/q_t': 0.42789530754089355, 'fcm_dpo/delta': 0.09813511371612549, 'fcm_dpo/margin': 187.08584594726562, 'margin_dpo/margin_mean': 187.0858612060547, 'margin_dpo/margin_std': 329.364990234375, 'logps/chosen': -488.7164306640625, 'logps/rejected': -724.8587646484375, 'logps/ref_chosen': -46.86899948120117, 'logps/ref_rejected': -95.92545318603516, 'KL/chosen_KL_mean': -441.84747314453125, 'KL/rejected_KL_mean': -628.933349609375, 'KL/mean': -535.390380859375, 'KL/std': 266.4034423828125, 'logits/chosen': -0.6289581060409546, 'logits/rejected': -0.630668044090271, 'epoch': 0.62} + 62%|██████▏ | 425/681 [18:00<11:02, 2.59s/it] 63%|██████▎ | 426/681 [18:03<11:05, 2.61s/it] {'loss': 1.0926, 'grad_norm': 30.283872604370117, 'learning_rate': 1.8653568770724803e-07, 'fcm_dpo/beta': 0.0016408449737355113, 'fcm_dpo/q_t': 0.4089978337287903, 'fcm_dpo/delta': 0.008432462811470032, 'fcm_dpo/margin': 238.56373596191406, 'margin_dpo/margin_mean': 238.56373596191406, 'margin_dpo/margin_std': 298.9935607910156, 'logps/chosen': -476.708740234375, 'logps/rejected': -719.95556640625, 'logps/ref_chosen': -76.58354187011719, 'logps/ref_rejected': -81.26658630371094, 'KL/chosen_KL_mean': -400.1252136230469, 'KL/rejected_KL_mean': -638.68896484375, 'KL/mean': -519.4071044921875, 'KL/std': 272.60137939453125, 'logits/chosen': -0.6305758953094482, 'logits/rejected': -0.5998473167419434, 'epoch': 0.63} + 63%|██████▎ | 426/681 [18:03<11:05, 2.61s/it] 63%|██████▎ | 427/681 [18:06<11:02, 2.61s/it] {'loss': 1.1522, 'grad_norm': 27.282377243041992, 'learning_rate': 1.8529523872436977e-07, 'fcm_dpo/beta': 0.0016517244512215257, 'fcm_dpo/q_t': 0.42767786979675293, 'fcm_dpo/delta': 0.09017623960971832, 'fcm_dpo/margin': 189.3392333984375, 'margin_dpo/margin_mean': 189.33921813964844, 'margin_dpo/margin_std': 286.495849609375, 'logps/chosen': -437.66839599609375, 'logps/rejected': -640.7197265625, 'logps/ref_chosen': -64.8538818359375, 'logps/ref_rejected': -78.5660171508789, 'KL/chosen_KL_mean': -372.81451416015625, 'KL/rejected_KL_mean': -562.1536865234375, 'KL/mean': -467.484130859375, 'KL/std': 240.36863708496094, 'logits/chosen': -0.6336793899536133, 'logits/rejected': -0.6121144890785217, 'epoch': 0.63} + 63%|██████▎ | 427/681 [18:06<11:02, 2.61s/it] 63%|██████▎ | 428/681 [18:08<11:06, 2.63s/it] {'loss': 1.1038, 'grad_norm': 31.117158889770508, 'learning_rate': 1.8405649477212697e-07, 'fcm_dpo/beta': 0.0016518604243174195, 'fcm_dpo/q_t': 0.40379005670547485, 'fcm_dpo/delta': -0.02350825071334839, 'fcm_dpo/margin': 255.57199096679688, 'margin_dpo/margin_mean': 255.57199096679688, 'margin_dpo/margin_std': 387.40362548828125, 'logps/chosen': -515.7324829101562, 'logps/rejected': -811.9496459960938, 'logps/ref_chosen': -62.63666534423828, 'logps/ref_rejected': -103.28181457519531, 'KL/chosen_KL_mean': -453.0958251953125, 'KL/rejected_KL_mean': -708.6678466796875, 'KL/mean': -580.8818359375, 'KL/std': 316.7676696777344, 'logits/chosen': -0.6245919466018677, 'logits/rejected': -0.6301891803741455, 'epoch': 0.63} + 63%|██████▎ | 428/681 [18:08<11:06, 2.63s/it] 63%|██████▎ | 429/681 [18:11<11:00, 2.62s/it] {'loss': 1.1566, 'grad_norm': 32.68413543701172, 'learning_rate': 1.828194884925749e-07, 'fcm_dpo/beta': 0.001649289857596159, 'fcm_dpo/q_t': 0.42213714122772217, 'fcm_dpo/delta': -0.040331680327653885, 'fcm_dpo/margin': 206.8295440673828, 'margin_dpo/margin_mean': 206.82952880859375, 'margin_dpo/margin_std': 348.011474609375, 'logps/chosen': -530.4644775390625, 'logps/rejected': -747.85498046875, 'logps/ref_chosen': -81.23401641845703, 'logps/ref_rejected': -91.79493713378906, 'KL/chosen_KL_mean': -449.23046875, 'KL/rejected_KL_mean': -656.06005859375, 'KL/mean': -552.645263671875, 'KL/std': 283.962158203125, 'logits/chosen': -0.652100682258606, 'logits/rejected': -0.6345040798187256, 'epoch': 0.63} + 63%|██████▎ | 429/681 [18:11<11:00, 2.62s/it] 63%|██████▎ | 430/681 [18:14<11:07, 2.66s/it] {'loss': 1.1256, 'grad_norm': 29.323801040649414, 'learning_rate': 1.8158425248197928e-07, 'fcm_dpo/beta': 0.0016597865615040064, 'fcm_dpo/q_t': 0.4206964075565338, 'fcm_dpo/delta': 0.058922089636325836, 'fcm_dpo/margin': 206.62753295898438, 'margin_dpo/margin_mean': 206.62753295898438, 'margin_dpo/margin_std': 286.5379638671875, 'logps/chosen': -437.4637451171875, 'logps/rejected': -687.59375, 'logps/ref_chosen': -60.920326232910156, 'logps/ref_rejected': -104.42280578613281, 'KL/chosen_KL_mean': -376.54345703125, 'KL/rejected_KL_mean': -583.1709594726562, 'KL/mean': -479.85723876953125, 'KL/std': 259.899169921875, 'logits/chosen': -0.5997954607009888, 'logits/rejected': -0.598332941532135, 'epoch': 0.63} + 63%|██████▎ | 430/681 [18:14<11:07, 2.66s/it] 63%|██████▎ | 431/681 [18:16<11:05, 2.66s/it] {'loss': 1.0216, 'grad_norm': 30.228837966918945, 'learning_rate': 1.8035081928995788e-07, 'fcm_dpo/beta': 0.0016381596215069294, 'fcm_dpo/q_t': 0.38691771030426025, 'fcm_dpo/delta': -0.09109188616275787, 'fcm_dpo/margin': 297.0145263671875, 'margin_dpo/margin_mean': 297.0145568847656, 'margin_dpo/margin_std': 313.81036376953125, 'logps/chosen': -406.90380859375, 'logps/rejected': -739.4097900390625, 'logps/ref_chosen': -57.34874725341797, 'logps/ref_rejected': -92.84022521972656, 'KL/chosen_KL_mean': -349.5550537109375, 'KL/rejected_KL_mean': -646.569580078125, 'KL/mean': -498.06231689453125, 'KL/std': 281.20587158203125, 'logits/chosen': -0.59189772605896, 'logits/rejected': -0.5967794060707092, 'epoch': 0.63} + 63%|██████▎ | 431/681 [18:16<11:05, 2.66s/it] 63%|██████▎ | 432/681 [18:19<11:09, 2.69s/it] {'loss': 1.0208, 'grad_norm': 34.41071701049805, 'learning_rate': 1.791192214186223e-07, 'fcm_dpo/beta': 0.0016237597446888685, 'fcm_dpo/q_t': 0.39008086919784546, 'fcm_dpo/delta': -0.07570492476224899, 'fcm_dpo/margin': 290.58984375, 'margin_dpo/margin_mean': 290.58984375, 'margin_dpo/margin_std': 279.1397705078125, 'logps/chosen': -411.045166015625, 'logps/rejected': -729.1397705078125, 'logps/ref_chosen': -71.07479095458984, 'logps/ref_rejected': -98.57952880859375, 'KL/chosen_KL_mean': -339.9703674316406, 'KL/rejected_KL_mean': -630.5601806640625, 'KL/mean': -485.2652893066406, 'KL/std': 279.104736328125, 'logits/chosen': -0.5879085063934326, 'logits/rejected': -0.5795068740844727, 'epoch': 0.63} + 63%|██████▎ | 432/681 [18:19<11:09, 2.69s/it] 64%|██████▎ | 433/681 [18:22<11:02, 2.67s/it] {'loss': 1.1745, 'grad_norm': 33.45824432373047, 'learning_rate': 1.7788949132172193e-07, 'fcm_dpo/beta': 0.0016333262901753187, 'fcm_dpo/q_t': 0.4265033006668091, 'fcm_dpo/delta': 0.09479224681854248, 'fcm_dpo/margin': 188.51522827148438, 'margin_dpo/margin_mean': 188.51522827148438, 'margin_dpo/margin_std': 336.2310485839844, 'logps/chosen': -494.8766784667969, 'logps/rejected': -721.069580078125, 'logps/ref_chosen': -58.273193359375, 'logps/ref_rejected': -95.95089721679688, 'KL/chosen_KL_mean': -436.6034851074219, 'KL/rejected_KL_mean': -625.1187133789062, 'KL/mean': -530.861083984375, 'KL/std': 284.5777587890625, 'logits/chosen': -0.6549836993217468, 'logits/rejected': -0.6436095237731934, 'epoch': 0.64} + 64%|██████▎ | 433/681 [18:22<11:02, 2.67s/it] 64%|██████▎ | 434/681 [18:24<10:55, 2.66s/it] {'loss': 1.1216, 'grad_norm': 22.17997932434082, 'learning_rate': 1.7666166140378853e-07, 'fcm_dpo/beta': 0.0016410250682383776, 'fcm_dpo/q_t': 0.41886186599731445, 'fcm_dpo/delta': 0.031055327504873276, 'fcm_dpo/margin': 225.47265625, 'margin_dpo/margin_mean': 225.47265625, 'margin_dpo/margin_std': 347.4930725097656, 'logps/chosen': -439.6679992675781, 'logps/rejected': -681.6655883789062, 'logps/ref_chosen': -61.97370147705078, 'logps/ref_rejected': -78.49861145019531, 'KL/chosen_KL_mean': -377.69427490234375, 'KL/rejected_KL_mean': -603.1669921875, 'KL/mean': -490.43060302734375, 'KL/std': 274.6502685546875, 'logits/chosen': -0.6539719104766846, 'logits/rejected': -0.6557008624076843, 'epoch': 0.64} + 64%|██████▎ | 434/681 [18:24<10:55, 2.66s/it] 64%|██████▍ | 435/681 [18:26<10:22, 2.53s/it] {'loss': 1.084, 'grad_norm': 26.81846046447754, 'learning_rate': 1.7543576401928218e-07, 'fcm_dpo/beta': 0.0016461058985441923, 'fcm_dpo/q_t': 0.4061928987503052, 'fcm_dpo/delta': -0.003797288052737713, 'fcm_dpo/margin': 245.1993408203125, 'margin_dpo/margin_mean': 245.1993408203125, 'margin_dpo/margin_std': 302.0467529296875, 'logps/chosen': -398.1992492675781, 'logps/rejected': -679.46337890625, 'logps/ref_chosen': -51.502052307128906, 'logps/ref_rejected': -87.56689453125, 'KL/chosen_KL_mean': -346.69720458984375, 'KL/rejected_KL_mean': -591.896484375, 'KL/mean': -469.296875, 'KL/std': 269.6709289550781, 'logits/chosen': -0.6933680772781372, 'logits/rejected': -0.6857917308807373, 'epoch': 0.64} + 64%|██████▍ | 435/681 [18:27<10:22, 2.53s/it] 64%|██████▍ | 436/681 [18:29<10:28, 2.57s/it] {'loss': 1.1089, 'grad_norm': 44.578636169433594, 'learning_rate': 1.742118314717391e-07, 'fcm_dpo/beta': 0.001651562051847577, 'fcm_dpo/q_t': 0.41518351435661316, 'fcm_dpo/delta': 0.03701151907444, 'fcm_dpo/margin': 220.5616912841797, 'margin_dpo/margin_mean': 220.5616912841797, 'margin_dpo/margin_std': 287.855224609375, 'logps/chosen': -433.4754943847656, 'logps/rejected': -665.3612060546875, 'logps/ref_chosen': -71.40371704101562, 'logps/ref_rejected': -82.72775268554688, 'KL/chosen_KL_mean': -362.07177734375, 'KL/rejected_KL_mean': -582.6334838867188, 'KL/mean': -472.35260009765625, 'KL/std': 247.13198852539062, 'logits/chosen': -0.6292225122451782, 'logits/rejected': -0.5981835126876831, 'epoch': 0.64} + 64%|██████▍ | 436/681 [18:29<10:28, 2.57s/it] 64%|██████▍ | 437/681 [18:32<10:36, 2.61s/it] {'loss': 1.098, 'grad_norm': 30.342931747436523, 'learning_rate': 1.7298989601292036e-07, 'fcm_dpo/beta': 0.0016632757615298033, 'fcm_dpo/q_t': 0.41175174713134766, 'fcm_dpo/delta': 0.02373369038105011, 'fcm_dpo/margin': 226.75625610351562, 'margin_dpo/margin_mean': 226.75625610351562, 'margin_dpo/margin_std': 285.7284851074219, 'logps/chosen': -434.78302001953125, 'logps/rejected': -678.838623046875, 'logps/ref_chosen': -64.7442626953125, 'logps/ref_rejected': -82.04356384277344, 'KL/chosen_KL_mean': -370.03875732421875, 'KL/rejected_KL_mean': -596.7950439453125, 'KL/mean': -483.41693115234375, 'KL/std': 239.50619506835938, 'logits/chosen': -0.6698806881904602, 'logits/rejected': -0.64947509765625, 'epoch': 0.64} + 64%|██████▍ | 437/681 [18:32<10:36, 2.61s/it] 64%|██████▍ | 438/681 [18:34<10:19, 2.55s/it] {'loss': 1.0551, 'grad_norm': 32.4385871887207, 'learning_rate': 1.7176998984196144e-07, 'fcm_dpo/beta': 0.0016521508805453777, 'fcm_dpo/q_t': 0.39868584275245667, 'fcm_dpo/delta': -0.033741071820259094, 'fcm_dpo/margin': 261.35968017578125, 'margin_dpo/margin_mean': 261.35968017578125, 'margin_dpo/margin_std': 287.7158508300781, 'logps/chosen': -445.3568115234375, 'logps/rejected': -730.774658203125, 'logps/ref_chosen': -59.0186653137207, 'logps/ref_rejected': -83.07682800292969, 'KL/chosen_KL_mean': -386.3381652832031, 'KL/rejected_KL_mean': -647.6978759765625, 'KL/mean': -517.0180053710938, 'KL/std': 274.5076904296875, 'logits/chosen': -0.6905832290649414, 'logits/rejected': -0.6742756366729736, 'epoch': 0.64} + 64%|██████▍ | 438/681 [18:34<10:19, 2.55s/it] 64%|██████▍ | 439/681 [18:37<10:08, 2.51s/it] {'loss': 1.1405, 'grad_norm': 35.499446868896484, 'learning_rate': 1.7055214510452458e-07, 'fcm_dpo/beta': 0.001632839790545404, 'fcm_dpo/q_t': 0.4208637773990631, 'fcm_dpo/delta': -0.0710936188697815, 'fcm_dpo/margin': 210.98980712890625, 'margin_dpo/margin_mean': 210.98980712890625, 'margin_dpo/margin_std': 313.9350891113281, 'logps/chosen': -451.9991760253906, 'logps/rejected': -693.1903076171875, 'logps/ref_chosen': -53.78407669067383, 'logps/ref_rejected': -83.98545837402344, 'KL/chosen_KL_mean': -398.215087890625, 'KL/rejected_KL_mean': -609.204833984375, 'KL/mean': -503.7099914550781, 'KL/std': 279.349609375, 'logits/chosen': -0.6591403484344482, 'logits/rejected': -0.662467360496521, 'epoch': 0.64} + 64%|██████▍ | 439/681 [18:37<10:08, 2.51s/it] 65%|██████▍ | 440/681 [18:39<09:55, 2.47s/it] {'loss': 1.0949, 'grad_norm': 42.761192321777344, 'learning_rate': 1.6933639389195134e-07, 'fcm_dpo/beta': 0.001635729568079114, 'fcm_dpo/q_t': 0.410071462392807, 'fcm_dpo/delta': -0.0017141718417406082, 'fcm_dpo/margin': 245.49813842773438, 'margin_dpo/margin_mean': 245.49813842773438, 'margin_dpo/margin_std': 340.76123046875, 'logps/chosen': -499.59130859375, 'logps/rejected': -763.0205078125, 'logps/ref_chosen': -78.56671905517578, 'logps/ref_rejected': -96.49775695800781, 'KL/chosen_KL_mean': -421.02459716796875, 'KL/rejected_KL_mean': -666.5227661132812, 'KL/mean': -543.773681640625, 'KL/std': 341.6332092285156, 'logits/chosen': -0.705498218536377, 'logits/rejected': -0.7023329138755798, 'epoch': 0.65} + 65%|██████▍ | 440/681 [18:39<09:55, 2.47s/it] 65%|██████▍ | 441/681 [18:42<10:06, 2.53s/it] {'loss': 1.1424, 'grad_norm': 46.89374923706055, 'learning_rate': 1.681227682404166e-07, 'fcm_dpo/beta': 0.00163905113004148, 'fcm_dpo/q_t': 0.41395312547683716, 'fcm_dpo/delta': 0.001527242362499237, 'fcm_dpo/margin': 242.963134765625, 'margin_dpo/margin_mean': 242.963134765625, 'margin_dpo/margin_std': 429.1325988769531, 'logps/chosen': -551.3304443359375, 'logps/rejected': -829.93994140625, 'logps/ref_chosen': -60.824440002441406, 'logps/ref_rejected': -96.47080993652344, 'KL/chosen_KL_mean': -490.50604248046875, 'KL/rejected_KL_mean': -733.4691162109375, 'KL/mean': -611.987548828125, 'KL/std': 356.1271667480469, 'logits/chosen': -0.7099902629852295, 'logits/rejected': -0.6972057223320007, 'epoch': 0.65} + 65%|██████▍ | 441/681 [18:42<10:06, 2.53s/it] 65%|██████▍ | 442/681 [18:44<09:53, 2.48s/it] {'loss': 1.0551, 'grad_norm': 32.3649787902832, 'learning_rate': 1.669113001300851e-07, 'fcm_dpo/beta': 0.0016239210963249207, 'fcm_dpo/q_t': 0.3937837481498718, 'fcm_dpo/delta': -0.08454307168722153, 'fcm_dpo/margin': 295.62847900390625, 'margin_dpo/margin_mean': 295.6284484863281, 'margin_dpo/margin_std': 383.97320556640625, 'logps/chosen': -461.8374938964844, 'logps/rejected': -786.9940185546875, 'logps/ref_chosen': -47.01121520996094, 'logps/ref_rejected': -76.53926086425781, 'KL/chosen_KL_mean': -414.8262939453125, 'KL/rejected_KL_mean': -710.4547119140625, 'KL/mean': -562.6405029296875, 'KL/std': 340.4912109375, 'logits/chosen': -0.6760904788970947, 'logits/rejected': -0.6695461273193359, 'epoch': 0.65} + 65%|██████▍ | 442/681 [18:44<09:53, 2.48s/it] 65%|██████▌ | 443/681 [18:47<09:57, 2.51s/it] {'loss': 1.2151, 'grad_norm': 35.34192657470703, 'learning_rate': 1.6570202148426815e-07, 'fcm_dpo/beta': 0.0016035648295655847, 'fcm_dpo/q_t': 0.43321898579597473, 'fcm_dpo/delta': -0.005727704148739576, 'fcm_dpo/margin': 187.87205505371094, 'margin_dpo/margin_mean': 187.87205505371094, 'margin_dpo/margin_std': 419.6290283203125, 'logps/chosen': -549.55810546875, 'logps/rejected': -752.8370361328125, 'logps/ref_chosen': -71.27301788330078, 'logps/ref_rejected': -86.679931640625, 'KL/chosen_KL_mean': -478.28509521484375, 'KL/rejected_KL_mean': -666.1571044921875, 'KL/mean': -572.2210693359375, 'KL/std': 346.36370849609375, 'logits/chosen': -0.6955731511116028, 'logits/rejected': -0.6718583106994629, 'epoch': 0.65} + 65%|██████▌ | 443/681 [18:47<09:57, 2.51s/it] 65%|██████▌ | 444/681 [18:49<09:55, 2.51s/it] {'loss': 1.0553, 'grad_norm': 29.70403480529785, 'learning_rate': 1.6449496416858282e-07, 'fcm_dpo/beta': 0.0015793245984241366, 'fcm_dpo/q_t': 0.3940558433532715, 'fcm_dpo/delta': -0.07941662520170212, 'fcm_dpo/margin': 300.8608093261719, 'margin_dpo/margin_mean': 300.8608093261719, 'margin_dpo/margin_std': 400.1059265136719, 'logps/chosen': -526.8837280273438, 'logps/rejected': -867.7857666015625, 'logps/ref_chosen': -57.213706970214844, 'logps/ref_rejected': -97.25489807128906, 'KL/chosen_KL_mean': -469.6700439453125, 'KL/rejected_KL_mean': -770.5308837890625, 'KL/mean': -620.1004638671875, 'KL/std': 358.370849609375, 'logits/chosen': -0.6584120988845825, 'logits/rejected': -0.6683436632156372, 'epoch': 0.65} + 65%|██████▌ | 444/681 [18:49<09:55, 2.51s/it] 65%|██████▌ | 445/681 [18:52<10:11, 2.59s/it] {'loss': 1.067, 'grad_norm': 34.561370849609375, 'learning_rate': 1.6329015999011182e-07, 'fcm_dpo/beta': 0.0015705095138400793, 'fcm_dpo/q_t': 0.3998865485191345, 'fcm_dpo/delta': -0.04293816536664963, 'fcm_dpo/margin': 280.84063720703125, 'margin_dpo/margin_mean': 280.84063720703125, 'margin_dpo/margin_std': 354.77947998046875, 'logps/chosen': -483.9425964355469, 'logps/rejected': -790.1661376953125, 'logps/ref_chosen': -67.29979705810547, 'logps/ref_rejected': -92.68267059326172, 'KL/chosen_KL_mean': -416.6427917480469, 'KL/rejected_KL_mean': -697.4834594726562, 'KL/mean': -557.0631103515625, 'KL/std': 289.13763427734375, 'logits/chosen': -0.666840672492981, 'logits/rejected': -0.6581023931503296, 'epoch': 0.65} + 65%|██████▌ | 445/681 [18:52<10:11, 2.59s/it] 65%|██████▌ | 446/681 [18:55<10:09, 2.60s/it] {'loss': 1.0346, 'grad_norm': 32.68497085571289, 'learning_rate': 1.6208764069656578e-07, 'fcm_dpo/beta': 0.0015590311959385872, 'fcm_dpo/q_t': 0.39108988642692566, 'fcm_dpo/delta': -0.07091644406318665, 'fcm_dpo/margin': 299.64715576171875, 'margin_dpo/margin_mean': 299.64715576171875, 'margin_dpo/margin_std': 317.11669921875, 'logps/chosen': -440.89520263671875, 'logps/rejected': -782.7080688476562, 'logps/ref_chosen': -59.098487854003906, 'logps/ref_rejected': -101.26419067382812, 'KL/chosen_KL_mean': -381.79669189453125, 'KL/rejected_KL_mean': -681.4439086914062, 'KL/mean': -531.6203002929688, 'KL/std': 308.4264221191406, 'logits/chosen': -0.6791973114013672, 'logits/rejected': -0.6917370557785034, 'epoch': 0.65} + 65%|██████▌ | 446/681 [18:55<10:09, 2.60s/it] 66%|██████▌ | 447/681 [18:57<09:51, 2.53s/it] {'loss': 1.0442, 'grad_norm': 32.39216613769531, 'learning_rate': 1.608874379754465e-07, 'fcm_dpo/beta': 0.0015180823393166065, 'fcm_dpo/q_t': 0.3928487300872803, 'fcm_dpo/delta': -0.08475878089666367, 'fcm_dpo/margin': 316.29620361328125, 'margin_dpo/margin_mean': 316.2962341308594, 'margin_dpo/margin_std': 406.5645751953125, 'logps/chosen': -452.8814697265625, 'logps/rejected': -811.797119140625, 'logps/ref_chosen': -56.07533264160156, 'logps/ref_rejected': -98.69475555419922, 'KL/chosen_KL_mean': -396.80615234375, 'KL/rejected_KL_mean': -713.1023559570312, 'KL/mean': -554.9542236328125, 'KL/std': 362.07012939453125, 'logits/chosen': -0.7233532667160034, 'logits/rejected': -0.7404334545135498, 'epoch': 0.66} + 66%|██████▌ | 447/681 [18:57<09:51, 2.53s/it] 66%|██████▌ | 448/681 [19:00<09:51, 2.54s/it] {'loss': 1.058, 'grad_norm': 37.61946105957031, 'learning_rate': 1.5968958345321177e-07, 'fcm_dpo/beta': 0.001512328744865954, 'fcm_dpo/q_t': 0.398230642080307, 'fcm_dpo/delta': -0.04575078934431076, 'fcm_dpo/margin': 293.324462890625, 'margin_dpo/margin_mean': 293.324462890625, 'margin_dpo/margin_std': 352.77362060546875, 'logps/chosen': -495.0689392089844, 'logps/rejected': -830.6541748046875, 'logps/ref_chosen': -60.00384521484375, 'logps/ref_rejected': -102.26465606689453, 'KL/chosen_KL_mean': -435.0650939941406, 'KL/rejected_KL_mean': -728.3895263671875, 'KL/mean': -581.727294921875, 'KL/std': 313.0533447265625, 'logits/chosen': -0.6439417600631714, 'logits/rejected': -0.6513484716415405, 'epoch': 0.66} + 66%|██████▌ | 448/681 [19:00<09:51, 2.54s/it] 66%|██████▌ | 449/681 [19:02<09:47, 2.53s/it] {'loss': 1.0842, 'grad_norm': 27.254419326782227, 'learning_rate': 1.584941086944423e-07, 'fcm_dpo/beta': 0.0014895712956786156, 'fcm_dpo/q_t': 0.4015531539916992, 'fcm_dpo/delta': -0.05026708170771599, 'fcm_dpo/margin': 300.6900939941406, 'margin_dpo/margin_mean': 300.6900939941406, 'margin_dpo/margin_std': 448.55859375, 'logps/chosen': -505.8792419433594, 'logps/rejected': -827.6396484375, 'logps/ref_chosen': -67.52661895751953, 'logps/ref_rejected': -88.59690856933594, 'KL/chosen_KL_mean': -438.35260009765625, 'KL/rejected_KL_mean': -739.042724609375, 'KL/mean': -588.6976318359375, 'KL/std': 370.13311767578125, 'logits/chosen': -0.6604284048080444, 'logits/rejected': -0.6543349623680115, 'epoch': 0.66} + 66%|██████▌ | 449/681 [19:02<09:47, 2.53s/it] 66%|██████▌ | 450/681 [19:05<09:45, 2.53s/it] {'loss': 1.0136, 'grad_norm': 44.962730407714844, 'learning_rate': 1.573010452010098e-07, 'fcm_dpo/beta': 0.001471104216761887, 'fcm_dpo/q_t': 0.38630813360214233, 'fcm_dpo/delta': -0.08891390264034271, 'fcm_dpo/margin': 329.4759216308594, 'margin_dpo/margin_mean': 329.4759521484375, 'margin_dpo/margin_std': 328.4858703613281, 'logps/chosen': -410.5118713378906, 'logps/rejected': -785.6346435546875, 'logps/ref_chosen': -57.10811996459961, 'logps/ref_rejected': -102.75494384765625, 'KL/chosen_KL_mean': -353.40374755859375, 'KL/rejected_KL_mean': -682.8796997070312, 'KL/mean': -518.1417236328125, 'KL/std': 330.11187744140625, 'logits/chosen': -0.6974214315414429, 'logits/rejected': -0.7095401883125305, 'epoch': 0.66} + 66%|██████▌ | 450/681 [19:05<09:45, 2.53s/it] 66%|██████▌ | 451/681 [19:07<09:20, 2.44s/it] {'loss': 1.1722, 'grad_norm': 39.84464645385742, 'learning_rate': 1.5611042441124687e-07, 'fcm_dpo/beta': 0.0014787260442972183, 'fcm_dpo/q_t': 0.41907864809036255, 'fcm_dpo/delta': 0.05972132831811905, 'fcm_dpo/margin': 231.30206298828125, 'margin_dpo/margin_mean': 231.3020782470703, 'margin_dpo/margin_std': 440.70330810546875, 'logps/chosen': -538.9622192382812, 'logps/rejected': -784.7249145507812, 'logps/ref_chosen': -58.46883010864258, 'logps/ref_rejected': -72.92941284179688, 'KL/chosen_KL_mean': -480.493408203125, 'KL/rejected_KL_mean': -711.7955322265625, 'KL/mean': -596.1444702148438, 'KL/std': 380.8516540527344, 'logits/chosen': -0.7402889728546143, 'logits/rejected': -0.7134509086608887, 'epoch': 0.66} + 66%|██████▌ | 451/681 [19:07<09:20, 2.44s/it] 66%|██████▋ | 452/681 [19:09<09:18, 2.44s/it] {'loss': 1.0562, 'grad_norm': 21.744396209716797, 'learning_rate': 1.549222776991186e-07, 'fcm_dpo/beta': 0.0014726007357239723, 'fcm_dpo/q_t': 0.400713711977005, 'fcm_dpo/delta': -0.022314528003335, 'fcm_dpo/margin': 286.1051330566406, 'margin_dpo/margin_mean': 286.10516357421875, 'margin_dpo/margin_std': 303.95037841796875, 'logps/chosen': -390.4502868652344, 'logps/rejected': -723.936279296875, 'logps/ref_chosen': -50.39055252075195, 'logps/ref_rejected': -97.77142333984375, 'KL/chosen_KL_mean': -340.0597229003906, 'KL/rejected_KL_mean': -626.1648559570312, 'KL/mean': -483.1123046875, 'KL/std': 296.316162109375, 'logits/chosen': -0.6544848680496216, 'logits/rejected': -0.6705133318901062, 'epoch': 0.66} + 66%|██████▋ | 452/681 [19:09<09:18, 2.44s/it] 67%|██████▋ | 453/681 [19:12<09:06, 2.40s/it] {'loss': 1.0919, 'grad_norm': 30.068889617919922, 'learning_rate': 1.5373663637339584e-07, 'fcm_dpo/beta': 0.0014694023411720991, 'fcm_dpo/q_t': 0.40955957770347595, 'fcm_dpo/delta': 0.0009205006062984467, 'fcm_dpo/margin': 271.54254150390625, 'margin_dpo/margin_mean': 271.54254150390625, 'margin_dpo/margin_std': 364.5362548828125, 'logps/chosen': -449.896240234375, 'logps/rejected': -745.9313354492188, 'logps/ref_chosen': -57.71485137939453, 'logps/ref_rejected': -82.20741271972656, 'KL/chosen_KL_mean': -392.181396484375, 'KL/rejected_KL_mean': -663.723876953125, 'KL/mean': -527.95263671875, 'KL/std': 302.70947265625, 'logits/chosen': -0.7128562331199646, 'logits/rejected': -0.6981433629989624, 'epoch': 0.67} + 67%|██████▋ | 453/681 [19:12<09:06, 2.40s/it] 67%|██████▋ | 454/681 [19:14<09:11, 2.43s/it] {'loss': 1.067, 'grad_norm': 31.32180404663086, 'learning_rate': 1.5255353167683017e-07, 'fcm_dpo/beta': 0.0014605964533984661, 'fcm_dpo/q_t': 0.39894014596939087, 'fcm_dpo/delta': -0.043758489191532135, 'fcm_dpo/margin': 302.390625, 'margin_dpo/margin_mean': 302.390625, 'margin_dpo/margin_std': 393.43988037109375, 'logps/chosen': -543.726318359375, 'logps/rejected': -870.1220703125, 'logps/ref_chosen': -60.945648193359375, 'logps/ref_rejected': -84.95079040527344, 'KL/chosen_KL_mean': -482.7806701660156, 'KL/rejected_KL_mean': -785.1713256835938, 'KL/mean': -633.9759521484375, 'KL/std': 346.2369384765625, 'logits/chosen': -0.7373279333114624, 'logits/rejected': -0.725917637348175, 'epoch': 0.67} + 67%|██████▋ | 454/681 [19:14<09:11, 2.43s/it] 67%|██████▋ | 455/681 [19:16<09:05, 2.41s/it] {'loss': 1.014, 'grad_norm': 35.17242431640625, 'learning_rate': 1.5137299478533064e-07, 'fcm_dpo/beta': 0.0014415888581424952, 'fcm_dpo/q_t': 0.3820483982563019, 'fcm_dpo/delta': -0.12789547443389893, 'fcm_dpo/margin': 361.5738220214844, 'margin_dpo/margin_mean': 361.5738220214844, 'margin_dpo/margin_std': 407.70379638671875, 'logps/chosen': -454.8466491699219, 'logps/rejected': -886.835205078125, 'logps/ref_chosen': -44.88671112060547, 'logps/ref_rejected': -115.30147552490234, 'KL/chosen_KL_mean': -409.9599609375, 'KL/rejected_KL_mean': -771.53369140625, 'KL/mean': -590.746826171875, 'KL/std': 357.16259765625, 'logits/chosen': -0.6761902570724487, 'logits/rejected': -0.6945501565933228, 'epoch': 0.67} + 67%|██████▋ | 455/681 [19:16<09:05, 2.41s/it] 67%|██████▋ | 456/681 [19:19<09:14, 2.47s/it] {'loss': 1.0206, 'grad_norm': 24.956411361694336, 'learning_rate': 1.5019505680714232e-07, 'fcm_dpo/beta': 0.0014074372593313456, 'fcm_dpo/q_t': 0.38988521695137024, 'fcm_dpo/delta': -0.08486473560333252, 'fcm_dpo/margin': 341.6557312011719, 'margin_dpo/margin_mean': 341.65576171875, 'margin_dpo/margin_std': 363.93701171875, 'logps/chosen': -498.0615234375, 'logps/rejected': -887.8983154296875, 'logps/ref_chosen': -57.036781311035156, 'logps/ref_rejected': -105.21784210205078, 'KL/chosen_KL_mean': -441.02471923828125, 'KL/rejected_KL_mean': -782.6804809570312, 'KL/mean': -611.8526000976562, 'KL/std': 368.62628173828125, 'logits/chosen': -0.7183812260627747, 'logits/rejected': -0.7420483827590942, 'epoch': 0.67} + 67%|██████▋ | 456/681 [19:19<09:14, 2.47s/it] 67%|██████▋ | 457/681 [19:22<09:28, 2.54s/it] {'loss': 1.0307, 'grad_norm': 29.466060638427734, 'learning_rate': 1.4901974878202627e-07, 'fcm_dpo/beta': 0.0013823909685015678, 'fcm_dpo/q_t': 0.39179015159606934, 'fcm_dpo/delta': -0.06590519100427628, 'fcm_dpo/margin': 334.6352233886719, 'margin_dpo/margin_mean': 334.6352233886719, 'margin_dpo/margin_std': 347.06805419921875, 'logps/chosen': -482.16619873046875, 'logps/rejected': -847.66845703125, 'logps/ref_chosen': -54.24253845214844, 'logps/ref_rejected': -85.10956573486328, 'KL/chosen_KL_mean': -427.92364501953125, 'KL/rejected_KL_mean': -762.558837890625, 'KL/mean': -595.2412719726562, 'KL/std': 349.4745178222656, 'logits/chosen': -0.6991932392120361, 'logits/rejected': -0.7005044221878052, 'epoch': 0.67} + 67%|██████▋ | 457/681 [19:22<09:28, 2.54s/it] 67%|██████▋ | 458/681 [19:24<09:22, 2.52s/it] {'loss': 1.0701, 'grad_norm': 30.682571411132812, 'learning_rate': 1.4784710168044212e-07, 'fcm_dpo/beta': 0.001369113102555275, 'fcm_dpo/q_t': 0.4038897454738617, 'fcm_dpo/delta': -0.022949304431676865, 'fcm_dpo/margin': 307.73828125, 'margin_dpo/margin_mean': 307.7383117675781, 'margin_dpo/margin_std': 368.6239929199219, 'logps/chosen': -508.1318359375, 'logps/rejected': -858.14453125, 'logps/ref_chosen': -55.40888214111328, 'logps/ref_rejected': -97.68325805664062, 'KL/chosen_KL_mean': -452.7229309082031, 'KL/rejected_KL_mean': -760.4613037109375, 'KL/mean': -606.5921020507812, 'KL/std': 326.22540283203125, 'logits/chosen': -0.751007616519928, 'logits/rejected': -0.7468206286430359, 'epoch': 0.67} + 67%|██████▋ | 458/681 [19:24<09:22, 2.52s/it] 67%|██████▋ | 459/681 [19:27<09:23, 2.54s/it] {'loss': 1.0571, 'grad_norm': 32.14663314819336, 'learning_rate': 1.466771464027316e-07, 'fcm_dpo/beta': 0.001356898806989193, 'fcm_dpo/q_t': 0.3953377604484558, 'fcm_dpo/delta': -0.0636778175830841, 'fcm_dpo/margin': 339.2184753417969, 'margin_dpo/margin_mean': 339.218505859375, 'margin_dpo/margin_std': 428.857177734375, 'logps/chosen': -531.904541015625, 'logps/rejected': -910.7340698242188, 'logps/ref_chosen': -46.55748748779297, 'logps/ref_rejected': -86.16854095458984, 'KL/chosen_KL_mean': -485.3470458984375, 'KL/rejected_KL_mean': -824.5655517578125, 'KL/mean': -654.956298828125, 'KL/std': 385.02587890625, 'logits/chosen': -0.7115896940231323, 'logits/rejected': -0.7303779125213623, 'epoch': 0.67} + 67%|██████▋ | 459/681 [19:27<09:23, 2.54s/it] 68%|██████▊ | 460/681 [19:29<09:30, 2.58s/it] {'loss': 1.0181, 'grad_norm': 35.66582107543945, 'learning_rate': 1.4550991377830423e-07, 'fcm_dpo/beta': 0.001337511232122779, 'fcm_dpo/q_t': 0.3863397240638733, 'fcm_dpo/delta': -0.10346446931362152, 'fcm_dpo/margin': 372.607421875, 'margin_dpo/margin_mean': 372.607421875, 'margin_dpo/margin_std': 416.0313415527344, 'logps/chosen': -581.792724609375, 'logps/rejected': -1006.884521484375, 'logps/ref_chosen': -51.63489532470703, 'logps/ref_rejected': -104.11935424804688, 'KL/chosen_KL_mean': -530.1577758789062, 'KL/rejected_KL_mean': -902.7651977539062, 'KL/mean': -716.4615478515625, 'KL/std': 376.7371826171875, 'logits/chosen': -0.7767213582992554, 'logits/rejected': -0.8095457553863525, 'epoch': 0.68} + 68%|██████▊ | 460/681 [19:29<09:30, 2.58s/it] 68%|██████▊ | 461/681 [19:32<09:28, 2.58s/it] {'loss': 1.1054, 'grad_norm': 29.899106979370117, 'learning_rate': 1.4434543456482518e-07, 'fcm_dpo/beta': 0.001335039036348462, 'fcm_dpo/q_t': 0.4116092324256897, 'fcm_dpo/delta': 0.00526130385696888, 'fcm_dpo/margin': 295.65155029296875, 'margin_dpo/margin_mean': 295.65155029296875, 'margin_dpo/margin_std': 430.65924072265625, 'logps/chosen': -617.1226196289062, 'logps/rejected': -944.069091796875, 'logps/ref_chosen': -55.18195724487305, 'logps/ref_rejected': -86.47689819335938, 'KL/chosen_KL_mean': -561.940673828125, 'KL/rejected_KL_mean': -857.5922241210938, 'KL/mean': -709.7664794921875, 'KL/std': 384.3741149902344, 'logits/chosen': -0.7882189750671387, 'logits/rejected': -0.8021144270896912, 'epoch': 0.68} + 68%|██████▊ | 461/681 [19:32<09:28, 2.58s/it] 68%|██████▊ | 462/681 [19:35<09:19, 2.55s/it] {'loss': 1.1632, 'grad_norm': 39.60346984863281, 'learning_rate': 1.4318373944740484e-07, 'fcm_dpo/beta': 0.0013445301447063684, 'fcm_dpo/q_t': 0.42686232924461365, 'fcm_dpo/delta': 0.07435386627912521, 'fcm_dpo/margin': 244.05184936523438, 'margin_dpo/margin_mean': 244.05184936523438, 'margin_dpo/margin_std': 432.3650817871094, 'logps/chosen': -648.2914428710938, 'logps/rejected': -901.25634765625, 'logps/ref_chosen': -69.92803192138672, 'logps/ref_rejected': -78.84111022949219, 'KL/chosen_KL_mean': -578.3634033203125, 'KL/rejected_KL_mean': -822.4152221679688, 'KL/mean': -700.3892822265625, 'KL/std': 376.43658447265625, 'logits/chosen': -0.8968935012817383, 'logits/rejected': -0.8908392190933228, 'epoch': 0.68} + 68%|██████▊ | 462/681 [19:35<09:19, 2.55s/it] 68%|██████▊ | 463/681 [19:37<09:08, 2.52s/it] {'loss': 1.0814, 'grad_norm': 44.926334381103516, 'learning_rate': 1.4202485903778976e-07, 'fcm_dpo/beta': 0.0013473678845912218, 'fcm_dpo/q_t': 0.401495099067688, 'fcm_dpo/delta': -0.04382871836423874, 'fcm_dpo/margin': 327.86480712890625, 'margin_dpo/margin_mean': 327.86480712890625, 'margin_dpo/margin_std': 460.8346862792969, 'logps/chosen': -644.43017578125, 'logps/rejected': -1006.0455932617188, 'logps/ref_chosen': -55.27437210083008, 'logps/ref_rejected': -89.02497863769531, 'KL/chosen_KL_mean': -589.15576171875, 'KL/rejected_KL_mean': -917.0206298828125, 'KL/mean': -753.0881958007812, 'KL/std': 411.322021484375, 'logits/chosen': -0.8459576964378357, 'logits/rejected': -0.8583585023880005, 'epoch': 0.68} + 68%|██████▊ | 463/681 [19:37<09:08, 2.52s/it] 68%|██████▊ | 464/681 [19:39<08:50, 2.44s/it] {'loss': 0.9347, 'grad_norm': 50.258697509765625, 'learning_rate': 1.4086882387355658e-07, 'fcm_dpo/beta': 0.0012808447936549783, 'fcm_dpo/q_t': 0.35675048828125, 'fcm_dpo/delta': -0.25145474076271057, 'fcm_dpo/margin': 494.1038818359375, 'margin_dpo/margin_mean': 494.1038818359375, 'margin_dpo/margin_std': 484.71392822265625, 'logps/chosen': -628.4513549804688, 'logps/rejected': -1174.13232421875, 'logps/ref_chosen': -50.91230010986328, 'logps/ref_rejected': -102.4893798828125, 'KL/chosen_KL_mean': -577.5390625, 'KL/rejected_KL_mean': -1071.6429443359375, 'KL/mean': -824.5910034179688, 'KL/std': 482.9642333984375, 'logits/chosen': -0.8322213888168335, 'logits/rejected': -0.8996630311012268, 'epoch': 0.68} + 68%|██████▊ | 464/681 [19:39<08:50, 2.44s/it] 68%|██████▊ | 465/681 [19:42<08:55, 2.48s/it] {'loss': 1.0558, 'grad_norm': 39.39644241333008, 'learning_rate': 1.3971566441730714e-07, 'fcm_dpo/beta': 0.0012595669832080603, 'fcm_dpo/q_t': 0.3856554627418518, 'fcm_dpo/delta': -0.09877628087997437, 'fcm_dpo/margin': 392.17816162109375, 'margin_dpo/margin_mean': 392.17816162109375, 'margin_dpo/margin_std': 528.9464111328125, 'logps/chosen': -661.684814453125, 'logps/rejected': -1107.692138671875, 'logps/ref_chosen': -60.116851806640625, 'logps/ref_rejected': -113.94602966308594, 'KL/chosen_KL_mean': -601.5679321289062, 'KL/rejected_KL_mean': -993.74609375, 'KL/mean': -797.656982421875, 'KL/std': 499.0876159667969, 'logits/chosen': -0.8465057015419006, 'logits/rejected': -0.8691214323043823, 'epoch': 0.68} + 68%|██████▊ | 465/681 [19:42<08:55, 2.48s/it] 68%|██████▊ | 466/681 [19:44<09:02, 2.52s/it] {'loss': 1.0962, 'grad_norm': 38.076751708984375, 'learning_rate': 1.3856541105586545e-07, 'fcm_dpo/beta': 0.0012364451540634036, 'fcm_dpo/q_t': 0.4021310806274414, 'fcm_dpo/delta': -0.03083794191479683, 'fcm_dpo/margin': 346.8487854003906, 'margin_dpo/margin_mean': 346.84881591796875, 'margin_dpo/margin_std': 507.0184326171875, 'logps/chosen': -713.0208740234375, 'logps/rejected': -1097.26416015625, 'logps/ref_chosen': -52.920921325683594, 'logps/ref_rejected': -90.3154296875, 'KL/chosen_KL_mean': -660.0999755859375, 'KL/rejected_KL_mean': -1006.94873046875, 'KL/mean': -833.5242919921875, 'KL/std': 460.6139221191406, 'logits/chosen': -0.8880220651626587, 'logits/rejected': -0.8946011662483215, 'epoch': 0.68} + 68%|██████▊ | 466/681 [19:44<09:02, 2.52s/it] 69%|██████▊ | 467/681 [19:47<08:58, 2.51s/it] {'loss': 1.1519, 'grad_norm': 62.107765197753906, 'learning_rate': 1.3741809409947729e-07, 'fcm_dpo/beta': 0.0012187270913273096, 'fcm_dpo/q_t': 0.40229350328445435, 'fcm_dpo/delta': -0.060437288135290146, 'fcm_dpo/margin': 373.8318786621094, 'margin_dpo/margin_mean': 373.8318786621094, 'margin_dpo/margin_std': 707.5423583984375, 'logps/chosen': -902.7098388671875, 'logps/rejected': -1300.68603515625, 'logps/ref_chosen': -78.7158203125, 'logps/ref_rejected': -102.86019897460938, 'KL/chosen_KL_mean': -823.9940185546875, 'KL/rejected_KL_mean': -1197.8258056640625, 'KL/mean': -1010.909912109375, 'KL/std': 614.4384155273438, 'logits/chosen': -0.9601616859436035, 'logits/rejected': -0.9515029788017273, 'epoch': 0.69} + 69%|██████▊ | 467/681 [19:47<08:58, 2.51s/it] 69%|██████▊ | 468/681 [19:50<09:03, 2.55s/it] {'loss': 1.0136, 'grad_norm': 45.27859878540039, 'learning_rate': 1.362737437810114e-07, 'fcm_dpo/beta': 0.001194630516692996, 'fcm_dpo/q_t': 0.3789057433605194, 'fcm_dpo/delta': -0.17134541273117065, 'fcm_dpo/margin': 470.14892578125, 'margin_dpo/margin_mean': 470.14892578125, 'margin_dpo/margin_std': 618.6029663085938, 'logps/chosen': -731.9397583007812, 'logps/rejected': -1233.18212890625, 'logps/ref_chosen': -69.93536376953125, 'logps/ref_rejected': -101.02880859375, 'KL/chosen_KL_mean': -662.00439453125, 'KL/rejected_KL_mean': -1132.1533203125, 'KL/mean': -897.078857421875, 'KL/std': 548.8661499023438, 'logits/chosen': -0.9165079593658447, 'logits/rejected': -0.9324535131454468, 'epoch': 0.69} + 69%|██████▊ | 468/681 [19:50<09:03, 2.55s/it] 69%|██████▉ | 469/681 [19:52<09:16, 2.62s/it] {'loss': 1.0078, 'grad_norm': 44.61856460571289, 'learning_rate': 1.351323902551631e-07, 'fcm_dpo/beta': 0.0011510958429425955, 'fcm_dpo/q_t': 0.37772035598754883, 'fcm_dpo/delta': -0.150983989238739, 'fcm_dpo/margin': 470.13177490234375, 'margin_dpo/margin_mean': 470.13177490234375, 'margin_dpo/margin_std': 545.7459716796875, 'logps/chosen': -750.744384765625, 'logps/rejected': -1257.537841796875, 'logps/ref_chosen': -68.12469482421875, 'logps/ref_rejected': -104.78640747070312, 'KL/chosen_KL_mean': -682.61962890625, 'KL/rejected_KL_mean': -1152.75146484375, 'KL/mean': -917.685546875, 'KL/std': 480.97088623046875, 'logits/chosen': -0.9298604726791382, 'logits/rejected': -0.9497323036193848, 'epoch': 0.69} + 69%|██████▉ | 469/681 [19:52<09:16, 2.62s/it] 69%|██████▉ | 470/681 [19:55<09:08, 2.60s/it] {'loss': 1.0646, 'grad_norm': 27.795562744140625, 'learning_rate': 1.339940635976592e-07, 'fcm_dpo/beta': 0.0011422440875321627, 'fcm_dpo/q_t': 0.39377570152282715, 'fcm_dpo/delta': -0.06988134980201721, 'fcm_dpo/margin': 408.4580993652344, 'margin_dpo/margin_mean': 408.45806884765625, 'margin_dpo/margin_std': 547.7627563476562, 'logps/chosen': -628.96923828125, 'logps/rejected': -1076.3382568359375, 'logps/ref_chosen': -43.791927337646484, 'logps/ref_rejected': -82.70285034179688, 'KL/chosen_KL_mean': -585.1773071289062, 'KL/rejected_KL_mean': -993.6353759765625, 'KL/mean': -789.4063720703125, 'KL/std': 501.39984130859375, 'logits/chosen': -0.9022126197814941, 'logits/rejected': -0.9164737462997437, 'epoch': 0.69} + 69%|██████▉ | 470/681 [19:55<09:08, 2.60s/it] 69%|██████▉ | 471/681 [19:57<08:48, 2.52s/it] {'loss': 1.1312, 'grad_norm': 53.5421142578125, 'learning_rate': 1.3285879380446563e-07, 'fcm_dpo/beta': 0.0011305524967610836, 'fcm_dpo/q_t': 0.41612815856933594, 'fcm_dpo/delta': 0.019698694348335266, 'fcm_dpo/margin': 336.62200927734375, 'margin_dpo/margin_mean': 336.62200927734375, 'margin_dpo/margin_std': 550.3225708007812, 'logps/chosen': -774.2908935546875, 'logps/rejected': -1131.183837890625, 'logps/ref_chosen': -63.33952331542969, 'logps/ref_rejected': -83.61048126220703, 'KL/chosen_KL_mean': -710.951416015625, 'KL/rejected_KL_mean': -1047.5733642578125, 'KL/mean': -879.2623291015625, 'KL/std': 499.705810546875, 'logits/chosen': -0.9983842968940735, 'logits/rejected': -1.0077528953552246, 'epoch': 0.69} + 69%|██████▉ | 471/681 [19:57<08:48, 2.52s/it] 69%|██████▉ | 472/681 [20:00<09:01, 2.59s/it] {'loss': 1.1093, 'grad_norm': 36.24667739868164, 'learning_rate': 1.317266107909975e-07, 'fcm_dpo/beta': 0.0011264560744166374, 'fcm_dpo/q_t': 0.40537628531455994, 'fcm_dpo/delta': -0.04338788241147995, 'fcm_dpo/margin': 391.5537109375, 'margin_dpo/margin_mean': 391.5537109375, 'margin_dpo/margin_std': 631.884033203125, 'logps/chosen': -774.8431396484375, 'logps/rejected': -1199.93994140625, 'logps/ref_chosen': -83.66610717773438, 'logps/ref_rejected': -117.20919799804688, 'KL/chosen_KL_mean': -691.177001953125, 'KL/rejected_KL_mean': -1082.730712890625, 'KL/mean': -886.953857421875, 'KL/std': 583.6131591796875, 'logits/chosen': -0.9884932041168213, 'logits/rejected': -0.9719465970993042, 'epoch': 0.69} + 69%|██████▉ | 472/681 [20:00<09:01, 2.59s/it] 69%|██████▉ | 473/681 [20:03<09:06, 2.63s/it] {'loss': 1.3563, 'grad_norm': 122.36241912841797, 'learning_rate': 1.3059754439133002e-07, 'fcm_dpo/beta': 0.0011280329199507833, 'fcm_dpo/q_t': 0.4554038643836975, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 185.667236328125, 'margin_dpo/margin_mean': 185.667236328125, 'margin_dpo/margin_std': 783.8460693359375, 'logps/chosen': -896.0145874023438, 'logps/rejected': -1099.3314208984375, 'logps/ref_chosen': -63.49696731567383, 'logps/ref_rejected': -81.14657592773438, 'KL/chosen_KL_mean': -832.517578125, 'KL/rejected_KL_mean': -1018.184814453125, 'KL/mean': -925.3512573242188, 'KL/std': 606.7120361328125, 'logits/chosen': -0.988477349281311, 'logits/rejected': -0.9607559442520142, 'epoch': 0.69} + 69%|██████▉ | 473/681 [20:03<09:06, 2.63s/it] 70%|██████▉ | 474/681 [20:05<09:08, 2.65s/it] {'loss': 1.1495, 'grad_norm': 37.246849060058594, 'learning_rate': 1.2947162435741277e-07, 'fcm_dpo/beta': 0.0011190182995051146, 'fcm_dpo/q_t': 0.4132459759712219, 'fcm_dpo/delta': -0.08055973798036575, 'fcm_dpo/margin': 338.5450134277344, 'margin_dpo/margin_mean': 338.5450439453125, 'margin_dpo/margin_std': 580.7293701171875, 'logps/chosen': -707.3243408203125, 'logps/rejected': -1083.337890625, 'logps/ref_chosen': -52.6119384765625, 'logps/ref_rejected': -90.08041381835938, 'KL/chosen_KL_mean': -654.71240234375, 'KL/rejected_KL_mean': -993.2574462890625, 'KL/mean': -823.9849853515625, 'KL/std': 526.5281982421875, 'logits/chosen': -0.92474365234375, 'logits/rejected': -0.9312121868133545, 'epoch': 0.7} + 70%|██████▉ | 474/681 [20:05<09:08, 2.65s/it] 70%|██████▉ | 475/681 [20:08<08:54, 2.60s/it] {'loss': 1.0281, 'grad_norm': 51.85737991333008, 'learning_rate': 1.2834888035828596e-07, 'fcm_dpo/beta': 0.0010959157953038812, 'fcm_dpo/q_t': 0.39104607701301575, 'fcm_dpo/delta': -0.0721563771367073, 'fcm_dpo/margin': 427.5458679199219, 'margin_dpo/margin_mean': 427.5458984375, 'margin_dpo/margin_std': 449.01556396484375, 'logps/chosen': -507.7729797363281, 'logps/rejected': -982.8865966796875, 'logps/ref_chosen': -42.49519348144531, 'logps/ref_rejected': -90.06294250488281, 'KL/chosen_KL_mean': -465.27777099609375, 'KL/rejected_KL_mean': -892.8236083984375, 'KL/mean': -679.0506591796875, 'KL/std': 421.060546875, 'logits/chosen': -0.9467837810516357, 'logits/rejected': -0.977871835231781, 'epoch': 0.7} + 70%|██████▉ | 475/681 [20:08<08:54, 2.60s/it] 70%|██████▉ | 476/681 [20:10<08:49, 2.58s/it] {'loss': 1.1116, 'grad_norm': 79.2264175415039, 'learning_rate': 1.2722934197929802e-07, 'fcm_dpo/beta': 0.0010992654133588076, 'fcm_dpo/q_t': 0.41674578189849854, 'fcm_dpo/delta': 0.03918338194489479, 'fcm_dpo/margin': 329.5415344238281, 'margin_dpo/margin_mean': 329.54150390625, 'margin_dpo/margin_std': 444.04339599609375, 'logps/chosen': -617.3216552734375, 'logps/rejected': -977.6240234375, 'logps/ref_chosen': -42.94938278198242, 'logps/ref_rejected': -73.71023559570312, 'KL/chosen_KL_mean': -574.3722534179688, 'KL/rejected_KL_mean': -903.913818359375, 'KL/mean': -739.1430053710938, 'KL/std': 451.5216064453125, 'logits/chosen': -0.9211336970329285, 'logits/rejected': -0.9354342222213745, 'epoch': 0.7} + 70%|██████▉ | 476/681 [20:10<08:49, 2.58s/it] 70%|███████ | 477/681 [20:13<08:47, 2.58s/it] {'loss': 1.1286, 'grad_norm': 36.04801940917969, 'learning_rate': 1.2611303872132631e-07, 'fcm_dpo/beta': 0.0011070938780903816, 'fcm_dpo/q_t': 0.4111800789833069, 'fcm_dpo/delta': 0.009110800921916962, 'fcm_dpo/margin': 353.25738525390625, 'margin_dpo/margin_mean': 353.2573547363281, 'margin_dpo/margin_std': 582.523193359375, 'logps/chosen': -672.7725219726562, 'logps/rejected': -1031.3946533203125, 'logps/ref_chosen': -70.77261352539062, 'logps/ref_rejected': -76.13737487792969, 'KL/chosen_KL_mean': -601.9998779296875, 'KL/rejected_KL_mean': -955.2572021484375, 'KL/mean': -778.6286010742188, 'KL/std': 492.07171630859375, 'logits/chosen': -0.9658732414245605, 'logits/rejected': -0.9317635297775269, 'epoch': 0.7} + 70%|███████ | 477/681 [20:13<08:47, 2.58s/it] 70%|███████ | 478/681 [20:16<08:49, 2.61s/it] {'loss': 1.0694, 'grad_norm': 55.68071365356445, 'learning_rate': 1.2500000000000005e-07, 'fcm_dpo/beta': 0.0011018933728337288, 'fcm_dpo/q_t': 0.40104353427886963, 'fcm_dpo/delta': -0.0336417555809021, 'fcm_dpo/margin': 392.1885986328125, 'margin_dpo/margin_mean': 392.1885986328125, 'margin_dpo/margin_std': 496.35369873046875, 'logps/chosen': -526.66064453125, 'logps/rejected': -962.7706298828125, 'logps/ref_chosen': -41.440513610839844, 'logps/ref_rejected': -85.36196899414062, 'KL/chosen_KL_mean': -485.2200927734375, 'KL/rejected_KL_mean': -877.40869140625, 'KL/mean': -681.3143920898438, 'KL/std': 427.36639404296875, 'logits/chosen': -0.8429279327392578, 'logits/rejected': -0.8667222857475281, 'epoch': 0.7} + 70%|███████ | 478/681 [20:16<08:49, 2.61s/it] 70%|███████ | 479/681 [20:18<08:35, 2.55s/it] {'loss': 1.1182, 'grad_norm': 31.531246185302734, 'learning_rate': 1.2389025514492456e-07, 'fcm_dpo/beta': 0.0011019103694707155, 'fcm_dpo/q_t': 0.4086768627166748, 'fcm_dpo/delta': -0.015925616025924683, 'fcm_dpo/margin': 376.192626953125, 'margin_dpo/margin_mean': 376.192626953125, 'margin_dpo/margin_std': 613.9232177734375, 'logps/chosen': -700.1594848632812, 'logps/rejected': -1117.560546875, 'logps/ref_chosen': -53.907920837402344, 'logps/ref_rejected': -95.1163330078125, 'KL/chosen_KL_mean': -646.2515869140625, 'KL/rejected_KL_mean': -1022.4442138671875, 'KL/mean': -834.347900390625, 'KL/std': 526.5473022460938, 'logits/chosen': -0.8888028860092163, 'logits/rejected': -0.9207658767700195, 'epoch': 0.7} + 70%|███████ | 479/681 [20:18<08:35, 2.55s/it] 70%|███████ | 480/681 [20:20<08:24, 2.51s/it] {'loss': 1.1792, 'grad_norm': 53.24702453613281, 'learning_rate': 1.227838333989088e-07, 'fcm_dpo/beta': 0.0010890522971749306, 'fcm_dpo/q_t': 0.42761245369911194, 'fcm_dpo/delta': -0.04379244148731232, 'fcm_dpo/margin': 296.2763366699219, 'margin_dpo/margin_mean': 296.27630615234375, 'margin_dpo/margin_std': 538.973876953125, 'logps/chosen': -825.3011474609375, 'logps/rejected': -1145.827392578125, 'logps/ref_chosen': -58.682701110839844, 'logps/ref_rejected': -82.93248748779297, 'KL/chosen_KL_mean': -766.6184692382812, 'KL/rejected_KL_mean': -1062.894775390625, 'KL/mean': -914.756591796875, 'KL/std': 496.72235107421875, 'logits/chosen': -0.9728115797042847, 'logits/rejected': -0.9693245887756348, 'epoch': 0.7} + 70%|███████ | 480/681 [20:21<08:24, 2.51s/it] 71%|███████ | 481/681 [20:23<08:20, 2.50s/it] {'loss': 1.0295, 'grad_norm': 51.05171203613281, 'learning_rate': 1.2168076391719489e-07, 'fcm_dpo/beta': 0.0010695490054786205, 'fcm_dpo/q_t': 0.38825610280036926, 'fcm_dpo/delta': -0.09937489032745361, 'fcm_dpo/margin': 462.38153076171875, 'margin_dpo/margin_mean': 462.38153076171875, 'margin_dpo/margin_std': 550.9169311523438, 'logps/chosen': -690.2264404296875, 'logps/rejected': -1190.0640869140625, 'logps/ref_chosen': -54.964271545410156, 'logps/ref_rejected': -92.42044067382812, 'KL/chosen_KL_mean': -635.2621459960938, 'KL/rejected_KL_mean': -1097.6435546875, 'KL/mean': -866.452880859375, 'KL/std': 512.14208984375, 'logits/chosen': -0.9634227752685547, 'logits/rejected': -0.9947628974914551, 'epoch': 0.71} + 71%|███████ | 481/681 [20:23<08:20, 2.50s/it] 71%|███████ | 482/681 [20:26<08:34, 2.59s/it] {'loss': 1.2719, 'grad_norm': 65.0921859741211, 'learning_rate': 1.2058107576668938e-07, 'fcm_dpo/beta': 0.001071346690878272, 'fcm_dpo/q_t': 0.4453909397125244, 'fcm_dpo/delta': 0.07841724902391434, 'fcm_dpo/margin': 213.99391174316406, 'margin_dpo/margin_mean': 213.993896484375, 'margin_dpo/margin_std': 620.146484375, 'logps/chosen': -780.3325805664062, 'logps/rejected': -1014.362548828125, 'logps/ref_chosen': -67.553466796875, 'logps/ref_rejected': -87.58953857421875, 'KL/chosen_KL_mean': -712.7791137695312, 'KL/rejected_KL_mean': -926.7730102539062, 'KL/mean': -819.776123046875, 'KL/std': 536.703857421875, 'logits/chosen': -0.8759874105453491, 'logits/rejected': -0.8658995032310486, 'epoch': 0.71} + 71%|███████ | 482/681 [20:26<08:34, 2.59s/it] 71%|███████ | 483/681 [20:28<08:33, 2.59s/it] {'loss': 1.0269, 'grad_norm': 32.96552276611328, 'learning_rate': 1.194847979251979e-07, 'fcm_dpo/beta': 0.0010598013177514076, 'fcm_dpo/q_t': 0.38709717988967896, 'fcm_dpo/delta': -0.11290125548839569, 'fcm_dpo/margin': 478.30712890625, 'margin_dpo/margin_mean': 478.30712890625, 'margin_dpo/margin_std': 586.1543579101562, 'logps/chosen': -709.5712890625, 'logps/rejected': -1220.3355712890625, 'logps/ref_chosen': -63.32981872558594, 'logps/ref_rejected': -95.78697204589844, 'KL/chosen_KL_mean': -646.241455078125, 'KL/rejected_KL_mean': -1124.548583984375, 'KL/mean': -885.39501953125, 'KL/std': 534.4652099609375, 'logits/chosen': -0.9770244359970093, 'logits/rejected': -0.9915624856948853, 'epoch': 0.71} + 71%|███████ | 483/681 [20:28<08:33, 2.59s/it] 71%|███████ | 484/681 [20:31<08:15, 2.52s/it] {'loss': 1.0446, 'grad_norm': 53.46791458129883, 'learning_rate': 1.1839195928066101e-07, 'fcm_dpo/beta': 0.001048530451953411, 'fcm_dpo/q_t': 0.39412179589271545, 'fcm_dpo/delta': -0.07025562971830368, 'fcm_dpo/margin': 445.3449401855469, 'margin_dpo/margin_mean': 445.3449401855469, 'margin_dpo/margin_std': 531.8111572265625, 'logps/chosen': -612.5665283203125, 'logps/rejected': -1083.144775390625, 'logps/ref_chosen': -59.13812255859375, 'logps/ref_rejected': -84.37144470214844, 'KL/chosen_KL_mean': -553.4284057617188, 'KL/rejected_KL_mean': -998.7734375, 'KL/mean': -776.1009521484375, 'KL/std': 526.0048828125, 'logits/chosen': -0.9660162329673767, 'logits/rejected': -0.993166983127594, 'epoch': 0.71} + 71%|███████ | 484/681 [20:31<08:15, 2.52s/it] 71%|███████ | 485/681 [20:33<08:09, 2.50s/it] {'loss': 1.0789, 'grad_norm': 34.87718200683594, 'learning_rate': 1.1730258863039347e-07, 'fcm_dpo/beta': 0.001037056790664792, 'fcm_dpo/q_t': 0.40099409222602844, 'fcm_dpo/delta': -0.043720267713069916, 'fcm_dpo/margin': 425.9684753417969, 'margin_dpo/margin_mean': 425.968505859375, 'margin_dpo/margin_std': 593.173583984375, 'logps/chosen': -646.9246826171875, 'logps/rejected': -1117.40771484375, 'logps/ref_chosen': -58.849571228027344, 'logps/ref_rejected': -103.36408233642578, 'KL/chosen_KL_mean': -588.0751342773438, 'KL/rejected_KL_mean': -1014.043701171875, 'KL/mean': -801.0594482421875, 'KL/std': 530.8162841796875, 'logits/chosen': -0.8909621238708496, 'logits/rejected': -0.9123867750167847, 'epoch': 0.71} + 71%|███████ | 485/681 [20:33<08:09, 2.50s/it] 71%|███████▏ | 486/681 [20:35<07:41, 2.37s/it] {'loss': 1.0787, 'grad_norm': 46.1490364074707, 'learning_rate': 1.1621671468032493e-07, 'fcm_dpo/beta': 0.0010174668859690428, 'fcm_dpo/q_t': 0.39317959547042847, 'fcm_dpo/delta': -0.08924780786037445, 'fcm_dpo/margin': 476.5207824707031, 'margin_dpo/margin_mean': 476.5207824707031, 'margin_dpo/margin_std': 722.4910278320312, 'logps/chosen': -746.310546875, 'logps/rejected': -1259.7109375, 'logps/ref_chosen': -55.25966262817383, 'logps/ref_rejected': -92.13936614990234, 'KL/chosen_KL_mean': -691.0508422851562, 'KL/rejected_KL_mean': -1167.5716552734375, 'KL/mean': -929.311279296875, 'KL/std': 593.0984497070312, 'logits/chosen': -0.9853001832962036, 'logits/rejected': -0.999464750289917, 'epoch': 0.71} + 71%|███████▏ | 486/681 [20:35<07:41, 2.37s/it] 72%|███████▏ | 487/681 [20:38<07:56, 2.46s/it] {'loss': 1.136, 'grad_norm': 39.361175537109375, 'learning_rate': 1.1513436604424378e-07, 'fcm_dpo/beta': 0.0010237495880573988, 'fcm_dpo/q_t': 0.4159180819988251, 'fcm_dpo/delta': 0.03809621185064316, 'fcm_dpo/margin': 354.3516845703125, 'margin_dpo/margin_mean': 354.3516845703125, 'margin_dpo/margin_std': 571.5254516601562, 'logps/chosen': -799.0213623046875, 'logps/rejected': -1192.728515625, 'logps/ref_chosen': -53.06330871582031, 'logps/ref_rejected': -92.41883087158203, 'KL/chosen_KL_mean': -745.9581298828125, 'KL/rejected_KL_mean': -1100.3096923828125, 'KL/mean': -923.1339111328125, 'KL/std': 566.0206298828125, 'logits/chosen': -1.0012881755828857, 'logits/rejected': -1.0079126358032227, 'epoch': 0.72} + 72%|███████▏ | 487/681 [20:38<07:56, 2.46s/it] 72%|███████▏ | 488/681 [20:40<08:03, 2.51s/it] {'loss': 1.1032, 'grad_norm': 33.7827033996582, 'learning_rate': 1.1405557124304335e-07, 'fcm_dpo/beta': 0.0010283133015036583, 'fcm_dpo/q_t': 0.41300255060195923, 'fcm_dpo/delta': 0.019743794575333595, 'fcm_dpo/margin': 370.1629943847656, 'margin_dpo/margin_mean': 370.16302490234375, 'margin_dpo/margin_std': 498.775390625, 'logps/chosen': -681.3538818359375, 'logps/rejected': -1083.2952880859375, 'logps/ref_chosen': -52.22815704345703, 'logps/ref_rejected': -84.00656127929688, 'KL/chosen_KL_mean': -629.125732421875, 'KL/rejected_KL_mean': -999.2886962890625, 'KL/mean': -814.207275390625, 'KL/std': 508.64453125, 'logits/chosen': -0.9368076324462891, 'logits/rejected': -0.9440141916275024, 'epoch': 0.72} + 72%|███████▏ | 488/681 [20:41<08:03, 2.51s/it] 72%|███████▏ | 489/681 [20:43<07:59, 2.49s/it] {'loss': 1.1184, 'grad_norm': 27.7478084564209, 'learning_rate': 1.1298035870396985e-07, 'fcm_dpo/beta': 0.0010333817917853594, 'fcm_dpo/q_t': 0.4154972434043884, 'fcm_dpo/delta': 0.022133469581604004, 'fcm_dpo/margin': 366.0002136230469, 'margin_dpo/margin_mean': 366.00018310546875, 'margin_dpo/margin_std': 550.1982421875, 'logps/chosen': -631.0543212890625, 'logps/rejected': -1020.4630126953125, 'logps/ref_chosen': -55.989627838134766, 'logps/ref_rejected': -79.39812469482422, 'KL/chosen_KL_mean': -575.0646362304688, 'KL/rejected_KL_mean': -941.0648803710938, 'KL/mean': -758.0647583007812, 'KL/std': 495.6026611328125, 'logits/chosen': -0.9634197354316711, 'logits/rejected': -0.9643290042877197, 'epoch': 0.72} + 72%|███████▏ | 489/681 [20:43<07:59, 2.49s/it] 72%|███████▏ | 490/681 [20:46<08:09, 2.57s/it] {'loss': 1.1483, 'grad_norm': 40.58791732788086, 'learning_rate': 1.1190875675987355e-07, 'fcm_dpo/beta': 0.001032583648338914, 'fcm_dpo/q_t': 0.41282522678375244, 'fcm_dpo/delta': 0.00223751924932003, 'fcm_dpo/margin': 385.1979064941406, 'margin_dpo/margin_mean': 385.19793701171875, 'margin_dpo/margin_std': 706.33056640625, 'logps/chosen': -727.138671875, 'logps/rejected': -1170.379150390625, 'logps/ref_chosen': -52.36639404296875, 'logps/ref_rejected': -110.4090576171875, 'KL/chosen_KL_mean': -674.772216796875, 'KL/rejected_KL_mean': -1059.97021484375, 'KL/mean': -867.3712158203125, 'KL/std': 602.034912109375, 'logits/chosen': -0.9857407808303833, 'logits/rejected': -1.026228427886963, 'epoch': 0.72} + 72%|███████▏ | 490/681 [20:46<08:09, 2.57s/it] 72%|███████▏ | 491/681 [20:48<08:01, 2.53s/it] {'loss': 1.1963, 'grad_norm': 33.82534408569336, 'learning_rate': 1.1084079364846241e-07, 'fcm_dpo/beta': 0.0010518557392060757, 'fcm_dpo/q_t': 0.4379756450653076, 'fcm_dpo/delta': 0.13058799505233765, 'fcm_dpo/margin': 259.36181640625, 'margin_dpo/margin_mean': 259.3618469238281, 'margin_dpo/margin_std': 497.3216552734375, 'logps/chosen': -695.0809936523438, 'logps/rejected': -967.599365234375, 'logps/ref_chosen': -60.11626434326172, 'logps/ref_rejected': -73.27278900146484, 'KL/chosen_KL_mean': -634.9647216796875, 'KL/rejected_KL_mean': -894.3265380859375, 'KL/mean': -764.6456298828125, 'KL/std': 501.4613952636719, 'logits/chosen': -0.9741504192352295, 'logits/rejected': -0.9680135250091553, 'epoch': 0.72} + 72%|███████▏ | 491/681 [20:48<08:01, 2.53s/it] 72%|███████▏ | 492/681 [20:51<08:02, 2.55s/it] {'loss': 1.23, 'grad_norm': 44.77201843261719, 'learning_rate': 1.097764975115576e-07, 'fcm_dpo/beta': 0.0010776289273053408, 'fcm_dpo/q_t': 0.44000089168548584, 'fcm_dpo/delta': 0.13482382893562317, 'fcm_dpo/margin': 249.57763671875, 'margin_dpo/margin_mean': 249.57763671875, 'margin_dpo/margin_std': 586.6539306640625, 'logps/chosen': -717.174560546875, 'logps/rejected': -985.4176635742188, 'logps/ref_chosen': -53.994178771972656, 'logps/ref_rejected': -72.65962219238281, 'KL/chosen_KL_mean': -663.180419921875, 'KL/rejected_KL_mean': -912.758056640625, 'KL/mean': -787.96923828125, 'KL/std': 512.7420043945312, 'logits/chosen': -1.0639835596084595, 'logits/rejected': -1.0435137748718262, 'epoch': 0.72} + 72%|███████▏ | 492/681 [20:51<08:02, 2.55s/it] 72%|███████▏ | 493/681 [20:53<08:08, 2.60s/it] {'loss': 1.1701, 'grad_norm': 34.14286804199219, 'learning_rate': 1.0871589639435203e-07, 'fcm_dpo/beta': 0.0010814403649419546, 'fcm_dpo/q_t': 0.4249815344810486, 'fcm_dpo/delta': -0.023665668442845345, 'fcm_dpo/margin': 295.9699401855469, 'margin_dpo/margin_mean': 295.9699401855469, 'margin_dpo/margin_std': 527.4650268554688, 'logps/chosen': -761.8388061523438, 'logps/rejected': -1069.634521484375, 'logps/ref_chosen': -75.49723815917969, 'logps/ref_rejected': -87.32301330566406, 'KL/chosen_KL_mean': -686.341552734375, 'KL/rejected_KL_mean': -982.3115234375, 'KL/mean': -834.3265380859375, 'KL/std': 570.59423828125, 'logits/chosen': -1.0923945903778076, 'logits/rejected': -1.0642774105072021, 'epoch': 0.72} + 72%|███████▏ | 493/681 [20:53<08:08, 2.60s/it] 73%|███████▎ | 494/681 [20:56<08:06, 2.60s/it] {'loss': 1.022, 'grad_norm': 52.453407287597656, 'learning_rate': 1.0765901824467166e-07, 'fcm_dpo/beta': 0.0010658178944140673, 'fcm_dpo/q_t': 0.3873167037963867, 'fcm_dpo/delta': -0.09456932544708252, 'fcm_dpo/margin': 459.6351318359375, 'margin_dpo/margin_mean': 459.6351318359375, 'margin_dpo/margin_std': 506.0677490234375, 'logps/chosen': -574.34326171875, 'logps/rejected': -1078.71044921875, 'logps/ref_chosen': -41.35926818847656, 'logps/ref_rejected': -86.09136962890625, 'KL/chosen_KL_mean': -532.9840087890625, 'KL/rejected_KL_mean': -992.619140625, 'KL/mean': -762.801513671875, 'KL/std': 505.04736328125, 'logits/chosen': -0.9276965856552124, 'logits/rejected': -0.9675771594047546, 'epoch': 0.73} + 73%|███████▎ | 494/681 [20:56<08:06, 2.60s/it] 73%|███████▎ | 495/681 [20:59<08:14, 2.66s/it] {'loss': 1.1072, 'grad_norm': 33.18449020385742, 'learning_rate': 1.0660589091223854e-07, 'fcm_dpo/beta': 0.0010593379847705364, 'fcm_dpo/q_t': 0.40819650888442993, 'fcm_dpo/delta': -0.016908157616853714, 'fcm_dpo/margin': 392.8917236328125, 'margin_dpo/margin_mean': 392.8917236328125, 'margin_dpo/margin_std': 618.4354248046875, 'logps/chosen': -665.22607421875, 'logps/rejected': -1086.0072021484375, 'logps/ref_chosen': -63.53507995605469, 'logps/ref_rejected': -91.42443084716797, 'KL/chosen_KL_mean': -601.6910400390625, 'KL/rejected_KL_mean': -994.582763671875, 'KL/mean': -798.136962890625, 'KL/std': 516.9666137695312, 'logits/chosen': -1.020897388458252, 'logits/rejected': -1.0267536640167236, 'epoch': 0.73} + 73%|███████▎ | 495/681 [20:59<08:14, 2.66s/it] 73%|███████▎ | 496/681 [21:02<08:12, 2.66s/it] {'loss': 1.2514, 'grad_norm': 70.50399017333984, 'learning_rate': 1.0555654214793722e-07, 'fcm_dpo/beta': 0.0010575959458947182, 'fcm_dpo/q_t': 0.4504912197589874, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 197.38121032714844, 'margin_dpo/margin_mean': 197.3812255859375, 'margin_dpo/margin_std': 476.3318176269531, 'logps/chosen': -816.9910888671875, 'logps/rejected': -1026.109619140625, 'logps/ref_chosen': -72.5919189453125, 'logps/ref_rejected': -84.32933807373047, 'KL/chosen_KL_mean': -744.3991088867188, 'KL/rejected_KL_mean': -941.7803344726562, 'KL/mean': -843.0897216796875, 'KL/std': 415.9389343261719, 'logits/chosen': -0.9933898448944092, 'logits/rejected': -0.957461953163147, 'epoch': 0.73} + 73%|███████▎ | 496/681 [21:02<08:12, 2.66s/it] 73%|███████▎ | 497/681 [21:04<08:06, 2.64s/it] {'loss': 1.2436, 'grad_norm': 35.43773651123047, 'learning_rate': 1.0451099960308374e-07, 'fcm_dpo/beta': 0.0010635224170982838, 'fcm_dpo/q_t': 0.44891273975372314, 'fcm_dpo/delta': 0.02794058434665203, 'fcm_dpo/margin': 207.71685791015625, 'margin_dpo/margin_mean': 207.7168731689453, 'margin_dpo/margin_std': 479.673095703125, 'logps/chosen': -762.9359130859375, 'logps/rejected': -988.34716796875, 'logps/ref_chosen': -58.59397506713867, 'logps/ref_rejected': -76.28836822509766, 'KL/chosen_KL_mean': -704.3419189453125, 'KL/rejected_KL_mean': -912.058837890625, 'KL/mean': -808.200439453125, 'KL/std': 518.0106201171875, 'logits/chosen': -0.9681912660598755, 'logits/rejected': -0.9516055583953857, 'epoch': 0.73} + 73%|███████▎ | 497/681 [21:04<08:06, 2.64s/it] 73%|███████▎ | 498/681 [21:07<08:08, 2.67s/it] {'loss': 1.145, 'grad_norm': 37.042057037353516, 'learning_rate': 1.0346929082869641e-07, 'fcm_dpo/beta': 0.0010668218601495028, 'fcm_dpo/q_t': 0.4150667190551758, 'fcm_dpo/delta': 0.024778790771961212, 'fcm_dpo/margin': 352.5992126464844, 'margin_dpo/margin_mean': 352.5992126464844, 'margin_dpo/margin_std': 618.8363647460938, 'logps/chosen': -738.2352294921875, 'logps/rejected': -1103.586669921875, 'logps/ref_chosen': -71.20565795898438, 'logps/ref_rejected': -83.95803833007812, 'KL/chosen_KL_mean': -667.029541015625, 'KL/rejected_KL_mean': -1019.628662109375, 'KL/mean': -843.3291015625, 'KL/std': 551.8720703125, 'logits/chosen': -0.9754823446273804, 'logits/rejected': -0.9640058279037476, 'epoch': 0.73} + 73%|███████▎ | 498/681 [21:07<08:08, 2.67s/it] 73%|███████▎ | 499/681 [21:09<07:57, 2.62s/it] {'loss': 1.0651, 'grad_norm': 41.18540573120117, 'learning_rate': 1.0243144327477013e-07, 'fcm_dpo/beta': 0.0010582150425761938, 'fcm_dpo/q_t': 0.39665037393569946, 'fcm_dpo/delta': -0.058853406459093094, 'fcm_dpo/margin': 430.98388671875, 'margin_dpo/margin_mean': 430.98388671875, 'margin_dpo/margin_std': 575.690673828125, 'logps/chosen': -624.9869384765625, 'logps/rejected': -1105.79443359375, 'logps/ref_chosen': -51.25519561767578, 'logps/ref_rejected': -101.07870483398438, 'KL/chosen_KL_mean': -573.7318115234375, 'KL/rejected_KL_mean': -1004.7156982421875, 'KL/mean': -789.2236938476562, 'KL/std': 525.0882568359375, 'logits/chosen': -1.0105154514312744, 'logits/rejected': -1.0502002239227295, 'epoch': 0.73} + 73%|███████▎ | 499/681 [21:09<07:57, 2.62s/it] 73%|███████▎ | 500/681 [21:12<07:44, 2.57s/it] {'loss': 1.1312, 'grad_norm': 31.199586868286133, 'learning_rate': 1.0139748428955333e-07, 'fcm_dpo/beta': 0.001059262314811349, 'fcm_dpo/q_t': 0.4146909713745117, 'fcm_dpo/delta': 0.02782963030040264, 'fcm_dpo/margin': 352.3294677734375, 'margin_dpo/margin_mean': 352.3294677734375, 'margin_dpo/margin_std': 558.6328125, 'logps/chosen': -736.72314453125, 'logps/rejected': -1125.9593505859375, 'logps/ref_chosen': -57.027442932128906, 'logps/ref_rejected': -93.93421173095703, 'KL/chosen_KL_mean': -679.6956787109375, 'KL/rejected_KL_mean': -1032.025146484375, 'KL/mean': -855.8604125976562, 'KL/std': 456.21795654296875, 'logits/chosen': -0.979456901550293, 'logits/rejected': -1.0102999210357666, 'epoch': 0.73} + 73%|███████▎ | 500/681 [21:12<07:44, 2.57s/it] 74%|███████▎ | 501/681 [21:14<07:40, 2.56s/it] {'loss': 1.1161, 'grad_norm': 32.033573150634766, 'learning_rate': 1.0036744111882672e-07, 'fcm_dpo/beta': 0.001063595525920391, 'fcm_dpo/q_t': 0.4093387722969055, 'fcm_dpo/delta': -0.004017947241663933, 'fcm_dpo/margin': 379.6474304199219, 'margin_dpo/margin_mean': 379.64739990234375, 'margin_dpo/margin_std': 597.58349609375, 'logps/chosen': -648.205078125, 'logps/rejected': -1053.649658203125, 'logps/ref_chosen': -54.359527587890625, 'logps/ref_rejected': -80.15670013427734, 'KL/chosen_KL_mean': -593.8455810546875, 'KL/rejected_KL_mean': -973.4930419921875, 'KL/mean': -783.6693115234375, 'KL/std': 487.61322021484375, 'logits/chosen': -0.9334768056869507, 'logits/rejected': -0.9198960065841675, 'epoch': 0.74} + 74%|███████▎ | 501/681 [21:14<07:40, 2.56s/it] 74%|███████▎ | 502/681 [21:17<07:40, 2.57s/it] {'loss': 1.0855, 'grad_norm': 33.64237594604492, 'learning_rate': 9.934134090518592e-08, 'fcm_dpo/beta': 0.0010629099560901523, 'fcm_dpo/q_t': 0.4092276096343994, 'fcm_dpo/delta': 0.003547299187630415, 'fcm_dpo/margin': 373.10076904296875, 'margin_dpo/margin_mean': 373.1007995605469, 'margin_dpo/margin_std': 468.3107604980469, 'logps/chosen': -610.1109008789062, 'logps/rejected': -998.5599365234375, 'logps/ref_chosen': -67.60050964355469, 'logps/ref_rejected': -82.94876098632812, 'KL/chosen_KL_mean': -542.5103759765625, 'KL/rejected_KL_mean': -915.6112060546875, 'KL/mean': -729.060791015625, 'KL/std': 439.54412841796875, 'logits/chosen': -0.8644669055938721, 'logits/rejected': -0.8459637761116028, 'epoch': 0.74} + 74%|███████▎ | 502/681 [21:17<07:40, 2.57s/it] 74%|███████▍ | 503/681 [21:20<07:45, 2.62s/it] {'loss': 1.1087, 'grad_norm': 27.046812057495117, 'learning_rate': 9.831921068732571e-08, 'fcm_dpo/beta': 0.0010641318513080478, 'fcm_dpo/q_t': 0.41630834341049194, 'fcm_dpo/delta': 0.03300508111715317, 'fcm_dpo/margin': 345.9414367675781, 'margin_dpo/margin_mean': 345.94140625, 'margin_dpo/margin_std': 465.1625671386719, 'logps/chosen': -592.337646484375, 'logps/rejected': -965.7060546875, 'logps/ref_chosen': -55.078407287597656, 'logps/ref_rejected': -82.50544738769531, 'KL/chosen_KL_mean': -537.25927734375, 'KL/rejected_KL_mean': -883.2006225585938, 'KL/mean': -710.22998046875, 'KL/std': 428.82354736328125, 'logits/chosen': -0.8558259010314941, 'logits/rejected': -0.8414930105209351, 'epoch': 0.74} + 74%|███████▍ | 503/681 [21:20<07:45, 2.62s/it] 74%|███████▍ | 504/681 [21:22<07:41, 2.61s/it] {'loss': 1.0722, 'grad_norm': 29.352018356323242, 'learning_rate': 9.730107739932805e-08, 'fcm_dpo/beta': 0.0010626555886119604, 'fcm_dpo/q_t': 0.3984360992908478, 'fcm_dpo/delta': -0.052036985754966736, 'fcm_dpo/margin': 423.208984375, 'margin_dpo/margin_mean': 423.208984375, 'margin_dpo/margin_std': 561.8984375, 'logps/chosen': -651.0655517578125, 'logps/rejected': -1118.0709228515625, 'logps/ref_chosen': -59.96575164794922, 'logps/ref_rejected': -103.76212310791016, 'KL/chosen_KL_mean': -591.099853515625, 'KL/rejected_KL_mean': -1014.3087768554688, 'KL/mean': -802.704345703125, 'KL/std': 516.8352661132812, 'logits/chosen': -0.917930006980896, 'logits/rejected': -0.9410198926925659, 'epoch': 0.74} + 74%|███████▍ | 504/681 [21:22<07:41, 2.61s/it] 74%|███████▍ | 505/681 [21:25<07:37, 2.60s/it] {'loss': 1.2142, 'grad_norm': 47.79065704345703, 'learning_rate': 9.628696786995188e-08, 'fcm_dpo/beta': 0.001084325835108757, 'fcm_dpo/q_t': 0.4433596134185791, 'fcm_dpo/delta': 0.1583971083164215, 'fcm_dpo/margin': 226.2662811279297, 'margin_dpo/margin_mean': 226.26626586914062, 'margin_dpo/margin_std': 447.0828857421875, 'logps/chosen': -724.6549072265625, 'logps/rejected': -963.3516235351562, 'logps/ref_chosen': -76.1549072265625, 'logps/ref_rejected': -88.58537292480469, 'KL/chosen_KL_mean': -648.5, 'KL/rejected_KL_mean': -874.7662353515625, 'KL/mean': -761.6331176757812, 'KL/std': 479.6357421875, 'logits/chosen': -0.9595932960510254, 'logits/rejected': -0.9328126907348633, 'epoch': 0.74} + 74%|███████▍ | 505/681 [21:25<07:37, 2.60s/it] 74%|███████▍ | 506/681 [21:27<07:23, 2.53s/it] {'loss': 1.0999, 'grad_norm': 31.51370620727539, 'learning_rate': 9.527690882192635e-08, 'fcm_dpo/beta': 0.0010876674205064774, 'fcm_dpo/q_t': 0.40826907753944397, 'fcm_dpo/delta': -0.0015001185238361359, 'fcm_dpo/margin': 368.80908203125, 'margin_dpo/margin_mean': 368.8091125488281, 'margin_dpo/margin_std': 518.3751220703125, 'logps/chosen': -570.1845703125, 'logps/rejected': -968.4482421875, 'logps/ref_chosen': -48.96050262451172, 'logps/ref_rejected': -78.41505432128906, 'KL/chosen_KL_mean': -521.2240600585938, 'KL/rejected_KL_mean': -890.033203125, 'KL/mean': -705.6286010742188, 'KL/std': 477.63641357421875, 'logits/chosen': -0.9328019618988037, 'logits/rejected': -0.9463713765144348, 'epoch': 0.74} + 74%|███████▍ | 506/681 [21:27<07:23, 2.53s/it] 74%|███████▍ | 507/681 [21:30<07:22, 2.54s/it] {'loss': 1.1629, 'grad_norm': 29.80331039428711, 'learning_rate': 9.427092687124691e-08, 'fcm_dpo/beta': 0.0010977927595376968, 'fcm_dpo/q_t': 0.4235384464263916, 'fcm_dpo/delta': 0.043928615748882294, 'fcm_dpo/margin': 325.8041076660156, 'margin_dpo/margin_mean': 325.80413818359375, 'margin_dpo/margin_std': 623.13720703125, 'logps/chosen': -685.7183837890625, 'logps/rejected': -1040.093994140625, 'logps/ref_chosen': -66.80149841308594, 'logps/ref_rejected': -95.37289428710938, 'KL/chosen_KL_mean': -618.9168701171875, 'KL/rejected_KL_mean': -944.7210693359375, 'KL/mean': -781.8189697265625, 'KL/std': 559.7078857421875, 'logits/chosen': -0.9519898891448975, 'logits/rejected': -0.956289529800415, 'epoch': 0.74} + 74%|███████▍ | 507/681 [21:30<07:22, 2.54s/it] 75%|███████▍ | 508/681 [21:32<07:28, 2.59s/it] {'loss': 1.2098, 'grad_norm': 37.2701416015625, 'learning_rate': 9.326904852647344e-08, 'fcm_dpo/beta': 0.0011185563635081053, 'fcm_dpo/q_t': 0.43127191066741943, 'fcm_dpo/delta': 0.0871460884809494, 'fcm_dpo/margin': 281.86297607421875, 'margin_dpo/margin_mean': 281.86297607421875, 'margin_dpo/margin_std': 628.1875610351562, 'logps/chosen': -724.9205322265625, 'logps/rejected': -1031.107666015625, 'logps/ref_chosen': -71.303466796875, 'logps/ref_rejected': -95.6275405883789, 'KL/chosen_KL_mean': -653.6170654296875, 'KL/rejected_KL_mean': -935.4800415039062, 'KL/mean': -794.548583984375, 'KL/std': 542.185546875, 'logits/chosen': -0.9200087189674377, 'logits/rejected': -0.9178076386451721, 'epoch': 0.75} + 75%|███████▍ | 508/681 [21:33<07:28, 2.59s/it] 75%|███████▍ | 509/681 [21:35<07:27, 2.60s/it] {'loss': 1.1457, 'grad_norm': 36.30656433105469, 'learning_rate': 9.227130018803195e-08, 'fcm_dpo/beta': 0.001136223552748561, 'fcm_dpo/q_t': 0.4222422242164612, 'fcm_dpo/delta': 0.058409832417964935, 'fcm_dpo/margin': 301.7255554199219, 'margin_dpo/margin_mean': 301.72552490234375, 'margin_dpo/margin_std': 484.4477844238281, 'logps/chosen': -568.8052368164062, 'logps/rejected': -889.96826171875, 'logps/ref_chosen': -63.81895065307617, 'logps/ref_rejected': -83.25643920898438, 'KL/chosen_KL_mean': -504.98626708984375, 'KL/rejected_KL_mean': -806.7117919921875, 'KL/mean': -655.8489990234375, 'KL/std': 394.4847412109375, 'logits/chosen': -0.8557006120681763, 'logits/rejected': -0.8502145409584045, 'epoch': 0.75} + 75%|███████▍ | 509/681 [21:35<07:27, 2.60s/it] 75%|███████▍ | 510/681 [21:38<07:29, 2.63s/it] {'loss': 1.0478, 'grad_norm': 40.821044921875, 'learning_rate': 9.127770814751932e-08, 'fcm_dpo/beta': 0.0011275724973529577, 'fcm_dpo/q_t': 0.3957008123397827, 'fcm_dpo/delta': -0.04966657981276512, 'fcm_dpo/margin': 396.84149169921875, 'margin_dpo/margin_mean': 396.84149169921875, 'margin_dpo/margin_std': 446.3687438964844, 'logps/chosen': -649.8004760742188, 'logps/rejected': -1097.5286865234375, 'logps/ref_chosen': -51.878448486328125, 'logps/ref_rejected': -102.7651596069336, 'KL/chosen_KL_mean': -597.9219970703125, 'KL/rejected_KL_mean': -994.7635498046875, 'KL/mean': -796.3427734375, 'KL/std': 451.759521484375, 'logits/chosen': -0.8606332540512085, 'logits/rejected': -0.8811938166618347, 'epoch': 0.75} + 75%|███████▍ | 510/681 [21:38<07:29, 2.63s/it] 75%|███████▌ | 511/681 [21:40<07:24, 2.61s/it] {'loss': 1.165, 'grad_norm': 44.68586730957031, 'learning_rate': 9.028829858700973e-08, 'fcm_dpo/beta': 0.001131793251261115, 'fcm_dpo/q_t': 0.4202183187007904, 'fcm_dpo/delta': 0.04954507201910019, 'fcm_dpo/margin': 311.153564453125, 'margin_dpo/margin_mean': 311.153564453125, 'margin_dpo/margin_std': 587.4927978515625, 'logps/chosen': -622.2727661132812, 'logps/rejected': -966.0450439453125, 'logps/ref_chosen': -60.23811721801758, 'logps/ref_rejected': -92.85676574707031, 'KL/chosen_KL_mean': -562.03466796875, 'KL/rejected_KL_mean': -873.188232421875, 'KL/mean': -717.6114501953125, 'KL/std': 484.3371887207031, 'logits/chosen': -0.9428844451904297, 'logits/rejected': -0.9506068825721741, 'epoch': 0.75} + 75%|███████▌ | 511/681 [21:40<07:24, 2.61s/it] 75%|███████▌ | 512/681 [21:43<07:04, 2.51s/it] {'loss': 1.0178, 'grad_norm': 60.86901092529297, 'learning_rate': 8.930309757836516e-08, 'fcm_dpo/beta': 0.0011163100134581327, 'fcm_dpo/q_t': 0.38778460025787354, 'fcm_dpo/delta': -0.08939085900783539, 'fcm_dpo/margin': 434.24078369140625, 'margin_dpo/margin_mean': 434.24078369140625, 'margin_dpo/margin_std': 454.16876220703125, 'logps/chosen': -503.8018798828125, 'logps/rejected': -965.0130615234375, 'logps/ref_chosen': -54.905494689941406, 'logps/ref_rejected': -81.87586975097656, 'KL/chosen_KL_mean': -448.8963623046875, 'KL/rejected_KL_mean': -883.13720703125, 'KL/mean': -666.0167846679688, 'KL/std': 444.51080322265625, 'logits/chosen': -0.8777337074279785, 'logits/rejected': -0.8991394639015198, 'epoch': 0.75} + 75%|███████▌ | 512/681 [21:43<07:04, 2.51s/it] 75%|███████▌ | 513/681 [21:45<07:12, 2.58s/it] {'loss': 1.1469, 'grad_norm': 50.96452331542969, 'learning_rate': 8.832213108254863e-08, 'fcm_dpo/beta': 0.00110536755528301, 'fcm_dpo/q_t': 0.422201544046402, 'fcm_dpo/delta': -0.03816516324877739, 'fcm_dpo/margin': 306.30194091796875, 'margin_dpo/margin_mean': 306.30194091796875, 'margin_dpo/margin_std': 481.57958984375, 'logps/chosen': -631.9314575195312, 'logps/rejected': -949.3794555664062, 'logps/ref_chosen': -64.91644287109375, 'logps/ref_rejected': -76.06245422363281, 'KL/chosen_KL_mean': -567.0150146484375, 'KL/rejected_KL_mean': -873.3170166015625, 'KL/mean': -720.166015625, 'KL/std': 418.872802734375, 'logits/chosen': -0.9502312541007996, 'logits/rejected': -0.9348673820495605, 'epoch': 0.75} + 75%|███████▌ | 513/681 [21:45<07:12, 2.58s/it] 75%|███████▌ | 514/681 [21:48<07:15, 2.61s/it] {'loss': 1.1551, 'grad_norm': 38.09934616088867, 'learning_rate': 8.734542494893954e-08, 'fcm_dpo/beta': 0.0011181586887687445, 'fcm_dpo/q_t': 0.4251614212989807, 'fcm_dpo/delta': 0.06485524028539658, 'fcm_dpo/margin': 301.52301025390625, 'margin_dpo/margin_mean': 301.52301025390625, 'margin_dpo/margin_std': 522.4912109375, 'logps/chosen': -666.2926025390625, 'logps/rejected': -972.5315551757812, 'logps/ref_chosen': -74.22957611083984, 'logps/ref_rejected': -78.945556640625, 'KL/chosen_KL_mean': -592.06298828125, 'KL/rejected_KL_mean': -893.5859985351562, 'KL/mean': -742.8245239257812, 'KL/std': 463.70751953125, 'logits/chosen': -0.8954925537109375, 'logits/rejected': -0.8867564797401428, 'epoch': 0.75} + 75%|███████▌ | 514/681 [21:48<07:15, 2.61s/it] 76%|███████▌ | 515/681 [21:51<07:11, 2.60s/it] {'loss': 1.1981, 'grad_norm': 50.51891326904297, 'learning_rate': 8.637300491465272e-08, 'fcm_dpo/beta': 0.0011437044013291597, 'fcm_dpo/q_t': 0.437407523393631, 'fcm_dpo/delta': 0.13827666640281677, 'fcm_dpo/margin': 231.9891357421875, 'margin_dpo/margin_mean': 231.9891357421875, 'margin_dpo/margin_std': 439.46319580078125, 'logps/chosen': -531.556884765625, 'logps/rejected': -800.2422485351562, 'logps/ref_chosen': -50.40156555175781, 'logps/ref_rejected': -87.09774780273438, 'KL/chosen_KL_mean': -481.15533447265625, 'KL/rejected_KL_mean': -713.14453125, 'KL/mean': -597.14990234375, 'KL/std': 382.8921813964844, 'logits/chosen': -0.8350532054901123, 'logits/rejected': -0.846880316734314, 'epoch': 0.76} + 76%|███████▌ | 515/681 [21:51<07:11, 2.60s/it] 76%|███████▌ | 516/681 [21:53<06:49, 2.48s/it] {'loss': 1.0912, 'grad_norm': 50.28904724121094, 'learning_rate': 8.540489660386064e-08, 'fcm_dpo/beta': 0.0011569425696507096, 'fcm_dpo/q_t': 0.4088793396949768, 'fcm_dpo/delta': 0.012162066996097565, 'fcm_dpo/margin': 335.4332275390625, 'margin_dpo/margin_mean': 335.4331970214844, 'margin_dpo/margin_std': 415.19305419921875, 'logps/chosen': -573.4461669921875, 'logps/rejected': -955.9521484375, 'logps/ref_chosen': -64.64956665039062, 'logps/ref_rejected': -111.72237396240234, 'KL/chosen_KL_mean': -508.79656982421875, 'KL/rejected_KL_mean': -844.229736328125, 'KL/mean': -676.51318359375, 'KL/std': 420.5047607421875, 'logits/chosen': -0.910446286201477, 'logits/rejected': -0.9385887384414673, 'epoch': 0.76} + 76%|███████▌ | 516/681 [21:53<06:49, 2.48s/it] 76%|███████▌ | 517/681 [21:55<06:47, 2.49s/it] {'loss': 1.0563, 'grad_norm': 31.643814086914062, 'learning_rate': 8.444112552711752e-08, 'fcm_dpo/beta': 0.0011394355678930879, 'fcm_dpo/q_t': 0.39602527022361755, 'fcm_dpo/delta': -0.07319086790084839, 'fcm_dpo/margin': 412.10162353515625, 'margin_dpo/margin_mean': 412.10162353515625, 'margin_dpo/margin_std': 548.390380859375, 'logps/chosen': -611.4688720703125, 'logps/rejected': -1051.739990234375, 'logps/ref_chosen': -60.913551330566406, 'logps/ref_rejected': -89.08308410644531, 'KL/chosen_KL_mean': -550.5552978515625, 'KL/rejected_KL_mean': -962.656982421875, 'KL/mean': -756.6061401367188, 'KL/std': 494.27984619140625, 'logits/chosen': -0.8593244552612305, 'logits/rejected': -0.8574497699737549, 'epoch': 0.76} + 76%|███████▌ | 517/681 [21:55<06:47, 2.49s/it] 76%|███████▌ | 518/681 [21:58<06:56, 2.55s/it] {'loss': 1.0946, 'grad_norm': 58.1636848449707, 'learning_rate': 8.348171708068747e-08, 'fcm_dpo/beta': 0.001135983387939632, 'fcm_dpo/q_t': 0.40997710824012756, 'fcm_dpo/delta': 0.015141095966100693, 'fcm_dpo/margin': 339.1503601074219, 'margin_dpo/margin_mean': 339.1503601074219, 'margin_dpo/margin_std': 427.3458557128906, 'logps/chosen': -553.9820556640625, 'logps/rejected': -920.9891357421875, 'logps/ref_chosen': -57.45589065551758, 'logps/ref_rejected': -85.31269836425781, 'KL/chosen_KL_mean': -496.5261535644531, 'KL/rejected_KL_mean': -835.676513671875, 'KL/mean': -666.101318359375, 'KL/std': 392.9124755859375, 'logits/chosen': -0.8889042139053345, 'logits/rejected': -0.904350221157074, 'epoch': 0.76} + 76%|███████▌ | 518/681 [21:58<06:56, 2.55s/it] 76%|███████▌ | 519/681 [22:01<07:14, 2.68s/it] {'loss': 1.2004, 'grad_norm': 37.28139877319336, 'learning_rate': 8.25266965458755e-08, 'fcm_dpo/beta': 0.0011437054490670562, 'fcm_dpo/q_t': 0.43695303797721863, 'fcm_dpo/delta': 0.026432547718286514, 'fcm_dpo/margin': 235.43359375, 'margin_dpo/margin_mean': 235.43359375, 'margin_dpo/margin_std': 449.8384094238281, 'logps/chosen': -584.1768798828125, 'logps/rejected': -849.9913330078125, 'logps/ref_chosen': -74.06331634521484, 'logps/ref_rejected': -104.44416809082031, 'KL/chosen_KL_mean': -510.1135559082031, 'KL/rejected_KL_mean': -745.547119140625, 'KL/mean': -627.830322265625, 'KL/std': 358.55865478515625, 'logits/chosen': -0.8758772611618042, 'logits/rejected': -0.859151303768158, 'epoch': 0.76} + 76%|███████▌ | 519/681 [22:01<07:14, 2.68s/it] 76%|███████▋ | 520/681 [22:04<07:09, 2.67s/it] {'loss': 1.1257, 'grad_norm': 45.021728515625, 'learning_rate': 8.15760890883607e-08, 'fcm_dpo/beta': 0.0011494287755340338, 'fcm_dpo/q_t': 0.4179002046585083, 'fcm_dpo/delta': 0.03775997459888458, 'fcm_dpo/margin': 316.26123046875, 'margin_dpo/margin_mean': 316.26123046875, 'margin_dpo/margin_std': 463.71783447265625, 'logps/chosen': -603.906494140625, 'logps/rejected': -949.8492431640625, 'logps/ref_chosen': -70.2998275756836, 'logps/ref_rejected': -99.98133850097656, 'KL/chosen_KL_mean': -533.606689453125, 'KL/rejected_KL_mean': -849.8678588867188, 'KL/mean': -691.7373046875, 'KL/std': 418.42547607421875, 'logits/chosen': -0.8589770793914795, 'logits/rejected': -0.8674265146255493, 'epoch': 0.76} + 76%|███████▋ | 520/681 [22:04<07:09, 2.67s/it] 77%|███████▋ | 521/681 [22:06<07:05, 2.66s/it] {'loss': 1.0938, 'grad_norm': 29.704362869262695, 'learning_rate': 8.062991975753378e-08, 'fcm_dpo/beta': 0.0011624578619375825, 'fcm_dpo/q_t': 0.4088232219219208, 'fcm_dpo/delta': 0.0017420090734958649, 'fcm_dpo/margin': 342.0847473144531, 'margin_dpo/margin_mean': 342.084716796875, 'margin_dpo/margin_std': 449.4230041503906, 'logps/chosen': -548.624755859375, 'logps/rejected': -915.84716796875, 'logps/ref_chosen': -58.14292526245117, 'logps/ref_rejected': -83.28060913085938, 'KL/chosen_KL_mean': -490.48187255859375, 'KL/rejected_KL_mean': -832.5665283203125, 'KL/mean': -661.524169921875, 'KL/std': 435.0758972167969, 'logits/chosen': -0.9206105470657349, 'logits/rejected': -0.9278547763824463, 'epoch': 0.77} + 77%|███████▋ | 521/681 [22:06<07:05, 2.66s/it] 77%|███████▋ | 522/681 [22:09<07:00, 2.64s/it] {'loss': 1.1346, 'grad_norm': 31.109495162963867, 'learning_rate': 7.968821348583643e-08, 'fcm_dpo/beta': 0.0011631404049694538, 'fcm_dpo/q_t': 0.41798996925354004, 'fcm_dpo/delta': 0.04171000048518181, 'fcm_dpo/margin': 309.32373046875, 'margin_dpo/margin_mean': 309.32373046875, 'margin_dpo/margin_std': 488.1703796386719, 'logps/chosen': -605.5213623046875, 'logps/rejected': -934.311279296875, 'logps/ref_chosen': -46.54766845703125, 'logps/ref_rejected': -66.01388549804688, 'KL/chosen_KL_mean': -558.9736328125, 'KL/rejected_KL_mean': -868.29736328125, 'KL/mean': -713.635498046875, 'KL/std': 458.9757080078125, 'logits/chosen': -0.9124878644943237, 'logits/rejected': -0.9153552055358887, 'epoch': 0.77} + 77%|███████▋ | 522/681 [22:09<07:00, 2.64s/it] 77%|███████▋ | 523/681 [22:12<07:01, 2.67s/it] {'loss': 1.1351, 'grad_norm': 43.91381072998047, 'learning_rate': 7.875099508810484e-08, 'fcm_dpo/beta': 0.001167641719803214, 'fcm_dpo/q_t': 0.4140872061252594, 'fcm_dpo/delta': 0.014641055837273598, 'fcm_dpo/margin': 330.5179443359375, 'margin_dpo/margin_mean': 330.5179443359375, 'margin_dpo/margin_std': 560.5762329101562, 'logps/chosen': -655.3046264648438, 'logps/rejected': -1007.8143310546875, 'logps/ref_chosen': -61.76960372924805, 'logps/ref_rejected': -83.76141357421875, 'KL/chosen_KL_mean': -593.5350341796875, 'KL/rejected_KL_mean': -924.0529174804688, 'KL/mean': -758.7939453125, 'KL/std': 532.2261962890625, 'logits/chosen': -0.9662898182868958, 'logits/rejected': -0.9687439203262329, 'epoch': 0.77} + 77%|███████▋ | 523/681 [22:12<07:01, 2.67s/it] 77%|███████▋ | 524/681 [22:14<06:56, 2.65s/it] {'loss': 1.1068, 'grad_norm': 47.520774841308594, 'learning_rate': 7.781828926091535e-08, 'fcm_dpo/beta': 0.0011622272431850433, 'fcm_dpo/q_t': 0.4057931900024414, 'fcm_dpo/delta': -0.002190619707107544, 'fcm_dpo/margin': 345.46832275390625, 'margin_dpo/margin_mean': 345.46832275390625, 'margin_dpo/margin_std': 491.1083984375, 'logps/chosen': -653.06201171875, 'logps/rejected': -1001.76025390625, 'logps/ref_chosen': -78.0720443725586, 'logps/ref_rejected': -81.30198669433594, 'KL/chosen_KL_mean': -574.9899291992188, 'KL/rejected_KL_mean': -920.458251953125, 'KL/mean': -747.72412109375, 'KL/std': 479.4917297363281, 'logits/chosen': -0.9906701445579529, 'logits/rejected': -0.9787558317184448, 'epoch': 0.77} + 77%|███████▋ | 524/681 [22:14<06:56, 2.65s/it] 77%|███████▋ | 525/681 [22:17<06:56, 2.67s/it] {'loss': 1.0305, 'grad_norm': 42.60094451904297, 'learning_rate': 7.689012058193384e-08, 'fcm_dpo/beta': 0.001145128975622356, 'fcm_dpo/q_t': 0.38664761185646057, 'fcm_dpo/delta': -0.11176417022943497, 'fcm_dpo/margin': 441.4364929199219, 'margin_dpo/margin_mean': 441.43646240234375, 'margin_dpo/margin_std': 542.1448364257812, 'logps/chosen': -632.3170166015625, 'logps/rejected': -1122.978515625, 'logps/ref_chosen': -50.827857971191406, 'logps/ref_rejected': -100.05294036865234, 'KL/chosen_KL_mean': -581.4891357421875, 'KL/rejected_KL_mean': -1022.925537109375, 'KL/mean': -802.2073974609375, 'KL/std': 510.1922607421875, 'logits/chosen': -0.925658106803894, 'logits/rejected': -0.9625715017318726, 'epoch': 0.77} + 77%|███████▋ | 525/681 [22:17<06:56, 2.67s/it] 77%|███████▋ | 526/681 [22:19<06:46, 2.62s/it] {'loss': 1.0485, 'grad_norm': 30.649280548095703, 'learning_rate': 7.596651350926836e-08, 'fcm_dpo/beta': 0.0011303203646093607, 'fcm_dpo/q_t': 0.3878590166568756, 'fcm_dpo/delta': -0.09655816853046417, 'fcm_dpo/margin': 435.17706298828125, 'margin_dpo/margin_mean': 435.17706298828125, 'margin_dpo/margin_std': 561.9842529296875, 'logps/chosen': -689.2291259765625, 'logps/rejected': -1147.54833984375, 'logps/ref_chosen': -63.167236328125, 'logps/ref_rejected': -86.30934143066406, 'KL/chosen_KL_mean': -626.0618896484375, 'KL/rejected_KL_mean': -1061.239013671875, 'KL/mean': -843.650390625, 'KL/std': 508.11346435546875, 'logits/chosen': -0.9626432657241821, 'logits/rejected': -0.9604432582855225, 'epoch': 0.77} + 77%|███████▋ | 526/681 [22:20<06:46, 2.62s/it] 77%|███████▋ | 527/681 [22:22<06:39, 2.60s/it] {'loss': 1.1391, 'grad_norm': 32.22301483154297, 'learning_rate': 7.504749238082414e-08, 'fcm_dpo/beta': 0.0011297144228592515, 'fcm_dpo/q_t': 0.4224596917629242, 'fcm_dpo/delta': 0.06883951276540756, 'fcm_dpo/margin': 295.2120361328125, 'margin_dpo/margin_mean': 295.2120361328125, 'margin_dpo/margin_std': 437.1567687988281, 'logps/chosen': -708.5220947265625, 'logps/rejected': -1010.947998046875, 'logps/ref_chosen': -71.12867736816406, 'logps/ref_rejected': -78.3425521850586, 'KL/chosen_KL_mean': -637.3934326171875, 'KL/rejected_KL_mean': -932.60546875, 'KL/mean': -784.9993896484375, 'KL/std': 524.7011108398438, 'logits/chosen': -1.1435140371322632, 'logits/rejected': -1.1115856170654297, 'epoch': 0.77} + 77%|███████▋ | 527/681 [22:22<06:39, 2.60s/it] 78%|███████▊ | 528/681 [22:25<06:42, 2.63s/it] {'loss': 1.1151, 'grad_norm': 43.005401611328125, 'learning_rate': 7.413308141366254e-08, 'fcm_dpo/beta': 0.001133130630478263, 'fcm_dpo/q_t': 0.40897679328918457, 'fcm_dpo/delta': -0.012725085951387882, 'fcm_dpo/margin': 363.7490539550781, 'margin_dpo/margin_mean': 363.7490539550781, 'margin_dpo/margin_std': 581.20263671875, 'logps/chosen': -720.5875244140625, 'logps/rejected': -1110.1572265625, 'logps/ref_chosen': -68.0894546508789, 'logps/ref_rejected': -93.91006469726562, 'KL/chosen_KL_mean': -652.498046875, 'KL/rejected_KL_mean': -1016.2471313476562, 'KL/mean': -834.37255859375, 'KL/std': 502.570068359375, 'logits/chosen': -1.045546293258667, 'logits/rejected': -1.0283198356628418, 'epoch': 0.78} + 78%|███████▊ | 528/681 [22:25<06:42, 2.63s/it] 78%|███████▊ | 529/681 [22:27<06:41, 2.64s/it] {'loss': 1.234, 'grad_norm': 47.4577522277832, 'learning_rate': 7.322330470336313e-08, 'fcm_dpo/beta': 0.0011390424333512783, 'fcm_dpo/q_t': 0.43738028407096863, 'fcm_dpo/delta': 0.02491956390440464, 'fcm_dpo/margin': 240.14520263671875, 'margin_dpo/margin_mean': 240.14520263671875, 'margin_dpo/margin_std': 571.541748046875, 'logps/chosen': -823.8419189453125, 'logps/rejected': -1097.6212158203125, 'logps/ref_chosen': -55.57495880126953, 'logps/ref_rejected': -89.20909118652344, 'KL/chosen_KL_mean': -768.2669677734375, 'KL/rejected_KL_mean': -1008.412109375, 'KL/mean': -888.3395385742188, 'KL/std': 458.5384521484375, 'logits/chosen': -1.0672532320022583, 'logits/rejected': -1.0765759944915771, 'epoch': 0.78} + 78%|███████▊ | 529/681 [22:27<06:41, 2.64s/it] 78%|███████▊ | 530/681 [22:30<06:45, 2.69s/it] {'loss': 1.1208, 'grad_norm': 49.10841369628906, 'learning_rate': 7.231818622338822e-08, 'fcm_dpo/beta': 0.0011311739217489958, 'fcm_dpo/q_t': 0.40238332748413086, 'fcm_dpo/delta': -0.050166938453912735, 'fcm_dpo/margin': 395.99542236328125, 'margin_dpo/margin_mean': 395.99542236328125, 'margin_dpo/margin_std': 689.074462890625, 'logps/chosen': -706.957763671875, 'logps/rejected': -1142.63623046875, 'logps/ref_chosen': -47.601417541503906, 'logps/ref_rejected': -87.2845230102539, 'KL/chosen_KL_mean': -659.3563232421875, 'KL/rejected_KL_mean': -1055.3516845703125, 'KL/mean': -857.35400390625, 'KL/std': 551.8106079101562, 'logits/chosen': -0.9842202663421631, 'logits/rejected': -0.980260968208313, 'epoch': 0.78} + 78%|███████▊ | 530/681 [22:30<06:45, 2.69s/it] 78%|███████▊ | 531/681 [22:32<06:25, 2.57s/it] {'loss': 1.1281, 'grad_norm': 44.63459777832031, 'learning_rate': 7.141774982445147e-08, 'fcm_dpo/beta': 0.0011335888411849737, 'fcm_dpo/q_t': 0.4141680896282196, 'fcm_dpo/delta': 0.015755577012896538, 'fcm_dpo/margin': 339.31494140625, 'margin_dpo/margin_mean': 339.31494140625, 'margin_dpo/margin_std': 551.3843994140625, 'logps/chosen': -782.8472900390625, 'logps/rejected': -1137.522216796875, 'logps/ref_chosen': -55.246063232421875, 'logps/ref_rejected': -70.60598754882812, 'KL/chosen_KL_mean': -727.6012573242188, 'KL/rejected_KL_mean': -1066.916259765625, 'KL/mean': -897.2587280273438, 'KL/std': 580.2291870117188, 'logits/chosen': -1.0530567169189453, 'logits/rejected': -1.0324784517288208, 'epoch': 0.78} + 78%|███████▊ | 531/681 [22:33<06:25, 2.57s/it] 78%|███████▊ | 532/681 [22:35<06:29, 2.61s/it] {'loss': 1.1499, 'grad_norm': 86.36136627197266, 'learning_rate': 7.052201923388953e-08, 'fcm_dpo/beta': 0.0011215780396014452, 'fcm_dpo/q_t': 0.41034865379333496, 'fcm_dpo/delta': -0.00783345103263855, 'fcm_dpo/margin': 362.3275451660156, 'margin_dpo/margin_mean': 362.3275451660156, 'margin_dpo/margin_std': 658.3814697265625, 'logps/chosen': -811.085693359375, 'logps/rejected': -1189.718505859375, 'logps/ref_chosen': -70.28601837158203, 'logps/ref_rejected': -86.5913314819336, 'KL/chosen_KL_mean': -740.7996826171875, 'KL/rejected_KL_mean': -1103.127197265625, 'KL/mean': -921.9634399414062, 'KL/std': 562.6202392578125, 'logits/chosen': -1.013758897781372, 'logits/rejected': -0.9856699705123901, 'epoch': 0.78} + 78%|███████▊ | 532/681 [22:35<06:29, 2.61s/it] 78%|███████▊ | 533/681 [22:38<06:25, 2.60s/it] {'loss': 1.2058, 'grad_norm': 44.21409606933594, 'learning_rate': 6.963101805503646e-08, 'fcm_dpo/beta': 0.001128980191424489, 'fcm_dpo/q_t': 0.43269672989845276, 'fcm_dpo/delta': -0.004243167117238045, 'fcm_dpo/margin': 265.5915222167969, 'margin_dpo/margin_mean': 265.591552734375, 'margin_dpo/margin_std': 571.443359375, 'logps/chosen': -711.7498168945312, 'logps/rejected': -989.0743408203125, 'logps/ref_chosen': -64.8551025390625, 'logps/ref_rejected': -76.58805847167969, 'KL/chosen_KL_mean': -646.8947143554688, 'KL/rejected_KL_mean': -912.4862670898438, 'KL/mean': -779.6904907226562, 'KL/std': 475.87249755859375, 'logits/chosen': -1.0471582412719727, 'logits/rejected': -1.0218884944915771, 'epoch': 0.78} + 78%|███████▊ | 533/681 [22:38<06:25, 2.60s/it] 78%|███████▊ | 534/681 [22:40<06:22, 2.60s/it] {'loss': 1.1133, 'grad_norm': 40.56071853637695, 'learning_rate': 6.874476976660184e-08, 'fcm_dpo/beta': 0.0011213625548407435, 'fcm_dpo/q_t': 0.4103449285030365, 'fcm_dpo/delta': 0.001744687557220459, 'fcm_dpo/margin': 354.456787109375, 'margin_dpo/margin_mean': 354.456787109375, 'margin_dpo/margin_std': 530.1246337890625, 'logps/chosen': -742.862060546875, 'logps/rejected': -1115.742919921875, 'logps/ref_chosen': -60.119388580322266, 'logps/ref_rejected': -78.54347229003906, 'KL/chosen_KL_mean': -682.74267578125, 'KL/rejected_KL_mean': -1037.199462890625, 'KL/mean': -859.9710693359375, 'KL/std': 517.587890625, 'logits/chosen': -1.0277010202407837, 'logits/rejected': -1.022787094116211, 'epoch': 0.78} + 78%|███████▊ | 534/681 [22:40<06:22, 2.60s/it] 79%|███████▊ | 535/681 [22:43<06:19, 2.60s/it] {'loss': 1.0635, 'grad_norm': 32.73942565917969, 'learning_rate': 6.786329772205246e-08, 'fcm_dpo/beta': 0.0011251430260017514, 'fcm_dpo/q_t': 0.39604315161705017, 'fcm_dpo/delta': -0.06760876625776291, 'fcm_dpo/margin': 412.41912841796875, 'margin_dpo/margin_mean': 412.41912841796875, 'margin_dpo/margin_std': 538.2338256835938, 'logps/chosen': -652.687255859375, 'logps/rejected': -1107.083740234375, 'logps/ref_chosen': -54.330238342285156, 'logps/ref_rejected': -96.30763244628906, 'KL/chosen_KL_mean': -598.3570556640625, 'KL/rejected_KL_mean': -1010.776123046875, 'KL/mean': -804.5665283203125, 'KL/std': 515.3653564453125, 'logits/chosen': -0.9553531408309937, 'logits/rejected': -0.9562033414840698, 'epoch': 0.79} + 79%|███████▊ | 535/681 [22:43<06:19, 2.60s/it] 79%|███████▊ | 536/681 [22:46<06:20, 2.62s/it] {'loss': 1.0253, 'grad_norm': 26.8385066986084, 'learning_rate': 6.698662514899638e-08, 'fcm_dpo/beta': 0.0010907297255471349, 'fcm_dpo/q_t': 0.38460248708724976, 'fcm_dpo/delta': -0.14658187329769135, 'fcm_dpo/margin': 493.97308349609375, 'margin_dpo/margin_mean': 493.97308349609375, 'margin_dpo/margin_std': 671.49755859375, 'logps/chosen': -557.59375, 'logps/rejected': -1093.5841064453125, 'logps/ref_chosen': -47.08053207397461, 'logps/ref_rejected': -89.09783935546875, 'KL/chosen_KL_mean': -510.51318359375, 'KL/rejected_KL_mean': -1004.4862060546875, 'KL/mean': -757.499755859375, 'KL/std': 566.7451782226562, 'logits/chosen': -0.9329211711883545, 'logits/rejected': -0.9625818729400635, 'epoch': 0.79} + 79%|███████▊ | 536/681 [22:46<06:20, 2.62s/it] 79%|███████▉ | 537/681 [22:48<06:15, 2.61s/it] {'loss': 1.1432, 'grad_norm': 46.854251861572266, 'learning_rate': 6.611477514857114e-08, 'fcm_dpo/beta': 0.0010922504588961601, 'fcm_dpo/q_t': 0.4157649874687195, 'fcm_dpo/delta': 0.0329880453646183, 'fcm_dpo/margin': 336.547119140625, 'margin_dpo/margin_mean': 336.547119140625, 'margin_dpo/margin_std': 562.57177734375, 'logps/chosen': -608.1195678710938, 'logps/rejected': -957.3576049804688, 'logps/ref_chosen': -57.747467041015625, 'logps/ref_rejected': -70.43838500976562, 'KL/chosen_KL_mean': -550.3721313476562, 'KL/rejected_KL_mean': -886.919189453125, 'KL/mean': -718.6456298828125, 'KL/std': 459.65936279296875, 'logits/chosen': -0.9757102727890015, 'logits/rejected': -0.9494297504425049, 'epoch': 0.79} + 79%|███████▉ | 537/681 [22:48<06:15, 2.61s/it] 79%|███████▉ | 538/681 [22:51<06:13, 2.61s/it] {'loss': 1.0873, 'grad_norm': 31.442834854125977, 'learning_rate': 6.524777069483525e-08, 'fcm_dpo/beta': 0.0010833143023774028, 'fcm_dpo/q_t': 0.40582871437072754, 'fcm_dpo/delta': -0.014802441000938416, 'fcm_dpo/margin': 382.1605224609375, 'margin_dpo/margin_mean': 382.1605224609375, 'margin_dpo/margin_std': 518.7582397460938, 'logps/chosen': -761.2787475585938, 'logps/rejected': -1161.25146484375, 'logps/ref_chosen': -66.41594696044922, 'logps/ref_rejected': -84.22808837890625, 'KL/chosen_KL_mean': -694.86279296875, 'KL/rejected_KL_mean': -1077.0233154296875, 'KL/mean': -885.943115234375, 'KL/std': 506.5946960449219, 'logits/chosen': -0.9763197898864746, 'logits/rejected': -0.9549228549003601, 'epoch': 0.79} + 79%|███████▉ | 538/681 [22:51<06:13, 2.61s/it] 79%|███████▉ | 539/681 [22:54<06:15, 2.64s/it] {'loss': 1.0956, 'grad_norm': 30.531400680541992, 'learning_rate': 6.438563463416221e-08, 'fcm_dpo/beta': 0.0010887064272537827, 'fcm_dpo/q_t': 0.41017356514930725, 'fcm_dpo/delta': 0.007081391289830208, 'fcm_dpo/margin': 361.0832214355469, 'margin_dpo/margin_mean': 361.0832214355469, 'margin_dpo/margin_std': 479.93939208984375, 'logps/chosen': -653.4474487304688, 'logps/rejected': -1047.8917236328125, 'logps/ref_chosen': -58.492855072021484, 'logps/ref_rejected': -91.85395050048828, 'KL/chosen_KL_mean': -594.95458984375, 'KL/rejected_KL_mean': -956.0377807617188, 'KL/mean': -775.4962158203125, 'KL/std': 433.60675048828125, 'logits/chosen': -1.028662919998169, 'logits/rejected': -1.015451192855835, 'epoch': 0.79} + 79%|███████▉ | 539/681 [22:54<06:15, 2.64s/it] 79%|███████▉ | 540/681 [22:56<06:09, 2.62s/it] {'loss': 1.0431, 'grad_norm': 33.186012268066406, 'learning_rate': 6.352838968463919e-08, 'fcm_dpo/beta': 0.0010707840556278825, 'fcm_dpo/q_t': 0.38717466592788696, 'fcm_dpo/delta': -0.12625397741794586, 'fcm_dpo/margin': 485.5020751953125, 'margin_dpo/margin_mean': 485.5020446777344, 'margin_dpo/margin_std': 648.8251953125, 'logps/chosen': -671.853271484375, 'logps/rejected': -1210.3028564453125, 'logps/ref_chosen': -63.482513427734375, 'logps/ref_rejected': -116.42999267578125, 'KL/chosen_KL_mean': -608.3707885742188, 'KL/rejected_KL_mean': -1093.8729248046875, 'KL/mean': -851.121826171875, 'KL/std': 551.8743896484375, 'logits/chosen': -0.9437620639801025, 'logits/rejected': -0.9658418893814087, 'epoch': 0.79} + 79%|███████▉ | 540/681 [22:56<06:09, 2.62s/it] 79%|███████▉ | 541/681 [22:58<05:55, 2.54s/it] {'loss': 1.2291, 'grad_norm': 45.27843475341797, 'learning_rate': 6.267605843546767e-08, 'fcm_dpo/beta': 0.0010596727952361107, 'fcm_dpo/q_t': 0.4399704039096832, 'fcm_dpo/delta': -0.0012176802847534418, 'fcm_dpo/margin': 249.83154296875, 'margin_dpo/margin_mean': 249.83154296875, 'margin_dpo/margin_std': 572.767822265625, 'logps/chosen': -795.44482421875, 'logps/rejected': -1070.269775390625, 'logps/ref_chosen': -78.28036499023438, 'logps/ref_rejected': -103.273681640625, 'KL/chosen_KL_mean': -717.1644287109375, 'KL/rejected_KL_mean': -966.99609375, 'KL/mean': -842.0802612304688, 'KL/std': 490.08099365234375, 'logits/chosen': -1.0905866622924805, 'logits/rejected': -1.0777101516723633, 'epoch': 0.79} + 79%|███████▉ | 541/681 [22:59<05:55, 2.54s/it] 80%|███████▉ | 542/681 [23:01<06:07, 2.64s/it] {'loss': 1.0595, 'grad_norm': 49.864784240722656, 'learning_rate': 6.182866334636888e-08, 'fcm_dpo/beta': 0.0010356687707826495, 'fcm_dpo/q_t': 0.3910575807094574, 'fcm_dpo/delta': -0.09657715260982513, 'fcm_dpo/margin': 472.7724609375, 'margin_dpo/margin_mean': 472.7724609375, 'margin_dpo/margin_std': 642.970947265625, 'logps/chosen': -684.185791015625, 'logps/rejected': -1195.9483642578125, 'logps/ref_chosen': -57.48497009277344, 'logps/ref_rejected': -96.47506713867188, 'KL/chosen_KL_mean': -626.7008056640625, 'KL/rejected_KL_mean': -1099.4732666015625, 'KL/mean': -863.0870361328125, 'KL/std': 545.5762939453125, 'logits/chosen': -1.0657211542129517, 'logits/rejected': -1.098391056060791, 'epoch': 0.8} + 80%|███████▉ | 542/681 [23:01<06:07, 2.64s/it] 80%|███████▉ | 543/681 [23:04<06:01, 2.62s/it] {'loss': 1.202, 'grad_norm': 39.975833892822266, 'learning_rate': 6.098622674699147e-08, 'fcm_dpo/beta': 0.001045349519699812, 'fcm_dpo/q_t': 0.43216556310653687, 'fcm_dpo/delta': 0.050522927194833755, 'fcm_dpo/margin': 336.0360107421875, 'margin_dpo/margin_mean': 336.0360107421875, 'margin_dpo/margin_std': 774.7861328125, 'logps/chosen': -763.1822509765625, 'logps/rejected': -1144.19970703125, 'logps/ref_chosen': -60.61750793457031, 'logps/ref_rejected': -105.59896850585938, 'KL/chosen_KL_mean': -702.564697265625, 'KL/rejected_KL_mean': -1038.6007080078125, 'KL/mean': -870.582763671875, 'KL/std': 663.33447265625, 'logits/chosen': -0.9809169769287109, 'logits/rejected': -1.0095728635787964, 'epoch': 0.8} + 80%|███████▉ | 543/681 [23:04<06:01, 2.62s/it] 80%|███████▉ | 544/681 [23:06<05:55, 2.60s/it] {'loss': 1.1097, 'grad_norm': 32.359127044677734, 'learning_rate': 6.01487708363232e-08, 'fcm_dpo/beta': 0.0010487872641533613, 'fcm_dpo/q_t': 0.40891605615615845, 'fcm_dpo/delta': -0.003843428334221244, 'fcm_dpo/margin': 384.9021911621094, 'margin_dpo/margin_mean': 384.9022216796875, 'margin_dpo/margin_std': 589.19677734375, 'logps/chosen': -769.101806640625, 'logps/rejected': -1195.31640625, 'logps/ref_chosen': -59.642303466796875, 'logps/ref_rejected': -100.95469665527344, 'KL/chosen_KL_mean': -709.45947265625, 'KL/rejected_KL_mean': -1094.36181640625, 'KL/mean': -901.91064453125, 'KL/std': 522.1925048828125, 'logits/chosen': -1.0074541568756104, 'logits/rejected': -1.025747299194336, 'epoch': 0.8} + 80%|███████▉ | 544/681 [23:06<05:55, 2.60s/it] 80%|████████ | 545/681 [23:09<05:50, 2.57s/it] {'loss': 1.0393, 'grad_norm': 33.80911636352539, 'learning_rate': 5.9316317682106294e-08, 'fcm_dpo/beta': 0.0010332402307540178, 'fcm_dpo/q_t': 0.38913267850875854, 'fcm_dpo/delta': -0.10204954445362091, 'fcm_dpo/margin': 480.9786376953125, 'margin_dpo/margin_mean': 480.9786376953125, 'margin_dpo/margin_std': 609.970458984375, 'logps/chosen': -725.1640625, 'logps/rejected': -1234.402099609375, 'logps/ref_chosen': -67.64859771728516, 'logps/ref_rejected': -95.90800476074219, 'KL/chosen_KL_mean': -657.5155029296875, 'KL/rejected_KL_mean': -1138.494140625, 'KL/mean': -898.0047607421875, 'KL/std': 535.8654174804688, 'logits/chosen': -0.9045934677124023, 'logits/rejected': -0.9373363256454468, 'epoch': 0.8} + 80%|████████ | 545/681 [23:09<05:50, 2.57s/it] 80%|████████ | 546/681 [23:12<05:46, 2.57s/it] {'loss': 1.1632, 'grad_norm': 38.7053108215332, 'learning_rate': 5.848888922025552e-08, 'fcm_dpo/beta': 0.0010417320299893618, 'fcm_dpo/q_t': 0.4263242185115814, 'fcm_dpo/delta': 0.0893513560295105, 'fcm_dpo/margin': 300.89483642578125, 'margin_dpo/margin_mean': 300.89483642578125, 'margin_dpo/margin_std': 500.70306396484375, 'logps/chosen': -681.468017578125, 'logps/rejected': -1013.4849853515625, 'logps/ref_chosen': -50.744232177734375, 'logps/ref_rejected': -81.86622619628906, 'KL/chosen_KL_mean': -630.7238159179688, 'KL/rejected_KL_mean': -931.6187133789062, 'KL/mean': -781.1712646484375, 'KL/std': 462.85028076171875, 'logits/chosen': -0.9846795797348022, 'logits/rejected': -0.9681577682495117, 'epoch': 0.8} + 80%|████████ | 546/681 [23:12<05:46, 2.57s/it] 80%|████████ | 547/681 [23:14<05:50, 2.62s/it] {'loss': 1.0885, 'grad_norm': 48.27106857299805, 'learning_rate': 5.7666507254280265e-08, 'fcm_dpo/beta': 0.0010446913074702024, 'fcm_dpo/q_t': 0.4047175645828247, 'fcm_dpo/delta': -0.02371753193438053, 'fcm_dpo/margin': 404.6095275878906, 'margin_dpo/margin_mean': 404.60955810546875, 'margin_dpo/margin_std': 567.8289184570312, 'logps/chosen': -696.63720703125, 'logps/rejected': -1118.3204345703125, 'logps/ref_chosen': -73.6877212524414, 'logps/ref_rejected': -90.76136779785156, 'KL/chosen_KL_mean': -622.949462890625, 'KL/rejected_KL_mean': -1027.55908203125, 'KL/mean': -825.2542724609375, 'KL/std': 521.1716918945312, 'logits/chosen': -0.9310117363929749, 'logits/rejected': -0.9426393508911133, 'epoch': 0.8} + 80%|████████ | 547/681 [23:14<05:50, 2.62s/it] 80%|████████ | 548/681 [23:17<05:43, 2.58s/it] {'loss': 1.1356, 'grad_norm': 30.078670501708984, 'learning_rate': 5.684919345471029e-08, 'fcm_dpo/beta': 0.0010460072662681341, 'fcm_dpo/q_t': 0.42021819949150085, 'fcm_dpo/delta': 0.02962075173854828, 'fcm_dpo/margin': 355.14385986328125, 'margin_dpo/margin_mean': 355.14385986328125, 'margin_dpo/margin_std': 616.107177734375, 'logps/chosen': -721.80078125, 'logps/rejected': -1105.81640625, 'logps/ref_chosen': -65.24634552001953, 'logps/ref_rejected': -94.11807250976562, 'KL/chosen_KL_mean': -656.554443359375, 'KL/rejected_KL_mean': -1011.6983642578125, 'KL/mean': -834.1263427734375, 'KL/std': 554.41552734375, 'logits/chosen': -1.0434558391571045, 'logits/rejected': -1.0432292222976685, 'epoch': 0.8} + 80%|████████ | 548/681 [23:17<05:43, 2.58s/it] 81%|████████ | 549/681 [23:19<05:37, 2.56s/it] {'loss': 1.2042, 'grad_norm': 55.40736770629883, 'learning_rate': 5.603696935852426e-08, 'fcm_dpo/beta': 0.0010487986728549004, 'fcm_dpo/q_t': 0.4355009198188782, 'fcm_dpo/delta': 0.010232968255877495, 'fcm_dpo/margin': 274.687255859375, 'margin_dpo/margin_mean': 274.687255859375, 'margin_dpo/margin_std': 570.506103515625, 'logps/chosen': -754.4528198242188, 'logps/rejected': -1053.8380126953125, 'logps/ref_chosen': -49.21235656738281, 'logps/ref_rejected': -73.91031646728516, 'KL/chosen_KL_mean': -705.240478515625, 'KL/rejected_KL_mean': -979.927734375, 'KL/mean': -842.5841064453125, 'KL/std': 473.9487609863281, 'logits/chosen': -1.0249577760696411, 'logits/rejected': -1.0135101079940796, 'epoch': 0.81} + 81%|████████ | 549/681 [23:19<05:37, 2.56s/it] 81%|████████ | 550/681 [23:22<05:37, 2.57s/it] {'loss': 1.1355, 'grad_norm': 45.87727737426758, 'learning_rate': 5.5229856368582376e-08, 'fcm_dpo/beta': 0.0010564997792243958, 'fcm_dpo/q_t': 0.4197149872779846, 'fcm_dpo/delta': 0.04886303097009659, 'fcm_dpo/margin': 334.0168762207031, 'margin_dpo/margin_mean': 334.01690673828125, 'margin_dpo/margin_std': 528.5301513671875, 'logps/chosen': -733.1976318359375, 'logps/rejected': -1105.533447265625, 'logps/ref_chosen': -56.80695343017578, 'logps/ref_rejected': -95.12580871582031, 'KL/chosen_KL_mean': -676.3907470703125, 'KL/rejected_KL_mean': -1010.4075927734375, 'KL/mean': -843.399169921875, 'KL/std': 506.3270263671875, 'logits/chosen': -0.9613184928894043, 'logits/rejected': -0.9839845299720764, 'epoch': 0.81} + 81%|████████ | 550/681 [23:22<05:37, 2.57s/it] 81%|████████ | 551/681 [23:24<05:29, 2.54s/it] {'loss': 0.9681, 'grad_norm': 48.99431610107422, 'learning_rate': 5.4427875753062734e-08, 'fcm_dpo/beta': 0.001030082581564784, 'fcm_dpo/q_t': 0.3695809245109558, 'fcm_dpo/delta': -0.18102356791496277, 'fcm_dpo/margin': 553.6406860351562, 'margin_dpo/margin_mean': 553.6406860351562, 'margin_dpo/margin_std': 553.6318359375, 'logps/chosen': -664.5113525390625, 'logps/rejected': -1270.718505859375, 'logps/ref_chosen': -59.10633087158203, 'logps/ref_rejected': -111.67280578613281, 'KL/chosen_KL_mean': -605.405029296875, 'KL/rejected_KL_mean': -1159.045654296875, 'KL/mean': -882.225341796875, 'KL/std': 553.9127197265625, 'logits/chosen': -0.9334255456924438, 'logits/rejected': -0.9928078055381775, 'epoch': 0.81} + 81%|████████ | 551/681 [23:24<05:29, 2.54s/it] 81%|████████ | 552/681 [23:27<05:17, 2.46s/it] {'loss': 0.9781, 'grad_norm': 55.24009704589844, 'learning_rate': 5.363104864490034e-08, 'fcm_dpo/beta': 0.000981001416221261, 'fcm_dpo/q_t': 0.3693755269050598, 'fcm_dpo/delta': -0.21600230038166046, 'fcm_dpo/margin': 611.0693359375, 'margin_dpo/margin_mean': 611.0692749023438, 'margin_dpo/margin_std': 701.7575073242188, 'logps/chosen': -624.0551147460938, 'logps/rejected': -1277.33203125, 'logps/ref_chosen': -62.35459899902344, 'logps/ref_rejected': -104.56210327148438, 'KL/chosen_KL_mean': -561.7005004882812, 'KL/rejected_KL_mean': -1172.769775390625, 'KL/mean': -867.2351684570312, 'KL/std': 621.8682861328125, 'logits/chosen': -0.9999994039535522, 'logits/rejected': -1.0425043106079102, 'epoch': 0.81} + 81%|████████ | 552/681 [23:27<05:17, 2.46s/it] 81%|████████ | 553/681 [23:29<05:19, 2.50s/it] {'loss': 1.1743, 'grad_norm': 28.201841354370117, 'learning_rate': 5.2839396041230415e-08, 'fcm_dpo/beta': 0.0009901414159685373, 'fcm_dpo/q_t': 0.43176624178886414, 'fcm_dpo/delta': 0.09159143269062042, 'fcm_dpo/margin': 314.45751953125, 'margin_dpo/margin_mean': 314.45751953125, 'margin_dpo/margin_std': 584.146240234375, 'logps/chosen': -717.1509399414062, 'logps/rejected': -1061.44677734375, 'logps/ref_chosen': -68.25881958007812, 'logps/ref_rejected': -98.0971450805664, 'KL/chosen_KL_mean': -648.89208984375, 'KL/rejected_KL_mean': -963.349609375, 'KL/mean': -806.120849609375, 'KL/std': 528.423828125, 'logits/chosen': -0.9241914749145508, 'logits/rejected': -0.9152238368988037, 'epoch': 0.81} + 81%|████████ | 553/681 [23:29<05:19, 2.50s/it] 81%|████████▏ | 554/681 [23:32<05:29, 2.59s/it] {'loss': 1.1178, 'grad_norm': 72.81156921386719, 'learning_rate': 5.205293880283551e-08, 'fcm_dpo/beta': 0.000999167561531067, 'fcm_dpo/q_t': 0.4063616394996643, 'fcm_dpo/delta': -0.03213735669851303, 'fcm_dpo/margin': 430.4720153808594, 'margin_dpo/margin_mean': 430.4720153808594, 'margin_dpo/margin_std': 707.259521484375, 'logps/chosen': -729.0787353515625, 'logps/rejected': -1181.3857421875, 'logps/ref_chosen': -67.94767761230469, 'logps/ref_rejected': -89.78272247314453, 'KL/chosen_KL_mean': -661.1310424804688, 'KL/rejected_KL_mean': -1091.60302734375, 'KL/mean': -876.3670654296875, 'KL/std': 559.095947265625, 'logits/chosen': -0.9473394155502319, 'logits/rejected': -0.9234938621520996, 'epoch': 0.81} + 81%|████████▏ | 554/681 [23:32<05:29, 2.59s/it] 81%|████████▏ | 555/681 [23:34<05:21, 2.55s/it] {'loss': 1.078, 'grad_norm': 50.65883255004883, 'learning_rate': 5.127169765359515e-08, 'fcm_dpo/beta': 0.0009781017433851957, 'fcm_dpo/q_t': 0.3947869837284088, 'fcm_dpo/delta': -0.09773121029138565, 'fcm_dpo/margin': 504.04437255859375, 'margin_dpo/margin_mean': 504.04437255859375, 'margin_dpo/margin_std': 776.3997802734375, 'logps/chosen': -737.0382080078125, 'logps/rejected': -1296.2314453125, 'logps/ref_chosen': -53.33049011230469, 'logps/ref_rejected': -108.47937774658203, 'KL/chosen_KL_mean': -683.7077026367188, 'KL/rejected_KL_mean': -1187.7520751953125, 'KL/mean': -935.7298583984375, 'KL/std': 597.616943359375, 'logits/chosen': -1.0049800872802734, 'logits/rejected': -1.0619277954101562, 'epoch': 0.81} + 81%|████████▏ | 555/681 [23:34<05:21, 2.55s/it] 82%|████████▏ | 556/681 [23:37<05:24, 2.59s/it] {'loss': 1.1608, 'grad_norm': 28.539920806884766, 'learning_rate': 5.049569317994012e-08, 'fcm_dpo/beta': 0.0009878533892333508, 'fcm_dpo/q_t': 0.4318218529224396, 'fcm_dpo/delta': 0.10852371156215668, 'fcm_dpo/margin': 298.27655029296875, 'margin_dpo/margin_mean': 298.2765808105469, 'margin_dpo/margin_std': 454.01800537109375, 'logps/chosen': -712.9735107421875, 'logps/rejected': -1053.946044921875, 'logps/ref_chosen': -58.64447021484375, 'logps/ref_rejected': -101.34040832519531, 'KL/chosen_KL_mean': -654.3289794921875, 'KL/rejected_KL_mean': -952.6055908203125, 'KL/mean': -803.46728515625, 'KL/std': 454.75347900390625, 'logits/chosen': -1.0015957355499268, 'logits/rejected': -0.997646689414978, 'epoch': 0.82} + 82%|████████▏ | 556/681 [23:37<05:24, 2.59s/it] 82%|████████▏ | 557/681 [23:40<05:22, 2.60s/it] {'loss': 1.105, 'grad_norm': 74.13558959960938, 'learning_rate': 4.9724945830310144e-08, 'fcm_dpo/beta': 0.0009835660457611084, 'fcm_dpo/q_t': 0.402864545583725, 'fcm_dpo/delta': -0.04802338778972626, 'fcm_dpo/margin': 453.0914306640625, 'margin_dpo/margin_mean': 453.0914306640625, 'margin_dpo/margin_std': 724.352783203125, 'logps/chosen': -784.3936157226562, 'logps/rejected': -1279.583984375, 'logps/ref_chosen': -67.84066009521484, 'logps/ref_rejected': -109.93965911865234, 'KL/chosen_KL_mean': -716.552978515625, 'KL/rejected_KL_mean': -1169.6444091796875, 'KL/mean': -943.0986328125, 'KL/std': 638.390625, 'logits/chosen': -1.025956392288208, 'logits/rejected': -1.0636675357818604, 'epoch': 0.82} + 82%|████████▏ | 557/681 [23:40<05:22, 2.60s/it] 82%|████████▏ | 558/681 [23:42<05:18, 2.59s/it] {'loss': 0.9725, 'grad_norm': 37.18507385253906, 'learning_rate': 4.8959475914614554e-08, 'fcm_dpo/beta': 0.000956161180511117, 'fcm_dpo/q_t': 0.36552464962005615, 'fcm_dpo/delta': -0.19506213068962097, 'fcm_dpo/margin': 610.5439453125, 'margin_dpo/margin_mean': 610.5439453125, 'margin_dpo/margin_std': 645.1649169921875, 'logps/chosen': -701.2913208007812, 'logps/rejected': -1351.628173828125, 'logps/ref_chosen': -62.36824035644531, 'logps/ref_rejected': -102.16102600097656, 'KL/chosen_KL_mean': -638.923095703125, 'KL/rejected_KL_mean': -1249.467041015625, 'KL/mean': -944.195068359375, 'KL/std': 590.7932739257812, 'logits/chosen': -1.0881562232971191, 'logits/rejected': -1.1110167503356934, 'epoch': 0.82} + 82%|████████▏ | 558/681 [23:42<05:18, 2.59s/it] 82%|████████▏ | 559/681 [23:45<05:13, 2.57s/it] {'loss': 1.0549, 'grad_norm': 30.2044734954834, 'learning_rate': 4.8199303603697614e-08, 'fcm_dpo/beta': 0.000936803175136447, 'fcm_dpo/q_t': 0.39399462938308716, 'fcm_dpo/delta': -0.07669977843761444, 'fcm_dpo/margin': 505.05615234375, 'margin_dpo/margin_mean': 505.05621337890625, 'margin_dpo/margin_std': 663.0927734375, 'logps/chosen': -798.4859008789062, 'logps/rejected': -1336.2320556640625, 'logps/ref_chosen': -60.752323150634766, 'logps/ref_rejected': -93.44229125976562, 'KL/chosen_KL_mean': -737.7335205078125, 'KL/rejected_KL_mean': -1242.789794921875, 'KL/mean': -990.26171875, 'KL/std': 609.5598754882812, 'logits/chosen': -1.1786550283432007, 'logits/rejected': -1.18735933303833, 'epoch': 0.82} + 82%|████████▏ | 559/681 [23:45<05:13, 2.57s/it] 82%|████████▏ | 560/681 [23:48<05:18, 2.63s/it] {'loss': 1.1402, 'grad_norm': 34.21842575073242, 'learning_rate': 4.7444448928806615e-08, 'fcm_dpo/beta': 0.0009346292354166508, 'fcm_dpo/q_t': 0.4206470251083374, 'fcm_dpo/delta': 0.0522555373609066, 'fcm_dpo/margin': 373.83026123046875, 'margin_dpo/margin_mean': 373.83026123046875, 'margin_dpo/margin_std': 590.425537109375, 'logps/chosen': -723.9578857421875, 'logps/rejected': -1119.675537109375, 'logps/ref_chosen': -58.10382080078125, 'logps/ref_rejected': -79.99122619628906, 'KL/chosen_KL_mean': -665.8540649414062, 'KL/rejected_KL_mean': -1039.684326171875, 'KL/mean': -852.7691650390625, 'KL/std': 539.5546875, 'logits/chosen': -0.9070395231246948, 'logits/rejected': -0.8892009258270264, 'epoch': 0.82} + 82%|████████▏ | 560/681 [23:48<05:18, 2.63s/it] 82%|████████▏ | 561/681 [23:50<05:09, 2.58s/it] {'loss': 1.2181, 'grad_norm': 41.68199157714844, 'learning_rate': 4.669493178106432e-08, 'fcm_dpo/beta': 0.0009608013788238168, 'fcm_dpo/q_t': 0.4335824251174927, 'fcm_dpo/delta': 0.11739911884069443, 'fcm_dpo/margin': 297.1005554199219, 'margin_dpo/margin_mean': 297.10052490234375, 'margin_dpo/margin_std': 675.0682373046875, 'logps/chosen': -848.35009765625, 'logps/rejected': -1193.6063232421875, 'logps/ref_chosen': -50.912879943847656, 'logps/ref_rejected': -99.06856536865234, 'KL/chosen_KL_mean': -797.4371948242188, 'KL/rejected_KL_mean': -1094.537841796875, 'KL/mean': -945.9874877929688, 'KL/std': 540.9091186523438, 'logits/chosen': -1.0954780578613281, 'logits/rejected': -1.1158504486083984, 'epoch': 0.82} + 82%|████████▏ | 561/681 [23:50<05:09, 2.58s/it] 83%|████████▎ | 562/681 [23:53<05:10, 2.61s/it] {'loss': 1.1008, 'grad_norm': 44.4998664855957, 'learning_rate': 4.5950771910944596e-08, 'fcm_dpo/beta': 0.0009560026228427887, 'fcm_dpo/q_t': 0.40745779871940613, 'fcm_dpo/delta': -0.01568439230322838, 'fcm_dpo/margin': 433.3727722167969, 'margin_dpo/margin_mean': 433.3727722167969, 'margin_dpo/margin_std': 646.6458740234375, 'logps/chosen': -790.7119140625, 'logps/rejected': -1261.162841796875, 'logps/ref_chosen': -59.46440124511719, 'logps/ref_rejected': -96.54266357421875, 'KL/chosen_KL_mean': -731.2474975585938, 'KL/rejected_KL_mean': -1164.6201171875, 'KL/mean': -947.933837890625, 'KL/std': 597.2823486328125, 'logits/chosen': -1.0381966829299927, 'logits/rejected': -1.0505871772766113, 'epoch': 0.83} + 83%|████████▎ | 562/681 [23:53<05:10, 2.61s/it] 83%|████████▎ | 563/681 [23:55<04:59, 2.54s/it] {'loss': 1.2364, 'grad_norm': 57.121238708496094, 'learning_rate': 4.521198892775202e-08, 'fcm_dpo/beta': 0.0009566263761371374, 'fcm_dpo/q_t': 0.42516082525253296, 'fcm_dpo/delta': -0.042735543102025986, 'fcm_dpo/margin': 316.32666015625, 'margin_dpo/margin_mean': 316.32666015625, 'margin_dpo/margin_std': 755.5484619140625, 'logps/chosen': -878.9268188476562, 'logps/rejected': -1229.2130126953125, 'logps/ref_chosen': -60.60819625854492, 'logps/ref_rejected': -94.56770324707031, 'KL/chosen_KL_mean': -818.318603515625, 'KL/rejected_KL_mean': -1134.645263671875, 'KL/mean': -976.48193359375, 'KL/std': 644.12939453125, 'logits/chosen': -1.0335191488265991, 'logits/rejected': -1.0376369953155518, 'epoch': 0.83} + 83%|████████▎ | 563/681 [23:55<04:59, 2.54s/it] 83%|████████▎ | 564/681 [23:58<04:54, 2.52s/it] {'loss': 1.0915, 'grad_norm': 40.964012145996094, 'learning_rate': 4.447860229910544e-08, 'fcm_dpo/beta': 0.0009554863208904862, 'fcm_dpo/q_t': 0.40960174798965454, 'fcm_dpo/delta': 0.004538660869002342, 'fcm_dpo/margin': 413.9423828125, 'margin_dpo/margin_mean': 413.9423828125, 'margin_dpo/margin_std': 534.1187744140625, 'logps/chosen': -794.373779296875, 'logps/rejected': -1227.285888671875, 'logps/ref_chosen': -74.26837921142578, 'logps/ref_rejected': -93.23818969726562, 'KL/chosen_KL_mean': -720.1054077148438, 'KL/rejected_KL_mean': -1134.0478515625, 'KL/mean': -927.0765380859375, 'KL/std': 552.9932250976562, 'logits/chosen': -1.0873092412948608, 'logits/rejected': -1.0802876949310303, 'epoch': 0.83} + 83%|████████▎ | 564/681 [23:58<04:54, 2.52s/it] 83%|████████▎ | 565/681 [24:00<04:55, 2.55s/it] {'loss': 1.1303, 'grad_norm': 60.46991729736328, 'learning_rate': 4.375063135042445e-08, 'fcm_dpo/beta': 0.0009499896550551057, 'fcm_dpo/q_t': 0.4094652831554413, 'fcm_dpo/delta': -0.022613905370235443, 'fcm_dpo/margin': 443.85186767578125, 'margin_dpo/margin_mean': 443.85186767578125, 'margin_dpo/margin_std': 780.3602294921875, 'logps/chosen': -842.2274169921875, 'logps/rejected': -1302.83837890625, 'logps/ref_chosen': -69.0199203491211, 'logps/ref_rejected': -85.7789306640625, 'KL/chosen_KL_mean': -773.20751953125, 'KL/rejected_KL_mean': -1217.059326171875, 'KL/mean': -995.1334228515625, 'KL/std': 649.26171875, 'logits/chosen': -1.0429582595825195, 'logits/rejected': -1.0467889308929443, 'epoch': 0.83} + 83%|████████▎ | 565/681 [24:00<04:55, 2.55s/it] 83%|████████▎ | 566/681 [24:03<04:59, 2.61s/it] {'loss': 1.1021, 'grad_norm': 32.33478927612305, 'learning_rate': 4.3028095264420525e-08, 'fcm_dpo/beta': 0.000945397128816694, 'fcm_dpo/q_t': 0.39865684509277344, 'fcm_dpo/delta': -0.06484313309192657, 'fcm_dpo/margin': 488.22674560546875, 'margin_dpo/margin_mean': 488.22674560546875, 'margin_dpo/margin_std': 773.612060546875, 'logps/chosen': -798.3458251953125, 'logps/rejected': -1323.896484375, 'logps/ref_chosen': -66.5453109741211, 'logps/ref_rejected': -103.86932373046875, 'KL/chosen_KL_mean': -731.8004760742188, 'KL/rejected_KL_mean': -1220.0272216796875, 'KL/mean': -975.913818359375, 'KL/std': 664.28466796875, 'logits/chosen': -1.0355273485183716, 'logits/rejected': -1.0623806715011597, 'epoch': 0.83} + 83%|████████▎ | 566/681 [24:03<04:59, 2.61s/it] 83%|████████▎ | 567/681 [24:06<04:55, 2.59s/it] {'loss': 1.1196, 'grad_norm': 40.58536911010742, 'learning_rate': 4.231101308059165e-08, 'fcm_dpo/beta': 0.0009434693492949009, 'fcm_dpo/q_t': 0.41841405630111694, 'fcm_dpo/delta': 0.053218990564346313, 'fcm_dpo/margin': 369.55865478515625, 'margin_dpo/margin_mean': 369.55865478515625, 'margin_dpo/margin_std': 497.5330505371094, 'logps/chosen': -746.8116455078125, 'logps/rejected': -1148.8829345703125, 'logps/ref_chosen': -52.85829544067383, 'logps/ref_rejected': -85.37095642089844, 'KL/chosen_KL_mean': -693.953369140625, 'KL/rejected_KL_mean': -1063.511962890625, 'KL/mean': -878.732666015625, 'KL/std': 454.4079284667969, 'logits/chosen': -1.1621546745300293, 'logits/rejected': -1.1755425930023193, 'epoch': 0.83} + 83%|████████▎ | 567/681 [24:06<04:55, 2.59s/it] 83%|████████▎ | 568/681 [24:08<04:45, 2.53s/it] {'loss': 1.0361, 'grad_norm': 34.73697280883789, 'learning_rate': 4.1599403694720145e-08, 'fcm_dpo/beta': 0.0009356088703498244, 'fcm_dpo/q_t': 0.39082616567611694, 'fcm_dpo/delta': -0.07556474953889847, 'fcm_dpo/margin': 504.4129333496094, 'margin_dpo/margin_mean': 504.41290283203125, 'margin_dpo/margin_std': 577.991943359375, 'logps/chosen': -742.05517578125, 'logps/rejected': -1290.3680419921875, 'logps/ref_chosen': -45.1923828125, 'logps/ref_rejected': -89.09236907958984, 'KL/chosen_KL_mean': -696.86279296875, 'KL/rejected_KL_mean': -1201.275634765625, 'KL/mean': -949.0692749023438, 'KL/std': 546.216064453125, 'logits/chosen': -1.0284827947616577, 'logits/rejected': -1.069136142730713, 'epoch': 0.83} + 83%|████████▎ | 568/681 [24:08<04:45, 2.53s/it] 84%|████████▎ | 569/681 [24:11<04:46, 2.56s/it] {'loss': 1.1639, 'grad_norm': 68.37850952148438, 'learning_rate': 4.089328585837512e-08, 'fcm_dpo/beta': 0.0009422843577340245, 'fcm_dpo/q_t': 0.4149819612503052, 'fcm_dpo/delta': 0.025416847318410873, 'fcm_dpo/margin': 397.50946044921875, 'margin_dpo/margin_mean': 397.50946044921875, 'margin_dpo/margin_std': 739.0003051757812, 'logps/chosen': -864.2625732421875, 'logps/rejected': -1277.15478515625, 'logps/ref_chosen': -63.72056198120117, 'logps/ref_rejected': -79.10325622558594, 'KL/chosen_KL_mean': -800.5419921875, 'KL/rejected_KL_mean': -1198.051513671875, 'KL/mean': -999.2967529296875, 'KL/std': 707.2173461914062, 'logits/chosen': -1.0683939456939697, 'logits/rejected': -1.0735970735549927, 'epoch': 0.84} + 84%|████████▎ | 569/681 [24:11<04:46, 2.56s/it] 84%|████████▎ | 570/681 [24:13<04:45, 2.57s/it] {'loss': 1.1407, 'grad_norm': 34.74723815917969, 'learning_rate': 4.019267817841834e-08, 'fcm_dpo/beta': 0.0009413023362867534, 'fcm_dpo/q_t': 0.4200833737850189, 'fcm_dpo/delta': 0.04106989875435829, 'fcm_dpo/margin': 382.88421630859375, 'margin_dpo/margin_mean': 382.8842468261719, 'margin_dpo/margin_std': 631.297607421875, 'logps/chosen': -802.755615234375, 'logps/rejected': -1206.167236328125, 'logps/ref_chosen': -61.61454391479492, 'logps/ref_rejected': -82.14186096191406, 'KL/chosen_KL_mean': -741.1410522460938, 'KL/rejected_KL_mean': -1124.025390625, 'KL/mean': -932.583251953125, 'KL/std': 565.6273803710938, 'logits/chosen': -1.177173137664795, 'logits/rejected': -1.1700718402862549, 'epoch': 0.84} + 84%|████████▎ | 570/681 [24:13<04:45, 2.57s/it] 84%|████████▍ | 571/681 [24:16<04:43, 2.58s/it] {'loss': 1.1144, 'grad_norm': 51.04140090942383, 'learning_rate': 3.9497599116513705e-08, 'fcm_dpo/beta': 0.00093979382654652, 'fcm_dpo/q_t': 0.4062590003013611, 'fcm_dpo/delta': -0.02226072922348976, 'fcm_dpo/margin': 448.14007568359375, 'margin_dpo/margin_mean': 448.14007568359375, 'margin_dpo/margin_std': 723.029052734375, 'logps/chosen': -828.529052734375, 'logps/rejected': -1314.9517822265625, 'logps/ref_chosen': -53.05406188964844, 'logps/ref_rejected': -91.33682250976562, 'KL/chosen_KL_mean': -775.4749755859375, 'KL/rejected_KL_mean': -1223.614990234375, 'KL/mean': -999.5449829101562, 'KL/std': 576.8671875, 'logits/chosen': -1.042744755744934, 'logits/rejected': -1.0550953149795532, 'epoch': 0.84} + 84%|████████▍ | 571/681 [24:16<04:43, 2.58s/it] 84%|████████▍ | 572/681 [24:18<04:33, 2.51s/it] {'loss': 1.12, 'grad_norm': 32.49717712402344, 'learning_rate': 3.880806698864086e-08, 'fcm_dpo/beta': 0.0009325648425146937, 'fcm_dpo/q_t': 0.4063248038291931, 'fcm_dpo/delta': -0.04140661656856537, 'fcm_dpo/margin': 471.0883483886719, 'margin_dpo/margin_mean': 471.08837890625, 'margin_dpo/margin_std': 815.548828125, 'logps/chosen': -852.5999755859375, 'logps/rejected': -1358.7861328125, 'logps/ref_chosen': -48.45928955078125, 'logps/ref_rejected': -83.55703735351562, 'KL/chosen_KL_mean': -804.140625, 'KL/rejected_KL_mean': -1275.22900390625, 'KL/mean': -1039.684814453125, 'KL/std': 669.639404296875, 'logits/chosen': -1.0944292545318604, 'logits/rejected': -1.1295243501663208, 'epoch': 0.84} + 84%|████████▍ | 572/681 [24:18<04:33, 2.51s/it] 84%|████████▍ | 573/681 [24:20<04:22, 2.43s/it] {'loss': 1.1041, 'grad_norm': 29.790285110473633, 'learning_rate': 3.812409996461275e-08, 'fcm_dpo/beta': 0.0009360272670164704, 'fcm_dpo/q_t': 0.41275107860565186, 'fcm_dpo/delta': 0.011164238676428795, 'fcm_dpo/margin': 415.822021484375, 'margin_dpo/margin_mean': 415.822021484375, 'margin_dpo/margin_std': 595.5224609375, 'logps/chosen': -843.8782348632812, 'logps/rejected': -1293.402587890625, 'logps/ref_chosen': -51.62262725830078, 'logps/ref_rejected': -85.32499694824219, 'KL/chosen_KL_mean': -792.255615234375, 'KL/rejected_KL_mean': -1208.07763671875, 'KL/mean': -1000.1666259765625, 'KL/std': 596.44189453125, 'logits/chosen': -1.1403576135635376, 'logits/rejected': -1.1524157524108887, 'epoch': 0.84} + 84%|████████▍ | 573/681 [24:20<04:22, 2.43s/it] 84%|████████▍ | 574/681 [24:23<04:29, 2.52s/it] {'loss': 1.0836, 'grad_norm': 44.21940612792969, 'learning_rate': 3.74457160675965e-08, 'fcm_dpo/beta': 0.0009335580398328602, 'fcm_dpo/q_t': 0.4033309519290924, 'fcm_dpo/delta': -0.025494040921330452, 'fcm_dpo/margin': 454.5906982421875, 'margin_dpo/margin_mean': 454.5906982421875, 'margin_dpo/margin_std': 616.3843994140625, 'logps/chosen': -749.768310546875, 'logps/rejected': -1246.12109375, 'logps/ref_chosen': -51.04446029663086, 'logps/ref_rejected': -92.80640411376953, 'KL/chosen_KL_mean': -698.723876953125, 'KL/rejected_KL_mean': -1153.314697265625, 'KL/mean': -926.019287109375, 'KL/std': 534.9053344726562, 'logits/chosen': -1.1207423210144043, 'logits/rejected': -1.147803783416748, 'epoch': 0.84} + 84%|████████▍ | 574/681 [24:23<04:29, 2.52s/it] 84%|████████▍ | 575/681 [24:26<04:31, 2.56s/it] {'loss': 1.1451, 'grad_norm': 47.46909713745117, 'learning_rate': 3.677293317363864e-08, 'fcm_dpo/beta': 0.0009250047150999308, 'fcm_dpo/q_t': 0.41284099221229553, 'fcm_dpo/delta': 0.012776091694831848, 'fcm_dpo/margin': 417.9283447265625, 'margin_dpo/margin_mean': 417.9283447265625, 'margin_dpo/margin_std': 721.6036376953125, 'logps/chosen': -859.8727416992188, 'logps/rejected': -1301.3970947265625, 'logps/ref_chosen': -71.7901382446289, 'logps/ref_rejected': -95.38619995117188, 'KL/chosen_KL_mean': -788.0826416015625, 'KL/rejected_KL_mean': -1206.0108642578125, 'KL/mean': -997.0467529296875, 'KL/std': 558.839599609375, 'logits/chosen': -0.9957201480865479, 'logits/rejected': -1.003206491470337, 'epoch': 0.84} + 84%|████████▍ | 575/681 [24:26<04:31, 2.56s/it] 85%|████████▍ | 576/681 [24:28<04:22, 2.50s/it] {'loss': 1.1847, 'grad_norm': 32.42967224121094, 'learning_rate': 3.6105769011194224e-08, 'fcm_dpo/beta': 0.0009450684301555157, 'fcm_dpo/q_t': 0.4328186810016632, 'fcm_dpo/delta': 0.10788638889789581, 'fcm_dpo/margin': 312.60577392578125, 'margin_dpo/margin_mean': 312.60577392578125, 'margin_dpo/margin_std': 577.78369140625, 'logps/chosen': -779.1849365234375, 'logps/rejected': -1138.281982421875, 'logps/ref_chosen': -54.262962341308594, 'logps/ref_rejected': -100.75428009033203, 'KL/chosen_KL_mean': -724.9219970703125, 'KL/rejected_KL_mean': -1037.5277099609375, 'KL/mean': -881.224853515625, 'KL/std': 497.8431396484375, 'logits/chosen': -1.0785043239593506, 'logits/rejected': -1.1083261966705322, 'epoch': 0.85} + 85%|████████▍ | 576/681 [24:28<04:22, 2.50s/it] 85%|████████▍ | 577/681 [24:30<04:17, 2.47s/it] {'loss': 1.1236, 'grad_norm': 30.890453338623047, 'learning_rate': 3.5444241160659304e-08, 'fcm_dpo/beta': 0.0009535005083307624, 'fcm_dpo/q_t': 0.41230309009552, 'fcm_dpo/delta': 0.01057706493884325, 'fcm_dpo/margin': 408.823486328125, 'margin_dpo/margin_mean': 408.8235168457031, 'margin_dpo/margin_std': 617.4876098632812, 'logps/chosen': -737.3373413085938, 'logps/rejected': -1168.32177734375, 'logps/ref_chosen': -61.909706115722656, 'logps/ref_rejected': -84.07069396972656, 'KL/chosen_KL_mean': -675.4276123046875, 'KL/rejected_KL_mean': -1084.251220703125, 'KL/mean': -879.83935546875, 'KL/std': 572.3829345703125, 'logits/chosen': -1.104528546333313, 'logits/rejected': -1.0938575267791748, 'epoch': 0.85} + 85%|████████▍ | 577/681 [24:31<04:17, 2.47s/it] 85%|████████▍ | 578/681 [24:33<04:19, 2.52s/it] {'loss': 1.0739, 'grad_norm': 39.44662094116211, 'learning_rate': 3.478836705390808e-08, 'fcm_dpo/beta': 0.0009461954468861222, 'fcm_dpo/q_t': 0.40265679359436035, 'fcm_dpo/delta': -0.028605271130800247, 'fcm_dpo/margin': 450.94769287109375, 'margin_dpo/margin_mean': 450.94769287109375, 'margin_dpo/margin_std': 569.8036499023438, 'logps/chosen': -702.3759765625, 'logps/rejected': -1187.4962158203125, 'logps/ref_chosen': -49.26368713378906, 'logps/ref_rejected': -83.4362564086914, 'KL/chosen_KL_mean': -653.1123046875, 'KL/rejected_KL_mean': -1104.0599365234375, 'KL/mean': -878.5861206054688, 'KL/std': 559.1902465820312, 'logits/chosen': -0.960713803768158, 'logits/rejected': -0.9943492412567139, 'epoch': 0.85} + 85%|████████▍ | 578/681 [24:33<04:19, 2.52s/it] 85%|████████▌ | 579/681 [24:36<04:25, 2.60s/it] {'loss': 1.2315, 'grad_norm': 62.6992301940918, 'learning_rate': 3.41381639738331e-08, 'fcm_dpo/beta': 0.0009595105657353997, 'fcm_dpo/q_t': 0.44163453578948975, 'fcm_dpo/delta': 0.050605472177267075, 'fcm_dpo/margin': 263.0865478515625, 'margin_dpo/margin_mean': 263.0865478515625, 'margin_dpo/margin_std': 606.66259765625, 'logps/chosen': -799.587890625, 'logps/rejected': -1098.576171875, 'logps/ref_chosen': -58.88581848144531, 'logps/ref_rejected': -94.78762817382812, 'KL/chosen_KL_mean': -740.7020263671875, 'KL/rejected_KL_mean': -1003.78857421875, 'KL/mean': -872.2453002929688, 'KL/std': 550.201416015625, 'logits/chosen': -0.9965687394142151, 'logits/rejected': -0.9950494766235352, 'epoch': 0.85} + 85%|████████▌ | 579/681 [24:36<04:25, 2.60s/it] 85%|████████▌ | 580/681 [24:39<04:24, 2.62s/it] {'loss': 1.0568, 'grad_norm': 31.18793487548828, 'learning_rate': 3.349364905389032e-08, 'fcm_dpo/beta': 0.0009441368165425956, 'fcm_dpo/q_t': 0.3952370882034302, 'fcm_dpo/delta': -0.08517200499773026, 'fcm_dpo/margin': 508.99420166015625, 'margin_dpo/margin_mean': 508.9941711425781, 'margin_dpo/margin_std': 707.2589111328125, 'logps/chosen': -585.2516479492188, 'logps/rejected': -1127.29736328125, 'logps/ref_chosen': -48.70683670043945, 'logps/ref_rejected': -81.7583999633789, 'KL/chosen_KL_mean': -536.5447998046875, 'KL/rejected_KL_mean': -1045.5389404296875, 'KL/mean': -791.0418701171875, 'KL/std': 616.0582275390625, 'logits/chosen': -0.8859065771102905, 'logits/rejected': -0.9189168214797974, 'epoch': 0.85} + 85%|████████▌ | 580/681 [24:39<04:24, 2.62s/it] 85%|████████▌ | 581/681 [24:41<04:17, 2.57s/it] {'loss': 1.1564, 'grad_norm': 41.60260009765625, 'learning_rate': 3.285483927764726e-08, 'fcm_dpo/beta': 0.0009529366507194936, 'fcm_dpo/q_t': 0.4243428111076355, 'fcm_dpo/delta': 0.06105329841375351, 'fcm_dpo/margin': 357.78289794921875, 'margin_dpo/margin_mean': 357.78289794921875, 'margin_dpo/margin_std': 635.5386962890625, 'logps/chosen': -798.9173583984375, 'logps/rejected': -1186.213623046875, 'logps/ref_chosen': -62.22235107421875, 'logps/ref_rejected': -91.73568725585938, 'KL/chosen_KL_mean': -736.6950073242188, 'KL/rejected_KL_mean': -1094.477783203125, 'KL/mean': -915.5865478515625, 'KL/std': 581.46533203125, 'logits/chosen': -1.1242549419403076, 'logits/rejected': -1.1350033283233643, 'epoch': 0.85} + 85%|████████▌ | 581/681 [24:41<04:17, 2.57s/it] 85%|████████▌ | 582/681 [24:44<04:15, 2.58s/it] {'loss': 1.1143, 'grad_norm': 39.270023345947266, 'learning_rate': 3.222175147833556e-08, 'fcm_dpo/beta': 0.0009470300283282995, 'fcm_dpo/q_t': 0.41153034567832947, 'fcm_dpo/delta': -0.08303224295377731, 'fcm_dpo/margin': 402.34197998046875, 'margin_dpo/margin_mean': 402.34197998046875, 'margin_dpo/margin_std': 554.0859375, 'logps/chosen': -707.0472412109375, 'logps/rejected': -1161.230224609375, 'logps/ref_chosen': -58.228660583496094, 'logps/ref_rejected': -110.06959533691406, 'KL/chosen_KL_mean': -648.818603515625, 'KL/rejected_KL_mean': -1051.16064453125, 'KL/mean': -849.9896240234375, 'KL/std': 508.7437744140625, 'logits/chosen': -1.0631489753723145, 'logits/rejected': -1.087165355682373, 'epoch': 0.85} + 85%|████████▌ | 582/681 [24:44<04:15, 2.58s/it] 86%|████████▌ | 583/681 [24:46<04:15, 2.61s/it] {'loss': 1.2665, 'grad_norm': 69.85670471191406, 'learning_rate': 3.159440233840763e-08, 'fcm_dpo/beta': 0.0009388748439960182, 'fcm_dpo/q_t': 0.44874462485313416, 'fcm_dpo/delta': -0.0015546621289104223, 'fcm_dpo/margin': 243.4691162109375, 'margin_dpo/margin_mean': 243.4691162109375, 'margin_dpo/margin_std': 655.7301025390625, 'logps/chosen': -790.9234619140625, 'logps/rejected': -1065.93359375, 'logps/ref_chosen': -56.86286163330078, 'logps/ref_rejected': -88.4039306640625, 'KL/chosen_KL_mean': -734.060546875, 'KL/rejected_KL_mean': -977.5296630859375, 'KL/mean': -855.7951049804688, 'KL/std': 547.7103271484375, 'logits/chosen': -1.029801845550537, 'logits/rejected': -1.0272910594940186, 'epoch': 0.86} + 86%|████████▌ | 583/681 [24:46<04:15, 2.61s/it] 86%|████████▌ | 584/681 [24:49<04:07, 2.55s/it] {'loss': 1.0467, 'grad_norm': 31.617774963378906, 'learning_rate': 3.0972808389096635e-08, 'fcm_dpo/beta': 0.0009302167454734445, 'fcm_dpo/q_t': 0.3950466513633728, 'fcm_dpo/delta': -0.06771711260080338, 'fcm_dpo/margin': 499.46441650390625, 'margin_dpo/margin_mean': 499.46441650390625, 'margin_dpo/margin_std': 599.928466796875, 'logps/chosen': -671.107421875, 'logps/rejected': -1211.3072509765625, 'logps/ref_chosen': -56.90068054199219, 'logps/ref_rejected': -97.63606262207031, 'KL/chosen_KL_mean': -614.206787109375, 'KL/rejected_KL_mean': -1113.671142578125, 'KL/mean': -863.93896484375, 'KL/std': 565.6908569335938, 'logits/chosen': -1.0071210861206055, 'logits/rejected': -1.0183664560317993, 'epoch': 0.86} + 86%|████████▌ | 584/681 [24:49<04:07, 2.55s/it] 86%|████████▌ | 585/681 [24:51<04:03, 2.54s/it] {'loss': 1.1214, 'grad_norm': 37.25502395629883, 'learning_rate': 3.035698600998121e-08, 'fcm_dpo/beta': 0.0009213717421516776, 'fcm_dpo/q_t': 0.4065605700016022, 'fcm_dpo/delta': -0.016240080818533897, 'fcm_dpo/margin': 450.80718994140625, 'margin_dpo/margin_mean': 450.8072509765625, 'margin_dpo/margin_std': 750.4278564453125, 'logps/chosen': -780.543701171875, 'logps/rejected': -1254.5465087890625, 'logps/ref_chosen': -60.973968505859375, 'logps/ref_rejected': -84.16952514648438, 'KL/chosen_KL_mean': -719.5697631835938, 'KL/rejected_KL_mean': -1170.376953125, 'KL/mean': -944.973388671875, 'KL/std': 628.8306884765625, 'logits/chosen': -1.0849614143371582, 'logits/rejected': -1.1072694063186646, 'epoch': 0.86} + 86%|████████▌ | 585/681 [24:51<04:03, 2.54s/it] 86%|████████▌ | 586/681 [24:54<04:00, 2.54s/it] {'loss': 1.1966, 'grad_norm': 32.1750602722168, 'learning_rate': 2.974695142855388e-08, 'fcm_dpo/beta': 0.0009377297828905284, 'fcm_dpo/q_t': 0.4342753291130066, 'fcm_dpo/delta': 0.11785154044628143, 'fcm_dpo/margin': 304.63677978515625, 'margin_dpo/margin_mean': 304.63677978515625, 'margin_dpo/margin_std': 600.1655883789062, 'logps/chosen': -821.101318359375, 'logps/rejected': -1160.68505859375, 'logps/ref_chosen': -56.85559844970703, 'logps/ref_rejected': -91.80261993408203, 'KL/chosen_KL_mean': -764.2457275390625, 'KL/rejected_KL_mean': -1068.88232421875, 'KL/mean': -916.5640869140625, 'KL/std': 550.64111328125, 'logits/chosen': -1.0714232921600342, 'logits/rejected': -1.0927150249481201, 'epoch': 0.86} + 86%|████████▌ | 586/681 [24:54<04:00, 2.54s/it] 86%|████████▌ | 587/681 [24:56<03:59, 2.54s/it] {'loss': 1.1065, 'grad_norm': 39.31088638305664, 'learning_rate': 2.9142720719793122e-08, 'fcm_dpo/beta': 0.0009496827842667699, 'fcm_dpo/q_t': 0.4112315773963928, 'fcm_dpo/delta': 0.01429927907884121, 'fcm_dpo/margin': 406.54833984375, 'margin_dpo/margin_mean': 406.54833984375, 'margin_dpo/margin_std': 574.6107177734375, 'logps/chosen': -576.4086303710938, 'logps/rejected': -1020.8892211914062, 'logps/ref_chosen': -44.69159698486328, 'logps/ref_rejected': -82.62385559082031, 'KL/chosen_KL_mean': -531.717041015625, 'KL/rejected_KL_mean': -938.265380859375, 'KL/mean': -734.9912109375, 'KL/std': 562.0120849609375, 'logits/chosen': -1.075244426727295, 'logits/rejected': -1.0990477800369263, 'epoch': 0.86} + 86%|████████▌ | 587/681 [24:56<03:59, 2.54s/it] 86%|████████▋ | 588/681 [24:59<03:50, 2.47s/it] {'loss': 1.1371, 'grad_norm': 29.434553146362305, 'learning_rate': 2.8544309805740018e-08, 'fcm_dpo/beta': 0.0009533903794363141, 'fcm_dpo/q_t': 0.4219910204410553, 'fcm_dpo/delta': 0.06662734597921371, 'fcm_dpo/margin': 351.92852783203125, 'margin_dpo/margin_mean': 351.92852783203125, 'margin_dpo/margin_std': 510.2389831542969, 'logps/chosen': -741.4427490234375, 'logps/rejected': -1150.4461669921875, 'logps/ref_chosen': -50.29494857788086, 'logps/ref_rejected': -107.36988067626953, 'KL/chosen_KL_mean': -691.1477661132812, 'KL/rejected_KL_mean': -1043.0762939453125, 'KL/mean': -867.112060546875, 'KL/std': 485.6700134277344, 'logits/chosen': -1.0492743253707886, 'logits/rejected': -1.0743939876556396, 'epoch': 0.86} + 86%|████████▋ | 588/681 [24:59<03:50, 2.47s/it] 86%|████████▋ | 589/681 [25:01<03:47, 2.48s/it] {'loss': 1.0566, 'grad_norm': 34.32419204711914, 'learning_rate': 2.7951734455078786e-08, 'fcm_dpo/beta': 0.0009546533692628145, 'fcm_dpo/q_t': 0.3972678780555725, 'fcm_dpo/delta': -0.05010441318154335, 'fcm_dpo/margin': 469.16448974609375, 'margin_dpo/margin_mean': 469.16448974609375, 'margin_dpo/margin_std': 573.5706787109375, 'logps/chosen': -740.38232421875, 'logps/rejected': -1261.2723388671875, 'logps/ref_chosen': -59.929908752441406, 'logps/ref_rejected': -111.65534973144531, 'KL/chosen_KL_mean': -680.4524536132812, 'KL/rejected_KL_mean': -1149.616943359375, 'KL/mean': -915.0347290039062, 'KL/std': 550.6396484375, 'logits/chosen': -1.0105873346328735, 'logits/rejected': -1.0250484943389893, 'epoch': 0.86} + 86%|████████▋ | 589/681 [25:01<03:47, 2.48s/it] 87%|████████▋ | 590/681 [25:04<03:42, 2.45s/it] {'loss': 1.0732, 'grad_norm': 26.10135269165039, 'learning_rate': 2.736501028272095e-08, 'fcm_dpo/beta': 0.0009457060368731618, 'fcm_dpo/q_t': 0.4021071493625641, 'fcm_dpo/delta': -0.034474555402994156, 'fcm_dpo/margin': 457.81378173828125, 'margin_dpo/margin_mean': 457.81378173828125, 'margin_dpo/margin_std': 602.8309936523438, 'logps/chosen': -667.2130126953125, 'logps/rejected': -1175.2799072265625, 'logps/ref_chosen': -55.80979537963867, 'logps/ref_rejected': -106.06282043457031, 'KL/chosen_KL_mean': -611.4031982421875, 'KL/rejected_KL_mean': -1069.217041015625, 'KL/mean': -840.3101196289062, 'KL/std': 545.7060546875, 'logits/chosen': -0.9935860633850098, 'logits/rejected': -1.0191072225570679, 'epoch': 0.87} + 87%|████████▋ | 590/681 [25:04<03:42, 2.45s/it] 87%|████████▋ | 591/681 [25:06<03:32, 2.36s/it] {'loss': 1.1031, 'grad_norm': 33.07461166381836, 'learning_rate': 2.678415274939408e-08, 'fcm_dpo/beta': 0.0009449812932871282, 'fcm_dpo/q_t': 0.4091309905052185, 'fcm_dpo/delta': 0.007675642147660255, 'fcm_dpo/margin': 415.48028564453125, 'margin_dpo/margin_mean': 415.48028564453125, 'margin_dpo/margin_std': 583.4301147460938, 'logps/chosen': -719.00390625, 'logps/rejected': -1162.02978515625, 'logps/ref_chosen': -56.24061965942383, 'logps/ref_rejected': -83.78629302978516, 'KL/chosen_KL_mean': -662.7632446289062, 'KL/rejected_KL_mean': -1078.24365234375, 'KL/mean': -870.50341796875, 'KL/std': 519.09814453125, 'logits/chosen': -1.0695421695709229, 'logits/rejected': -1.0605621337890625, 'epoch': 0.87} + 87%|████████▋ | 591/681 [25:06<03:32, 2.36s/it] 87%|████████▋ | 592/681 [25:08<03:37, 2.44s/it] {'loss': 1.1759, 'grad_norm': 33.25096893310547, 'learning_rate': 2.6209177161234442e-08, 'fcm_dpo/beta': 0.0009508398361504078, 'fcm_dpo/q_t': 0.4189043939113617, 'fcm_dpo/delta': 0.039905961602926254, 'fcm_dpo/margin': 380.2485656738281, 'margin_dpo/margin_mean': 380.24853515625, 'margin_dpo/margin_std': 739.9800415039062, 'logps/chosen': -767.0955810546875, 'logps/rejected': -1175.13671875, 'logps/ref_chosen': -47.94025421142578, 'logps/ref_rejected': -75.73287963867188, 'KL/chosen_KL_mean': -719.1552734375, 'KL/rejected_KL_mean': -1099.40380859375, 'KL/mean': -909.2796020507812, 'KL/std': 545.5037841796875, 'logits/chosen': -1.052534818649292, 'logits/rejected': -1.05497407913208, 'epoch': 0.87} + 87%|████████▋ | 592/681 [25:08<03:37, 2.44s/it] 87%|████████▋ | 593/681 [25:11<03:38, 2.49s/it] {'loss': 1.2163, 'grad_norm': 60.29446029663086, 'learning_rate': 2.564009866938349e-08, 'fcm_dpo/beta': 0.0009585937950760126, 'fcm_dpo/q_t': 0.43642657995224, 'fcm_dpo/delta': 0.026102518662810326, 'fcm_dpo/margin': 297.45526123046875, 'margin_dpo/margin_mean': 297.45526123046875, 'margin_dpo/margin_std': 659.6752319335938, 'logps/chosen': -734.453125, 'logps/rejected': -1044.125732421875, 'logps/ref_chosen': -48.690757751464844, 'logps/ref_rejected': -60.90800094604492, 'KL/chosen_KL_mean': -685.7623901367188, 'KL/rejected_KL_mean': -983.2177124023438, 'KL/mean': -834.489990234375, 'KL/std': 593.387939453125, 'logits/chosen': -0.925950825214386, 'logits/rejected': -0.916424036026001, 'epoch': 0.87} + 87%|████████▋ | 593/681 [25:11<03:38, 2.49s/it] 87%|████████▋ | 594/681 [25:13<03:36, 2.49s/it] {'loss': 1.1458, 'grad_norm': 36.9598388671875, 'learning_rate': 2.5076932269588708e-08, 'fcm_dpo/beta': 0.0009541836334392428, 'fcm_dpo/q_t': 0.41900911927223206, 'fcm_dpo/delta': -0.046220000833272934, 'fcm_dpo/margin': 371.89556884765625, 'margin_dpo/margin_mean': 371.8955993652344, 'margin_dpo/margin_std': 604.8126831054688, 'logps/chosen': -713.7626953125, 'logps/rejected': -1116.822998046875, 'logps/ref_chosen': -54.93488693237305, 'logps/ref_rejected': -86.09967803955078, 'KL/chosen_KL_mean': -658.8277587890625, 'KL/rejected_KL_mean': -1030.723388671875, 'KL/mean': -844.7755737304688, 'KL/std': 573.47119140625, 'logits/chosen': -1.0531035661697388, 'logits/rejected': -1.044195294380188, 'epoch': 0.87} + 87%|████████▋ | 594/681 [25:13<03:36, 2.49s/it] 87%|████████▋ | 595/681 [25:16<03:36, 2.52s/it] {'loss': 1.0908, 'grad_norm': 39.55894470214844, 'learning_rate': 2.451969280180849e-08, 'fcm_dpo/beta': 0.0009470278164371848, 'fcm_dpo/q_t': 0.4093731641769409, 'fcm_dpo/delta': -0.00846764538437128, 'fcm_dpo/margin': 430.8812255859375, 'margin_dpo/margin_mean': 430.8812255859375, 'margin_dpo/margin_std': 596.7766723632812, 'logps/chosen': -667.1563720703125, 'logps/rejected': -1129.24462890625, 'logps/ref_chosen': -49.4204216003418, 'logps/ref_rejected': -80.62731170654297, 'KL/chosen_KL_mean': -617.7359619140625, 'KL/rejected_KL_mean': -1048.6171875, 'KL/mean': -833.1766357421875, 'KL/std': 538.6633911132812, 'logits/chosen': -1.0494012832641602, 'logits/rejected': -1.0700163841247559, 'epoch': 0.87} + 87%|████████▋ | 595/681 [25:16<03:36, 2.52s/it] 88%|████████▊ | 596/681 [25:19<03:36, 2.55s/it] {'loss': 1.2079, 'grad_norm': 68.12952423095703, 'learning_rate': 2.396839494982103e-08, 'fcm_dpo/beta': 0.0009654526365920901, 'fcm_dpo/q_t': 0.43718546628952026, 'fcm_dpo/delta': 0.11698772758245468, 'fcm_dpo/margin': 296.531982421875, 'margin_dpo/margin_mean': 296.531982421875, 'margin_dpo/margin_std': 639.1839599609375, 'logps/chosen': -775.834716796875, 'logps/rejected': -1092.666015625, 'logps/ref_chosen': -59.791683197021484, 'logps/ref_rejected': -80.09111785888672, 'KL/chosen_KL_mean': -716.04296875, 'KL/rejected_KL_mean': -1012.574951171875, 'KL/mean': -864.3089599609375, 'KL/std': 536.0817260742188, 'logits/chosen': -1.018229603767395, 'logits/rejected': -0.9841310381889343, 'epoch': 0.88} + 88%|████████▊ | 596/681 [25:19<03:36, 2.55s/it] 88%|████████▊ | 597/681 [25:21<03:34, 2.56s/it] {'loss': 1.0533, 'grad_norm': 26.535120010375977, 'learning_rate': 2.3423053240837514e-08, 'fcm_dpo/beta': 0.0009458234999328852, 'fcm_dpo/q_t': 0.3902336359024048, 'fcm_dpo/delta': -0.10513734817504883, 'fcm_dpo/margin': 525.3020629882812, 'margin_dpo/margin_mean': 525.3020629882812, 'margin_dpo/margin_std': 688.6737060546875, 'logps/chosen': -740.5325317382812, 'logps/rejected': -1309.267578125, 'logps/ref_chosen': -57.26078796386719, 'logps/ref_rejected': -100.6937255859375, 'KL/chosen_KL_mean': -683.271728515625, 'KL/rejected_KL_mean': -1208.5738525390625, 'KL/mean': -945.9227905273438, 'KL/std': 629.5780639648438, 'logits/chosen': -0.9855027198791504, 'logits/rejected': -1.036144733428955, 'epoch': 0.88} + 88%|████████▊ | 597/681 [25:21<03:34, 2.56s/it] 88%|████████▊ | 598/681 [25:24<03:27, 2.50s/it] {'loss': 1.1156, 'grad_norm': 40.530555725097656, 'learning_rate': 2.2883682045119062e-08, 'fcm_dpo/beta': 0.0009448026539757848, 'fcm_dpo/q_t': 0.41108059883117676, 'fcm_dpo/delta': 0.012614801526069641, 'fcm_dpo/margin': 409.35821533203125, 'margin_dpo/margin_mean': 409.3581848144531, 'margin_dpo/margin_std': 592.7867431640625, 'logps/chosen': -737.45263671875, 'logps/rejected': -1183.7362060546875, 'logps/ref_chosen': -52.51850509643555, 'logps/ref_rejected': -89.44385528564453, 'KL/chosen_KL_mean': -684.9342041015625, 'KL/rejected_KL_mean': -1094.292236328125, 'KL/mean': -889.61328125, 'KL/std': 542.2847900390625, 'logits/chosen': -1.0675361156463623, 'logits/rejected': -1.0781702995300293, 'epoch': 0.88} + 88%|████████▊ | 598/681 [25:24<03:27, 2.50s/it] 88%|████████▊ | 599/681 [25:26<03:29, 2.56s/it] {'loss': 1.1528, 'grad_norm': 32.54823303222656, 'learning_rate': 2.2350295575598367e-08, 'fcm_dpo/beta': 0.0009433372761122882, 'fcm_dpo/q_t': 0.4230959117412567, 'fcm_dpo/delta': -0.04999526962637901, 'fcm_dpo/margin': 336.27593994140625, 'margin_dpo/margin_mean': 336.27593994140625, 'margin_dpo/margin_std': 495.86431884765625, 'logps/chosen': -749.1466064453125, 'logps/rejected': -1118.598388671875, 'logps/ref_chosen': -49.802677154541016, 'logps/ref_rejected': -82.978515625, 'KL/chosen_KL_mean': -699.3439331054688, 'KL/rejected_KL_mean': -1035.619873046875, 'KL/mean': -867.4818725585938, 'KL/std': 521.607421875, 'logits/chosen': -1.0585663318634033, 'logits/rejected': -1.0668901205062866, 'epoch': 0.88} + 88%|████████▊ | 599/681 [25:26<03:29, 2.56s/it] 88%|████████▊ | 600/681 [25:29<03:30, 2.60s/it] {'loss': 1.1881, 'grad_norm': 32.78029251098633, 'learning_rate': 2.1822907887504932e-08, 'fcm_dpo/beta': 0.0009553448762744665, 'fcm_dpo/q_t': 0.4312170147895813, 'fcm_dpo/delta': 0.0948304608464241, 'fcm_dpo/margin': 322.5916748046875, 'margin_dpo/margin_mean': 322.5916748046875, 'margin_dpo/margin_std': 638.26123046875, 'logps/chosen': -810.03076171875, 'logps/rejected': -1151.64404296875, 'logps/ref_chosen': -66.43487548828125, 'logps/ref_rejected': -85.45649719238281, 'KL/chosen_KL_mean': -743.595947265625, 'KL/rejected_KL_mean': -1066.1876220703125, 'KL/mean': -904.8917846679688, 'KL/std': 529.2329711914062, 'logits/chosen': -1.1024036407470703, 'logits/rejected': -1.0984766483306885, 'epoch': 0.88} + 88%|████████▊ | 600/681 [25:29<03:30, 2.60s/it] 88%|████████▊ | 601/681 [25:31<03:21, 2.52s/it] {'loss': 1.0942, 'grad_norm': 36.36787796020508, 'learning_rate': 2.1301532877994742e-08, 'fcm_dpo/beta': 0.0009599350159987807, 'fcm_dpo/q_t': 0.40683579444885254, 'fcm_dpo/delta': -0.0015968242660164833, 'fcm_dpo/margin': 418.25567626953125, 'margin_dpo/margin_mean': 418.2556457519531, 'margin_dpo/margin_std': 568.2978515625, 'logps/chosen': -826.8714599609375, 'logps/rejected': -1280.684326171875, 'logps/ref_chosen': -59.13361358642578, 'logps/ref_rejected': -94.69093322753906, 'KL/chosen_KL_mean': -767.73779296875, 'KL/rejected_KL_mean': -1185.993408203125, 'KL/mean': -976.8656005859375, 'KL/std': 572.8872680664062, 'logits/chosen': -1.050248622894287, 'logits/rejected': -1.0717060565948486, 'epoch': 0.88} + 88%|████████▊ | 601/681 [25:31<03:21, 2.52s/it] 88%|████████▊ | 602/681 [25:34<03:19, 2.52s/it] {'loss': 1.0206, 'grad_norm': 62.27396774291992, 'learning_rate': 2.0786184285784298e-08, 'fcm_dpo/beta': 0.0009524415945634246, 'fcm_dpo/q_t': 0.38715463876724243, 'fcm_dpo/delta': -0.10132233053445816, 'fcm_dpo/margin': 520.951171875, 'margin_dpo/margin_mean': 520.9512329101562, 'margin_dpo/margin_std': 575.708740234375, 'logps/chosen': -586.4228515625, 'logps/rejected': -1146.4490966796875, 'logps/ref_chosen': -48.59352111816406, 'logps/ref_rejected': -87.6685562133789, 'KL/chosen_KL_mean': -537.829345703125, 'KL/rejected_KL_mean': -1058.780517578125, 'KL/mean': -798.304931640625, 'KL/std': 537.489990234375, 'logits/chosen': -1.0703651905059814, 'logits/rejected': -1.1087815761566162, 'epoch': 0.88} + 88%|████████▊ | 602/681 [25:34<03:19, 2.52s/it] 89%|████████▊ | 603/681 [25:36<03:14, 2.49s/it] {'loss': 1.0872, 'grad_norm': 34.53113555908203, 'learning_rate': 2.0276875690788204e-08, 'fcm_dpo/beta': 0.0009342863922938704, 'fcm_dpo/q_t': 0.4025436341762543, 'fcm_dpo/delta': -0.04748653993010521, 'fcm_dpo/margin': 476.6319580078125, 'margin_dpo/margin_mean': 476.6319580078125, 'margin_dpo/margin_std': 712.30078125, 'logps/chosen': -743.2083129882812, 'logps/rejected': -1249.751220703125, 'logps/ref_chosen': -70.41461944580078, 'logps/ref_rejected': -100.32559967041016, 'KL/chosen_KL_mean': -672.793701171875, 'KL/rejected_KL_mean': -1149.425537109375, 'KL/mean': -911.109619140625, 'KL/std': 605.7391967773438, 'logits/chosen': -1.0783579349517822, 'logits/rejected': -1.0710588693618774, 'epoch': 0.89} + 89%|████████▊ | 603/681 [25:36<03:14, 2.49s/it] 89%|████████▊ | 604/681 [25:39<03:22, 2.63s/it] {'loss': 1.0614, 'grad_norm': 40.38688278198242, 'learning_rate': 1.977362051376158e-08, 'fcm_dpo/beta': 0.0009213722078129649, 'fcm_dpo/q_t': 0.39393433928489685, 'fcm_dpo/delta': -0.08345615863800049, 'fcm_dpo/margin': 520.3671264648438, 'margin_dpo/margin_mean': 520.3671875, 'margin_dpo/margin_std': 716.9524536132812, 'logps/chosen': -706.8075561523438, 'logps/rejected': -1272.571044921875, 'logps/ref_chosen': -46.45808029174805, 'logps/ref_rejected': -91.8544921875, 'KL/chosen_KL_mean': -660.3494873046875, 'KL/rejected_KL_mean': -1180.716552734375, 'KL/mean': -920.5330810546875, 'KL/std': 584.669677734375, 'logits/chosen': -1.0343176126480103, 'logits/rejected': -1.0740426778793335, 'epoch': 0.89} + 89%|████████▊ | 604/681 [25:39<03:22, 2.63s/it] 89%|████████▉ | 605/681 [25:42<03:19, 2.63s/it] {'loss': 1.1514, 'grad_norm': 31.172998428344727, 'learning_rate': 1.9276432015946446e-08, 'fcm_dpo/beta': 0.0009236353216692805, 'fcm_dpo/q_t': 0.4245484471321106, 'fcm_dpo/delta': 0.06231696531176567, 'fcm_dpo/margin': 367.9199523925781, 'margin_dpo/margin_mean': 367.919921875, 'margin_dpo/margin_std': 631.5323486328125, 'logps/chosen': -748.0568237304688, 'logps/rejected': -1152.032470703125, 'logps/ref_chosen': -66.24933624267578, 'logps/ref_rejected': -102.30496978759766, 'KL/chosen_KL_mean': -681.8074951171875, 'KL/rejected_KL_mean': -1049.7274169921875, 'KL/mean': -865.7674560546875, 'KL/std': 528.2335205078125, 'logits/chosen': -1.030253291130066, 'logits/rejected': -1.046684980392456, 'epoch': 0.89} + 89%|████████▉ | 605/681 [25:42<03:19, 2.63s/it] 89%|████████▉ | 606/681 [25:44<03:12, 2.56s/it] {'loss': 1.1119, 'grad_norm': 34.67936706542969, 'learning_rate': 1.8785323298722093e-08, 'fcm_dpo/beta': 0.0009358040988445282, 'fcm_dpo/q_t': 0.41396206617355347, 'fcm_dpo/delta': 0.021479565650224686, 'fcm_dpo/margin': 404.9098205566406, 'margin_dpo/margin_mean': 404.90985107421875, 'margin_dpo/margin_std': 580.0758666992188, 'logps/chosen': -745.1249389648438, 'logps/rejected': -1193.587158203125, 'logps/ref_chosen': -54.819122314453125, 'logps/ref_rejected': -98.37146759033203, 'KL/chosen_KL_mean': -690.3057861328125, 'KL/rejected_KL_mean': -1095.2156982421875, 'KL/mean': -892.7607421875, 'KL/std': 546.4476928710938, 'logits/chosen': -1.0415606498718262, 'logits/rejected': -1.0564000606536865, 'epoch': 0.89} + 89%|████████▉ | 606/681 [25:44<03:12, 2.56s/it] 89%|████████▉ | 607/681 [25:47<03:11, 2.59s/it] {'loss': 1.1634, 'grad_norm': 25.185142517089844, 'learning_rate': 1.8300307303259904e-08, 'fcm_dpo/beta': 0.0009470410877838731, 'fcm_dpo/q_t': 0.4270426332950592, 'fcm_dpo/delta': 0.08123958110809326, 'fcm_dpo/margin': 338.9346923828125, 'margin_dpo/margin_mean': 338.9346923828125, 'margin_dpo/margin_std': 585.0699462890625, 'logps/chosen': -763.7471923828125, 'logps/rejected': -1124.375, 'logps/ref_chosen': -58.08403778076172, 'logps/ref_rejected': -79.777099609375, 'KL/chosen_KL_mean': -705.6632080078125, 'KL/rejected_KL_mean': -1044.597900390625, 'KL/mean': -875.1304931640625, 'KL/std': 545.179931640625, 'logits/chosen': -1.0110514163970947, 'logits/rejected': -0.9996987581253052, 'epoch': 0.89} + 89%|████████▉ | 607/681 [25:47<03:11, 2.59s/it] 89%|████████▉ | 608/681 [25:49<03:04, 2.53s/it] {'loss': 1.0917, 'grad_norm': 29.697845458984375, 'learning_rate': 1.7821396810182437e-08, 'fcm_dpo/beta': 0.0009526251233182847, 'fcm_dpo/q_t': 0.410768061876297, 'fcm_dpo/delta': 0.016666967421770096, 'fcm_dpo/margin': 402.94073486328125, 'margin_dpo/margin_mean': 402.94073486328125, 'margin_dpo/margin_std': 497.81024169921875, 'logps/chosen': -677.8575439453125, 'logps/rejected': -1118.120849609375, 'logps/ref_chosen': -57.450836181640625, 'logps/ref_rejected': -94.77339172363281, 'KL/chosen_KL_mean': -620.40673828125, 'KL/rejected_KL_mean': -1023.3474731445312, 'KL/mean': -821.8770751953125, 'KL/std': 486.0280456542969, 'logits/chosen': -1.0471224784851074, 'logits/rejected': -1.0614254474639893, 'epoch': 0.89} + 89%|████████▉ | 608/681 [25:49<03:04, 2.53s/it] 89%|████████▉ | 609/681 [25:52<02:57, 2.46s/it] {'loss': 1.0694, 'grad_norm': 34.812618255615234, 'learning_rate': 1.7348604439226617e-08, 'fcm_dpo/beta': 0.000938827870413661, 'fcm_dpo/q_t': 0.39671069383621216, 'fcm_dpo/delta': -0.09398971498012543, 'fcm_dpo/margin': 521.3295288085938, 'margin_dpo/margin_mean': 521.32958984375, 'margin_dpo/margin_std': 807.185302734375, 'logps/chosen': -717.5792236328125, 'logps/rejected': -1268.91943359375, 'logps/ref_chosen': -58.805355072021484, 'logps/ref_rejected': -88.81600952148438, 'KL/chosen_KL_mean': -658.7738037109375, 'KL/rejected_KL_mean': -1180.103271484375, 'KL/mean': -919.4385986328125, 'KL/std': 681.7255249023438, 'logits/chosen': -1.0940017700195312, 'logits/rejected': -1.117903709411621, 'epoch': 0.89} + 89%|████████▉ | 609/681 [25:52<02:57, 2.46s/it] 90%|████████▉ | 610/681 [25:54<02:53, 2.44s/it] {'loss': 1.1704, 'grad_norm': 41.71244812011719, 'learning_rate': 1.6881942648911074e-08, 'fcm_dpo/beta': 0.0009477235144004226, 'fcm_dpo/q_t': 0.4269651770591736, 'fcm_dpo/delta': 0.08690465986728668, 'fcm_dpo/margin': 333.0103759765625, 'margin_dpo/margin_mean': 333.0104064941406, 'margin_dpo/margin_std': 598.3924560546875, 'logps/chosen': -712.75244140625, 'logps/rejected': -1063.47314453125, 'logps/ref_chosen': -65.69503784179688, 'logps/ref_rejected': -83.40538787841797, 'KL/chosen_KL_mean': -647.0574340820312, 'KL/rejected_KL_mean': -980.0677490234375, 'KL/mean': -813.5626220703125, 'KL/std': 518.0669555664062, 'logits/chosen': -0.9856526851654053, 'logits/rejected': -0.9613279104232788, 'epoch': 0.9} + 90%|████████▉ | 610/681 [25:54<02:53, 2.44s/it] 90%|████████▉ | 611/681 [25:56<02:46, 2.38s/it] {'loss': 1.0449, 'grad_norm': 28.344263076782227, 'learning_rate': 1.6421423736208e-08, 'fcm_dpo/beta': 0.000932648777961731, 'fcm_dpo/q_t': 0.3880399465560913, 'fcm_dpo/delta': -0.12321210652589798, 'fcm_dpo/margin': 554.140625, 'margin_dpo/margin_mean': 554.140625, 'margin_dpo/margin_std': 762.7548828125, 'logps/chosen': -727.41064453125, 'logps/rejected': -1315.2828369140625, 'logps/ref_chosen': -52.59946823120117, 'logps/ref_rejected': -86.33099365234375, 'KL/chosen_KL_mean': -674.8112182617188, 'KL/rejected_KL_mean': -1228.951904296875, 'KL/mean': -951.8814697265625, 'KL/std': 677.4830932617188, 'logits/chosen': -1.058631181716919, 'logits/rejected': -1.1068617105484009, 'epoch': 0.9} + 90%|████████▉ | 611/681 [25:56<02:46, 2.38s/it] 90%|████████▉ | 612/681 [25:59<02:43, 2.37s/it] {'loss': 1.0948, 'grad_norm': 29.17259979248047, 'learning_rate': 1.5967059836219042e-08, 'fcm_dpo/beta': 0.0009295167401432991, 'fcm_dpo/q_t': 0.40825164318084717, 'fcm_dpo/delta': -0.001130029559135437, 'fcm_dpo/margin': 431.320556640625, 'margin_dpo/margin_mean': 431.320556640625, 'margin_dpo/margin_std': 588.9520263671875, 'logps/chosen': -790.8104248046875, 'logps/rejected': -1251.11962890625, 'logps/ref_chosen': -59.32372283935547, 'logps/ref_rejected': -88.31239318847656, 'KL/chosen_KL_mean': -731.4866943359375, 'KL/rejected_KL_mean': -1162.8072509765625, 'KL/mean': -947.14697265625, 'KL/std': 552.690185546875, 'logits/chosen': -1.0582959651947021, 'logits/rejected': -1.0618293285369873, 'epoch': 0.9} + 90%|████████▉ | 612/681 [25:59<02:43, 2.37s/it] 90%|█████████ | 613/681 [26:01<02:43, 2.41s/it] {'loss': 1.0542, 'grad_norm': 36.79396057128906, 'learning_rate': 1.551886292185553e-08, 'fcm_dpo/beta': 0.0009158622706308961, 'fcm_dpo/q_t': 0.39547261595726013, 'fcm_dpo/delta': -0.0603950060904026, 'fcm_dpo/margin': 499.3277282714844, 'margin_dpo/margin_mean': 499.3277282714844, 'margin_dpo/margin_std': 621.1013793945312, 'logps/chosen': -703.5807495117188, 'logps/rejected': -1248.2861328125, 'logps/ref_chosen': -59.72996520996094, 'logps/ref_rejected': -105.10752868652344, 'KL/chosen_KL_mean': -643.850830078125, 'KL/rejected_KL_mean': -1143.178466796875, 'KL/mean': -893.5146484375, 'KL/std': 606.8695068359375, 'logits/chosen': -1.0371217727661133, 'logits/rejected': -1.0906472206115723, 'epoch': 0.9} + 90%|█████████ | 613/681 [26:01<02:43, 2.41s/it] 90%|█████████ | 614/681 [26:04<02:45, 2.46s/it] {'loss': 1.0711, 'grad_norm': 42.726219177246094, 'learning_rate': 1.507684480352292e-08, 'fcm_dpo/beta': 0.0009089302038773894, 'fcm_dpo/q_t': 0.398156076669693, 'fcm_dpo/delta': -0.058258313685655594, 'fcm_dpo/margin': 501.296142578125, 'margin_dpo/margin_mean': 501.296142578125, 'margin_dpo/margin_std': 695.1583251953125, 'logps/chosen': -751.491943359375, 'logps/rejected': -1304.5284423828125, 'logps/ref_chosen': -52.93898010253906, 'logps/ref_rejected': -104.67938232421875, 'KL/chosen_KL_mean': -698.552978515625, 'KL/rejected_KL_mean': -1199.84912109375, 'KL/mean': -949.2009887695312, 'KL/std': 597.0087890625, 'logits/chosen': -1.0097222328186035, 'logits/rejected': -1.0844841003417969, 'epoch': 0.9} + 90%|█████████ | 614/681 [26:04<02:45, 2.46s/it] 90%|█████████ | 615/681 [26:06<02:47, 2.54s/it] {'loss': 1.1256, 'grad_norm': 30.516206741333008, 'learning_rate': 1.4641017128809801e-08, 'fcm_dpo/beta': 0.0009105931967496872, 'fcm_dpo/q_t': 0.41320300102233887, 'fcm_dpo/delta': 0.019142257049679756, 'fcm_dpo/margin': 418.7528076171875, 'margin_dpo/margin_mean': 418.7528076171875, 'margin_dpo/margin_std': 670.531982421875, 'logps/chosen': -739.7527465820312, 'logps/rejected': -1187.86572265625, 'logps/ref_chosen': -65.81727600097656, 'logps/ref_rejected': -95.17749786376953, 'KL/chosen_KL_mean': -673.9354248046875, 'KL/rejected_KL_mean': -1092.688232421875, 'KL/mean': -883.3118896484375, 'KL/std': 623.587158203125, 'logits/chosen': -1.0232172012329102, 'logits/rejected': -1.0445995330810547, 'epoch': 0.9} + 90%|█████████ | 615/681 [26:06<02:47, 2.54s/it] 90%|█████████ | 616/681 [26:09<02:49, 2.61s/it] {'loss': 1.1665, 'grad_norm': 29.76529884338379, 'learning_rate': 1.4211391382180637e-08, 'fcm_dpo/beta': 0.0009216421167366207, 'fcm_dpo/q_t': 0.4289320111274719, 'fcm_dpo/delta': 0.0916953831911087, 'fcm_dpo/margin': 337.53411865234375, 'margin_dpo/margin_mean': 337.53411865234375, 'margin_dpo/margin_std': 574.6236572265625, 'logps/chosen': -854.9956665039062, 'logps/rejected': -1202.097412109375, 'logps/ref_chosen': -65.13285827636719, 'logps/ref_rejected': -74.70050048828125, 'KL/chosen_KL_mean': -789.86279296875, 'KL/rejected_KL_mean': -1127.3968505859375, 'KL/mean': -958.6298828125, 'KL/std': 513.2537841796875, 'logits/chosen': -1.0982820987701416, 'logits/rejected': -1.080725073814392, 'epoch': 0.9} + 90%|█████████ | 616/681 [26:09<02:49, 2.61s/it] 91%|█████████ | 617/681 [26:12<02:46, 2.61s/it] {'loss': 1.2466, 'grad_norm': 61.406524658203125, 'learning_rate': 1.378797888467345e-08, 'fcm_dpo/beta': 0.0009326934814453125, 'fcm_dpo/q_t': 0.4489472508430481, 'fcm_dpo/delta': 0.079354427754879, 'fcm_dpo/margin': 239.94287109375, 'margin_dpo/margin_mean': 239.94284057617188, 'margin_dpo/margin_std': 587.9033813476562, 'logps/chosen': -812.3883056640625, 'logps/rejected': -1053.559814453125, 'logps/ref_chosen': -63.005550384521484, 'logps/ref_rejected': -64.234130859375, 'KL/chosen_KL_mean': -749.3828125, 'KL/rejected_KL_mean': -989.32568359375, 'KL/mean': -869.354248046875, 'KL/std': 496.443359375, 'logits/chosen': -1.001933217048645, 'logits/rejected': -0.9698858261108398, 'epoch': 0.91} + 91%|█████████ | 617/681 [26:12<02:46, 2.61s/it] 91%|█████████ | 618/681 [26:14<02:44, 2.61s/it] {'loss': 1.0962, 'grad_norm': 39.66215896606445, 'learning_rate': 1.3370790793601371e-08, 'fcm_dpo/beta': 0.0009334392379969358, 'fcm_dpo/q_t': 0.39517101645469666, 'fcm_dpo/delta': -0.0920899510383606, 'fcm_dpo/margin': 522.0931396484375, 'margin_dpo/margin_mean': 522.0931396484375, 'margin_dpo/margin_std': 848.4417114257812, 'logps/chosen': -849.7490234375, 'logps/rejected': -1396.8941650390625, 'logps/ref_chosen': -67.10134887695312, 'logps/ref_rejected': -92.15340423583984, 'KL/chosen_KL_mean': -782.6476440429688, 'KL/rejected_KL_mean': -1304.74072265625, 'KL/mean': -1043.6942138671875, 'KL/std': 657.9475708007812, 'logits/chosen': -1.0424597263336182, 'logits/rejected': -1.0812008380889893, 'epoch': 0.91} + 91%|█████████ | 618/681 [26:14<02:44, 2.61s/it] 91%|█████████ | 619/681 [26:17<02:40, 2.60s/it] {'loss': 1.1805, 'grad_norm': 59.24640655517578, 'learning_rate': 1.2959838102258535e-08, 'fcm_dpo/beta': 0.000927778659388423, 'fcm_dpo/q_t': 0.42306482791900635, 'fcm_dpo/delta': 0.030953753739595413, 'fcm_dpo/margin': 398.981201171875, 'margin_dpo/margin_mean': 398.981201171875, 'margin_dpo/margin_std': 824.7608642578125, 'logps/chosen': -838.7863159179688, 'logps/rejected': -1274.974609375, 'logps/ref_chosen': -55.978233337402344, 'logps/ref_rejected': -93.1854019165039, 'KL/chosen_KL_mean': -782.80810546875, 'KL/rejected_KL_mean': -1181.789306640625, 'KL/mean': -982.2987060546875, 'KL/std': 620.9131469726562, 'logits/chosen': -1.0202059745788574, 'logits/rejected': -1.0303071737289429, 'epoch': 0.91} + 91%|█████████ | 619/681 [26:17<02:40, 2.60s/it] 91%|█████████ | 620/681 [26:19<02:36, 2.57s/it] {'loss': 1.1388, 'grad_norm': 35.7849235534668, 'learning_rate': 1.2555131639630567e-08, 'fcm_dpo/beta': 0.0009312491165474057, 'fcm_dpo/q_t': 0.4192659258842468, 'fcm_dpo/delta': 0.04368671402335167, 'fcm_dpo/margin': 384.1690673828125, 'margin_dpo/margin_mean': 384.1690979003906, 'margin_dpo/margin_std': 621.861572265625, 'logps/chosen': -751.7249755859375, 'logps/rejected': -1154.50732421875, 'logps/ref_chosen': -59.79750061035156, 'logps/ref_rejected': -78.41075134277344, 'KL/chosen_KL_mean': -691.927490234375, 'KL/rejected_KL_mean': -1076.0965576171875, 'KL/mean': -884.0120239257812, 'KL/std': 550.659912109375, 'logits/chosen': -1.0948054790496826, 'logits/rejected': -1.1047601699829102, 'epoch': 0.91} + 91%|█████████ | 620/681 [26:19<02:36, 2.57s/it] 91%|█████████ | 621/681 [26:22<02:34, 2.57s/it] {'loss': 1.0337, 'grad_norm': 43.405094146728516, 'learning_rate': 1.2156682070109086e-08, 'fcm_dpo/beta': 0.0009251298615708947, 'fcm_dpo/q_t': 0.3810199499130249, 'fcm_dpo/delta': -0.1372598260641098, 'fcm_dpo/margin': 572.7069091796875, 'margin_dpo/margin_mean': 572.7069091796875, 'margin_dpo/margin_std': 737.669189453125, 'logps/chosen': -755.9588012695312, 'logps/rejected': -1363.1015625, 'logps/ref_chosen': -53.93375778198242, 'logps/ref_rejected': -88.36951446533203, 'KL/chosen_KL_mean': -702.0250244140625, 'KL/rejected_KL_mean': -1274.73193359375, 'KL/mean': -988.3785400390625, 'KL/std': 669.7676391601562, 'logits/chosen': -1.1240839958190918, 'logits/rejected': -1.1762495040893555, 'epoch': 0.91} + 91%|█████████ | 621/681 [26:22<02:34, 2.57s/it] 91%|█████████▏| 622/681 [26:25<02:31, 2.56s/it] {'loss': 1.1357, 'grad_norm': 34.01809310913086, 'learning_rate': 1.1764499893210878e-08, 'fcm_dpo/beta': 0.0009116814471781254, 'fcm_dpo/q_t': 0.4187527894973755, 'fcm_dpo/delta': 0.03691772744059563, 'fcm_dpo/margin': 399.3359680175781, 'margin_dpo/margin_mean': 399.3359680175781, 'margin_dpo/margin_std': 653.330322265625, 'logps/chosen': -739.79833984375, 'logps/rejected': -1164.3673095703125, 'logps/ref_chosen': -60.28582000732422, 'logps/ref_rejected': -85.51873779296875, 'KL/chosen_KL_mean': -679.5125732421875, 'KL/rejected_KL_mean': -1078.8485107421875, 'KL/mean': -879.1805419921875, 'KL/std': 515.3754272460938, 'logits/chosen': -0.9621305465698242, 'logits/rejected': -0.9480363726615906, 'epoch': 0.91} + 91%|█████████▏| 622/681 [26:25<02:31, 2.56s/it] 91%|█████████▏| 623/681 [26:27<02:21, 2.44s/it] {'loss': 1.1971, 'grad_norm': 34.93113327026367, 'learning_rate': 1.1378595443300998e-08, 'fcm_dpo/beta': 0.0009337057126685977, 'fcm_dpo/q_t': 0.435872882604599, 'fcm_dpo/delta': 0.11226323246955872, 'fcm_dpo/margin': 311.66925048828125, 'margin_dpo/margin_mean': 311.66925048828125, 'margin_dpo/margin_std': 637.570556640625, 'logps/chosen': -809.0438232421875, 'logps/rejected': -1141.63916015625, 'logps/ref_chosen': -64.1569595336914, 'logps/ref_rejected': -85.08304595947266, 'KL/chosen_KL_mean': -744.8868408203125, 'KL/rejected_KL_mean': -1056.55615234375, 'KL/mean': -900.721435546875, 'KL/std': 522.1895141601562, 'logits/chosen': -1.1241803169250488, 'logits/rejected': -1.125817060470581, 'epoch': 0.91} + 91%|█████████▏| 623/681 [26:27<02:21, 2.44s/it] 92%|█████████▏| 624/681 [26:29<02:20, 2.46s/it] {'loss': 1.0544, 'grad_norm': 43.926517486572266, 'learning_rate': 1.0998978889320582e-08, 'fcm_dpo/beta': 0.0009321460966020823, 'fcm_dpo/q_t': 0.3945468068122864, 'fcm_dpo/delta': -0.05314317345619202, 'fcm_dpo/margin': 483.5816650390625, 'margin_dpo/margin_mean': 483.5816345214844, 'margin_dpo/margin_std': 579.5042114257812, 'logps/chosen': -785.7073974609375, 'logps/rejected': -1294.50244140625, 'logps/ref_chosen': -71.91862487792969, 'logps/ref_rejected': -97.13203430175781, 'KL/chosen_KL_mean': -713.7887573242188, 'KL/rejected_KL_mean': -1197.370361328125, 'KL/mean': -955.57958984375, 'KL/std': 544.0978393554688, 'logits/chosen': -1.0758100748062134, 'logits/rejected': -1.0797600746154785, 'epoch': 0.92} + 92%|█████████▏| 624/681 [26:29<02:20, 2.46s/it] 92%|█████████▏| 625/681 [26:32<02:18, 2.48s/it] {'loss': 1.0132, 'grad_norm': 63.644493103027344, 'learning_rate': 1.0625660234518913e-08, 'fcm_dpo/beta': 0.0009185270173475146, 'fcm_dpo/q_t': 0.386716365814209, 'fcm_dpo/delta': -0.10053034871816635, 'fcm_dpo/margin': 539.6091918945312, 'margin_dpo/margin_mean': 539.6091918945312, 'margin_dpo/margin_std': 572.4862670898438, 'logps/chosen': -750.4405517578125, 'logps/rejected': -1317.798095703125, 'logps/ref_chosen': -58.342071533203125, 'logps/ref_rejected': -86.09038543701172, 'KL/chosen_KL_mean': -692.0985107421875, 'KL/rejected_KL_mean': -1231.707763671875, 'KL/mean': -961.903076171875, 'KL/std': 610.5233764648438, 'logits/chosen': -1.0029915571212769, 'logits/rejected': -1.0321152210235596, 'epoch': 0.92} + 92%|█████████▏| 625/681 [26:32<02:18, 2.48s/it] 92%|█████████▏| 626/681 [26:35<02:23, 2.61s/it] {'loss': 1.2186, 'grad_norm': 35.488224029541016, 'learning_rate': 1.0258649316189721e-08, 'fcm_dpo/beta': 0.0009280656231567264, 'fcm_dpo/q_t': 0.4352928698062897, 'fcm_dpo/delta': 0.1266339272260666, 'fcm_dpo/margin': 298.32769775390625, 'margin_dpo/margin_mean': 298.3277282714844, 'margin_dpo/margin_std': 647.6847534179688, 'logps/chosen': -928.63720703125, 'logps/rejected': -1251.041015625, 'logps/ref_chosen': -75.11260986328125, 'logps/ref_rejected': -99.188720703125, 'KL/chosen_KL_mean': -853.5245971679688, 'KL/rejected_KL_mean': -1151.852294921875, 'KL/mean': -1002.6884765625, 'KL/std': 662.7627563476562, 'logits/chosen': -0.9830967783927917, 'logits/rejected': -0.974500298500061, 'epoch': 0.92} + 92%|█████████▏| 626/681 [26:35<02:23, 2.61s/it] 92%|█████████▏| 627/681 [26:37<02:24, 2.67s/it] {'loss': 1.0393, 'grad_norm': 31.904329299926758, 'learning_rate': 9.897955805412e-09, 'fcm_dpo/beta': 0.0009197980398312211, 'fcm_dpo/q_t': 0.3880508542060852, 'fcm_dpo/delta': -0.15168313682079315, 'fcm_dpo/margin': 590.864013671875, 'margin_dpo/margin_mean': 590.864013671875, 'margin_dpo/margin_std': 831.529296875, 'logps/chosen': -634.5864868164062, 'logps/rejected': -1284.4617919921875, 'logps/ref_chosen': -47.74314880371094, 'logps/ref_rejected': -106.75448608398438, 'KL/chosen_KL_mean': -586.8433227539062, 'KL/rejected_KL_mean': -1177.707275390625, 'KL/mean': -882.2752685546875, 'KL/std': 712.061767578125, 'logits/chosen': -0.9286566972732544, 'logits/rejected': -1.0041477680206299, 'epoch': 0.92} + 92%|█████████▏| 627/681 [26:37<02:24, 2.67s/it] 92%|█████████▏| 628/681 [26:40<02:19, 2.63s/it] {'loss': 1.1095, 'grad_norm': 33.698768615722656, 'learning_rate': 9.543589206795238e-09, 'fcm_dpo/beta': 0.0009038818534463644, 'fcm_dpo/q_t': 0.4091545343399048, 'fcm_dpo/delta': -0.006778441369533539, 'fcm_dpo/margin': 449.6760559082031, 'margin_dpo/margin_mean': 449.6760559082031, 'margin_dpo/margin_std': 686.2415161132812, 'logps/chosen': -846.303466796875, 'logps/rejected': -1337.351318359375, 'logps/ref_chosen': -60.182945251464844, 'logps/ref_rejected': -101.55467224121094, 'KL/chosen_KL_mean': -786.1205444335938, 'KL/rejected_KL_mean': -1235.796630859375, 'KL/mean': -1010.9585571289062, 'KL/std': 596.643798828125, 'logits/chosen': -1.0994905233383179, 'logits/rejected': -1.1181318759918213, 'epoch': 0.92} + 92%|█████████▏| 628/681 [26:40<02:19, 2.63s/it] 92%|█████████▏| 629/681 [26:42<02:15, 2.61s/it] {'loss': 1.1025, 'grad_norm': 35.47370910644531, 'learning_rate': 9.19555885822887e-09, 'fcm_dpo/beta': 0.0009075739653781056, 'fcm_dpo/q_t': 0.4112434983253479, 'fcm_dpo/delta': 0.020304802805185318, 'fcm_dpo/margin': 419.21453857421875, 'margin_dpo/margin_mean': 419.21453857421875, 'margin_dpo/margin_std': 558.5067138671875, 'logps/chosen': -805.954833984375, 'logps/rejected': -1252.609375, 'logps/ref_chosen': -64.21354675292969, 'logps/ref_rejected': -91.65367126464844, 'KL/chosen_KL_mean': -741.7413330078125, 'KL/rejected_KL_mean': -1160.955810546875, 'KL/mean': -951.3485107421875, 'KL/std': 565.2870483398438, 'logits/chosen': -1.0898232460021973, 'logits/rejected': -1.107914686203003, 'epoch': 0.92} + 92%|█████████▏| 629/681 [26:43<02:15, 2.61s/it] 93%|█████████▎| 630/681 [26:45<02:13, 2.62s/it] {'loss': 1.2755, 'grad_norm': 55.93423080444336, 'learning_rate': 8.85387393063622e-09, 'fcm_dpo/beta': 0.0009130248799920082, 'fcm_dpo/q_t': 0.45570600032806396, 'fcm_dpo/delta': 0.04915432631969452, 'fcm_dpo/margin': 236.30340576171875, 'margin_dpo/margin_mean': 236.30337524414062, 'margin_dpo/margin_std': 700.1854248046875, 'logps/chosen': -748.0706176757812, 'logps/rejected': -1008.6813354492188, 'logps/ref_chosen': -59.29100036621094, 'logps/ref_rejected': -83.59829711914062, 'KL/chosen_KL_mean': -688.7796020507812, 'KL/rejected_KL_mean': -925.0830078125, 'KL/mean': -806.9313354492188, 'KL/std': 583.607421875, 'logits/chosen': -1.0273975133895874, 'logits/rejected': -1.0003504753112793, 'epoch': 0.93} + 93%|█████████▎| 630/681 [26:45<02:13, 2.62s/it] 93%|█████████▎| 631/681 [26:48<02:08, 2.58s/it] {'loss': 1.1544, 'grad_norm': 36.24885559082031, 'learning_rate': 8.518543427732949e-09, 'fcm_dpo/beta': 0.0009250535513274372, 'fcm_dpo/q_t': 0.41905054450035095, 'fcm_dpo/delta': 0.04782557487487793, 'fcm_dpo/margin': 382.4600830078125, 'margin_dpo/margin_mean': 382.4600830078125, 'margin_dpo/margin_std': 670.58447265625, 'logps/chosen': -877.882568359375, 'logps/rejected': -1281.840576171875, 'logps/ref_chosen': -59.45360565185547, 'logps/ref_rejected': -80.95156860351562, 'KL/chosen_KL_mean': -818.428955078125, 'KL/rejected_KL_mean': -1200.888916015625, 'KL/mean': -1009.658935546875, 'KL/std': 587.8419189453125, 'logits/chosen': -1.1538431644439697, 'logits/rejected': -1.1630300283432007, 'epoch': 0.93} + 93%|█████████▎| 631/681 [26:48<02:08, 2.58s/it] 93%|█████████▎| 632/681 [26:50<02:02, 2.50s/it] {'loss': 1.1434, 'grad_norm': 47.56220245361328, 'learning_rate': 8.189576185789637e-09, 'fcm_dpo/beta': 0.0009276444325223565, 'fcm_dpo/q_t': 0.4175952672958374, 'fcm_dpo/delta': 0.039635516703128815, 'fcm_dpo/margin': 389.7721252441406, 'margin_dpo/margin_mean': 389.7721252441406, 'margin_dpo/margin_std': 647.5654296875, 'logps/chosen': -767.737548828125, 'logps/rejected': -1182.318359375, 'logps/ref_chosen': -61.35155487060547, 'logps/ref_rejected': -86.16017150878906, 'KL/chosen_KL_mean': -706.385986328125, 'KL/rejected_KL_mean': -1096.158203125, 'KL/mean': -901.2720947265625, 'KL/std': 529.7587280273438, 'logits/chosen': -1.1132001876831055, 'logits/rejected': -1.1175191402435303, 'epoch': 0.93} + 93%|█████████▎| 632/681 [26:50<02:02, 2.50s/it] 93%|█████████▎| 633/681 [26:52<01:58, 2.48s/it] {'loss': 1.2186, 'grad_norm': 48.036155700683594, 'learning_rate': 7.866980873399015e-09, 'fcm_dpo/beta': 0.0009377728565596044, 'fcm_dpo/q_t': 0.4401985704898834, 'fcm_dpo/delta': 0.043396495282649994, 'fcm_dpo/margin': 275.209716796875, 'margin_dpo/margin_mean': 275.209716796875, 'margin_dpo/margin_std': 591.9662475585938, 'logps/chosen': -849.226318359375, 'logps/rejected': -1158.741943359375, 'logps/ref_chosen': -57.27816390991211, 'logps/ref_rejected': -91.58395385742188, 'KL/chosen_KL_mean': -791.9481201171875, 'KL/rejected_KL_mean': -1067.157958984375, 'KL/mean': -929.552978515625, 'KL/std': 522.26171875, 'logits/chosen': -1.1201856136322021, 'logits/rejected': -1.129399061203003, 'epoch': 0.93} + 93%|█████████▎| 633/681 [26:52<01:58, 2.48s/it] 93%|█████████▎| 634/681 [26:55<01:58, 2.52s/it] {'loss': 1.2358, 'grad_norm': 40.304161071777344, 'learning_rate': 7.550765991247654e-09, 'fcm_dpo/beta': 0.0009550247923471034, 'fcm_dpo/q_t': 0.44587743282318115, 'fcm_dpo/delta': 0.06949655711650848, 'fcm_dpo/margin': 255.07102966308594, 'margin_dpo/margin_mean': 255.071044921875, 'margin_dpo/margin_std': 605.7219848632812, 'logps/chosen': -962.819091796875, 'logps/rejected': -1258.3968505859375, 'logps/ref_chosen': -66.61896514892578, 'logps/ref_rejected': -107.12564849853516, 'KL/chosen_KL_mean': -896.2001342773438, 'KL/rejected_KL_mean': -1151.271240234375, 'KL/mean': -1023.7356567382812, 'KL/std': 637.21044921875, 'logits/chosen': -1.0025546550750732, 'logits/rejected': -0.9946834444999695, 'epoch': 0.93} + 93%|█████████▎| 634/681 [26:55<01:58, 2.52s/it] 93%|█████████▎| 635/681 [26:57<01:55, 2.51s/it] {'loss': 1.1613, 'grad_norm': 48.429481506347656, 'learning_rate': 7.240939871891699e-09, 'fcm_dpo/beta': 0.0009627408580854535, 'fcm_dpo/q_t': 0.42400288581848145, 'fcm_dpo/delta': 0.0557277575135231, 'fcm_dpo/margin': 359.6134948730469, 'margin_dpo/margin_mean': 359.61346435546875, 'margin_dpo/margin_std': 665.019287109375, 'logps/chosen': -843.724853515625, 'logps/rejected': -1211.88330078125, 'logps/ref_chosen': -73.95551300048828, 'logps/ref_rejected': -82.50045776367188, 'KL/chosen_KL_mean': -769.7693481445312, 'KL/rejected_KL_mean': -1129.3828125, 'KL/mean': -949.5761108398438, 'KL/std': 656.5932006835938, 'logits/chosen': -1.0978808403015137, 'logits/rejected': -1.0793735980987549, 'epoch': 0.93} + 93%|█████████▎| 635/681 [26:58<01:55, 2.51s/it] 93%|█████████▎| 636/681 [27:00<01:55, 2.56s/it] {'loss': 1.1025, 'grad_norm': 26.07723617553711, 'learning_rate': 6.937510679537628e-09, 'fcm_dpo/beta': 0.0009705645497888327, 'fcm_dpo/q_t': 0.4088486135005951, 'fcm_dpo/delta': -0.017428025603294373, 'fcm_dpo/margin': 428.31964111328125, 'margin_dpo/margin_mean': 428.3197021484375, 'margin_dpo/margin_std': 656.77587890625, 'logps/chosen': -787.690185546875, 'logps/rejected': -1238.3597412109375, 'logps/ref_chosen': -59.628910064697266, 'logps/ref_rejected': -81.97883605957031, 'KL/chosen_KL_mean': -728.061279296875, 'KL/rejected_KL_mean': -1156.380859375, 'KL/mean': -942.2210693359375, 'KL/std': 644.4195556640625, 'logits/chosen': -1.0272531509399414, 'logits/rejected': -1.0296359062194824, 'epoch': 0.93} + 93%|█████████▎| 636/681 [27:00<01:55, 2.56s/it] 94%|█████████▎| 637/681 [27:03<01:52, 2.56s/it] {'loss': 1.0731, 'grad_norm': 28.57129669189453, 'learning_rate': 6.640486409826785e-09, 'fcm_dpo/beta': 0.0009508421644568443, 'fcm_dpo/q_t': 0.400098979473114, 'fcm_dpo/delta': -0.051047492772340775, 'fcm_dpo/margin': 471.324462890625, 'margin_dpo/margin_mean': 471.32452392578125, 'margin_dpo/margin_std': 648.1543579101562, 'logps/chosen': -772.426513671875, 'logps/rejected': -1292.50341796875, 'logps/ref_chosen': -49.652687072753906, 'logps/ref_rejected': -98.40513610839844, 'KL/chosen_KL_mean': -722.7738647460938, 'KL/rejected_KL_mean': -1194.098388671875, 'KL/mean': -958.4360961914062, 'KL/std': 627.090576171875, 'logits/chosen': -1.144999384880066, 'logits/rejected': -1.1962953805923462, 'epoch': 0.94} + 94%|█████████▎| 637/681 [27:03<01:52, 2.56s/it] 94%|█████████▎| 638/681 [27:06<01:55, 2.69s/it] {'loss': 1.1707, 'grad_norm': 32.758209228515625, 'learning_rate': 6.349874889624962e-09, 'fcm_dpo/beta': 0.0009441774454899132, 'fcm_dpo/q_t': 0.41255509853363037, 'fcm_dpo/delta': -0.08913271129131317, 'fcm_dpo/margin': 380.04638671875, 'margin_dpo/margin_mean': 380.0464172363281, 'margin_dpo/margin_std': 710.3338623046875, 'logps/chosen': -773.4804077148438, 'logps/rejected': -1174.671630859375, 'logps/ref_chosen': -58.156639099121094, 'logps/ref_rejected': -79.3014907836914, 'KL/chosen_KL_mean': -715.32373046875, 'KL/rejected_KL_mean': -1095.3702392578125, 'KL/mean': -905.3469848632812, 'KL/std': 610.3677368164062, 'logits/chosen': -0.9814478158950806, 'logits/rejected': -0.9641016721725464, 'epoch': 0.94} + 94%|█████████▎| 638/681 [27:06<01:55, 2.69s/it] 94%|█████████▍| 639/681 [27:08<01:54, 2.72s/it] {'loss': 1.3504, 'grad_norm': 124.77977752685547, 'learning_rate': 6.065683776815933e-09, 'fcm_dpo/beta': 0.0009357619564980268, 'fcm_dpo/q_t': 0.4668254852294922, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 157.65838623046875, 'margin_dpo/margin_mean': 157.65838623046875, 'margin_dpo/margin_std': 713.7766723632812, 'logps/chosen': -1040.209716796875, 'logps/rejected': -1199.8199462890625, 'logps/ref_chosen': -72.32319641113281, 'logps/ref_rejected': -74.2749252319336, 'KL/chosen_KL_mean': -967.8865966796875, 'KL/rejected_KL_mean': -1125.5450439453125, 'KL/mean': -1046.7158203125, 'KL/std': 574.20703125, 'logits/chosen': -1.0108537673950195, 'logits/rejected': -0.9488674402236938, 'epoch': 0.94} + 94%|█████████▍| 639/681 [27:09<01:54, 2.72s/it] 94%|█████████▍| 640/681 [27:11<01:50, 2.68s/it] {'loss': 1.0416, 'grad_norm': 41.8037223815918, 'learning_rate': 5.7879205600998296e-09, 'fcm_dpo/beta': 0.0009211286087520421, 'fcm_dpo/q_t': 0.3863770365715027, 'fcm_dpo/delta': -0.13610000908374786, 'fcm_dpo/margin': 574.392578125, 'margin_dpo/margin_mean': 574.392578125, 'margin_dpo/margin_std': 796.70458984375, 'logps/chosen': -809.0047607421875, 'logps/rejected': -1435.863037109375, 'logps/ref_chosen': -56.13436508178711, 'logps/ref_rejected': -108.60014343261719, 'KL/chosen_KL_mean': -752.870361328125, 'KL/rejected_KL_mean': -1327.262939453125, 'KL/mean': -1040.066650390625, 'KL/std': 677.1709594726562, 'logits/chosen': -0.9777020215988159, 'logits/rejected': -1.0074682235717773, 'epoch': 0.94} + 94%|█████████▍| 640/681 [27:11<01:50, 2.68s/it] 94%|█████████▍| 641/681 [27:14<01:46, 2.65s/it] {'loss': 1.1765, 'grad_norm': 35.59013366699219, 'learning_rate': 5.516592558795746e-09, 'fcm_dpo/beta': 0.0009160140762105584, 'fcm_dpo/q_t': 0.4241793751716614, 'fcm_dpo/delta': 0.0552375465631485, 'fcm_dpo/margin': 378.48095703125, 'margin_dpo/margin_mean': 378.48095703125, 'margin_dpo/margin_std': 753.5254516601562, 'logps/chosen': -916.2326049804688, 'logps/rejected': -1316.708984375, 'logps/ref_chosen': -64.99689483642578, 'logps/ref_rejected': -86.99232482910156, 'KL/chosen_KL_mean': -851.2357177734375, 'KL/rejected_KL_mean': -1229.7166748046875, 'KL/mean': -1040.4761962890625, 'KL/std': 575.572509765625, 'logits/chosen': -1.040936827659607, 'logits/rejected': -1.0504437685012817, 'epoch': 0.94} + 94%|█████████▍| 641/681 [27:14<01:46, 2.65s/it] 94%|█████████▍| 642/681 [27:16<01:41, 2.60s/it] {'loss': 1.1533, 'grad_norm': 43.08086013793945, 'learning_rate': 5.251706922648868e-09, 'fcm_dpo/beta': 0.0009222212247550488, 'fcm_dpo/q_t': 0.4156268537044525, 'fcm_dpo/delta': -0.02048617973923683, 'fcm_dpo/margin': 454.66107177734375, 'margin_dpo/margin_mean': 454.6611328125, 'margin_dpo/margin_std': 916.9835205078125, 'logps/chosen': -883.28564453125, 'logps/rejected': -1382.4996337890625, 'logps/ref_chosen': -65.68924713134766, 'logps/ref_rejected': -110.24205017089844, 'KL/chosen_KL_mean': -817.596435546875, 'KL/rejected_KL_mean': -1272.257568359375, 'KL/mean': -1044.927001953125, 'KL/std': 752.8553466796875, 'logits/chosen': -0.9874919652938843, 'logits/rejected': -1.022093653678894, 'epoch': 0.94} + 94%|█████████▍| 642/681 [27:16<01:41, 2.60s/it] 94%|█████████▍| 643/681 [27:19<01:39, 2.62s/it] {'loss': 1.1568, 'grad_norm': 44.11703872680664, 'learning_rate': 4.993270631642038e-09, 'fcm_dpo/beta': 0.0009113398264162242, 'fcm_dpo/q_t': 0.42619654536247253, 'fcm_dpo/delta': -0.030877836048603058, 'fcm_dpo/margin': 349.2655334472656, 'margin_dpo/margin_mean': 349.2655029296875, 'margin_dpo/margin_std': 548.5968017578125, 'logps/chosen': -789.7559814453125, 'logps/rejected': -1174.539794921875, 'logps/ref_chosen': -51.94999694824219, 'logps/ref_rejected': -87.46833801269531, 'KL/chosen_KL_mean': -737.8060302734375, 'KL/rejected_KL_mean': -1087.071533203125, 'KL/mean': -912.438720703125, 'KL/std': 547.3657836914062, 'logits/chosen': -1.1141959428787231, 'logits/rejected': -1.1129988431930542, 'epoch': 0.94} + 94%|█████████▍| 643/681 [27:19<01:39, 2.62s/it] 95%|█████████▍| 644/681 [27:21<01:37, 2.63s/it] {'loss': 1.1889, 'grad_norm': 56.25822067260742, 'learning_rate': 4.741290495811873e-09, 'fcm_dpo/beta': 0.0009200773201882839, 'fcm_dpo/q_t': 0.42611658573150635, 'fcm_dpo/delta': 0.06583556532859802, 'fcm_dpo/margin': 365.61663818359375, 'margin_dpo/margin_mean': 365.61663818359375, 'margin_dpo/margin_std': 751.4439697265625, 'logps/chosen': -786.428466796875, 'logps/rejected': -1180.1640625, 'logps/ref_chosen': -59.017662048339844, 'logps/ref_rejected': -87.13668823242188, 'KL/chosen_KL_mean': -727.4107666015625, 'KL/rejected_KL_mean': -1093.02734375, 'KL/mean': -910.2191162109375, 'KL/std': 654.0157470703125, 'logits/chosen': -1.02555513381958, 'logits/rejected': -1.0345053672790527, 'epoch': 0.95} + 95%|█████████▍| 644/681 [27:21<01:37, 2.63s/it] 95%|█████████▍| 645/681 [27:24<01:34, 2.62s/it] {'loss': 1.3301, 'grad_norm': 108.85242462158203, 'learning_rate': 4.495773155069299e-09, 'fcm_dpo/beta': 0.0009371960768476129, 'fcm_dpo/q_t': 0.4641192555427551, 'fcm_dpo/delta': 0.0740480124950409, 'fcm_dpo/margin': 176.36630249023438, 'margin_dpo/margin_mean': 176.3662872314453, 'margin_dpo/margin_std': 675.3572998046875, 'logps/chosen': -808.6253051757812, 'logps/rejected': -1026.896484375, 'logps/ref_chosen': -55.87602233886719, 'logps/ref_rejected': -97.78080749511719, 'KL/chosen_KL_mean': -752.749267578125, 'KL/rejected_KL_mean': -929.1156005859375, 'KL/mean': -840.9324951171875, 'KL/std': 507.17401123046875, 'logits/chosen': -0.961013913154602, 'logits/rejected': -0.9447523355484009, 'epoch': 0.95} + 95%|█████████▍| 645/681 [27:24<01:34, 2.62s/it] 95%|█████████▍| 646/681 [27:26<01:29, 2.56s/it] {'loss': 1.1901, 'grad_norm': 58.64928436279297, 'learning_rate': 4.256725079024553e-09, 'fcm_dpo/beta': 0.0009495633421465755, 'fcm_dpo/q_t': 0.4337136745452881, 'fcm_dpo/delta': 0.10899513214826584, 'fcm_dpo/margin': 309.98443603515625, 'margin_dpo/margin_mean': 309.9844665527344, 'margin_dpo/margin_std': 590.6322021484375, 'logps/chosen': -785.2565307617188, 'logps/rejected': -1111.470947265625, 'logps/ref_chosen': -61.275787353515625, 'logps/ref_rejected': -77.50580596923828, 'KL/chosen_KL_mean': -723.980712890625, 'KL/rejected_KL_mean': -1033.9652099609375, 'KL/mean': -878.9730224609375, 'KL/std': 492.32672119140625, 'logits/chosen': -1.0737169981002808, 'logits/rejected': -1.0561877489089966, 'epoch': 0.95} + 95%|█████████▍| 646/681 [27:27<01:29, 2.56s/it] 95%|█████████▌| 647/681 [27:29<01:28, 2.61s/it] {'loss': 1.1156, 'grad_norm': 30.214832305908203, 'learning_rate': 4.024152566816791e-09, 'fcm_dpo/beta': 0.0009617937030270696, 'fcm_dpo/q_t': 0.41403427720069885, 'fcm_dpo/delta': 0.028707262128591537, 'fcm_dpo/margin': 387.1577453613281, 'margin_dpo/margin_mean': 387.1577453613281, 'margin_dpo/margin_std': 551.2623291015625, 'logps/chosen': -699.7001342773438, 'logps/rejected': -1125.52490234375, 'logps/ref_chosen': -54.8524169921875, 'logps/ref_rejected': -93.5194091796875, 'KL/chosen_KL_mean': -644.8477172851562, 'KL/rejected_KL_mean': -1032.00537109375, 'KL/mean': -838.4265747070312, 'KL/std': 562.4344482421875, 'logits/chosen': -0.966199517250061, 'logits/rejected': -0.9970808029174805, 'epoch': 0.95} + 95%|█████████▌| 647/681 [27:29<01:28, 2.61s/it] 95%|█████████▌| 648/681 [27:32<01:23, 2.53s/it] {'loss': 1.0315, 'grad_norm': 31.0360164642334, 'learning_rate': 3.798061746947995e-09, 'fcm_dpo/beta': 0.0009418315021321177, 'fcm_dpo/q_t': 0.38359707593917847, 'fcm_dpo/delta': -0.15742585062980652, 'fcm_dpo/margin': 582.7850952148438, 'margin_dpo/margin_mean': 582.7850952148438, 'margin_dpo/margin_std': 816.8055419921875, 'logps/chosen': -738.8358154296875, 'logps/rejected': -1366.162109375, 'logps/ref_chosen': -54.17146682739258, 'logps/ref_rejected': -98.7127914428711, 'KL/chosen_KL_mean': -684.664306640625, 'KL/rejected_KL_mean': -1267.4493408203125, 'KL/mean': -976.056884765625, 'KL/std': 685.91015625, 'logits/chosen': -1.1210038661956787, 'logits/rejected': -1.1833868026733398, 'epoch': 0.95} + 95%|█████████▌| 648/681 [27:32<01:23, 2.53s/it] 95%|█████████▌| 649/681 [27:34<01:22, 2.57s/it] {'loss': 1.2295, 'grad_norm': 28.521751403808594, 'learning_rate': 3.5784585771215235e-09, 'fcm_dpo/beta': 0.0009543564519844949, 'fcm_dpo/q_t': 0.4425292909145355, 'fcm_dpo/delta': 0.13216045498847961, 'fcm_dpo/margin': 283.9723205566406, 'margin_dpo/margin_mean': 283.9723205566406, 'margin_dpo/margin_std': 654.6543579101562, 'logps/chosen': -774.1192626953125, 'logps/rejected': -1075.6884765625, 'logps/ref_chosen': -62.480350494384766, 'logps/ref_rejected': -80.07717895507812, 'KL/chosen_KL_mean': -711.638916015625, 'KL/rejected_KL_mean': -995.6112060546875, 'KL/mean': -853.6250610351562, 'KL/std': 527.212890625, 'logits/chosen': -1.1217185258865356, 'logits/rejected': -1.1153336763381958, 'epoch': 0.95} + 95%|█████████▌| 649/681 [27:34<01:22, 2.57s/it] 95%|█████████▌| 650/681 [27:37<01:20, 2.60s/it] {'loss': 1.1243, 'grad_norm': 33.09385681152344, 'learning_rate': 3.3653488440851253e-09, 'fcm_dpo/beta': 0.000953975017182529, 'fcm_dpo/q_t': 0.4086850881576538, 'fcm_dpo/delta': -0.018730733543634415, 'fcm_dpo/margin': 437.99713134765625, 'margin_dpo/margin_mean': 437.99713134765625, 'margin_dpo/margin_std': 743.27490234375, 'logps/chosen': -810.7120361328125, 'logps/rejected': -1290.881103515625, 'logps/ref_chosen': -56.09281921386719, 'logps/ref_rejected': -98.26483917236328, 'KL/chosen_KL_mean': -754.6192016601562, 'KL/rejected_KL_mean': -1192.6162109375, 'KL/mean': -973.61767578125, 'KL/std': 647.8673095703125, 'logits/chosen': -1.028601884841919, 'logits/rejected': -1.0516587495803833, 'epoch': 0.95} + 95%|█████████▌| 650/681 [27:37<01:20, 2.60s/it] 96%|█████████▌| 651/681 [27:40<01:18, 2.61s/it] {'loss': 1.0095, 'grad_norm': 40.27021408081055, 'learning_rate': 3.158738163478475e-09, 'fcm_dpo/beta': 0.0009362648124806583, 'fcm_dpo/q_t': 0.38282567262649536, 'fcm_dpo/delta': -0.12655048072338104, 'fcm_dpo/margin': 555.2301025390625, 'margin_dpo/margin_mean': 555.2301025390625, 'margin_dpo/margin_std': 632.1912841796875, 'logps/chosen': -556.1956787109375, 'logps/rejected': -1167.958251953125, 'logps/ref_chosen': -43.42544937133789, 'logps/ref_rejected': -99.95791625976562, 'KL/chosen_KL_mean': -512.770263671875, 'KL/rejected_KL_mean': -1068.000244140625, 'KL/mean': -790.38525390625, 'KL/std': 609.9718017578125, 'logits/chosen': -1.0640699863433838, 'logits/rejected': -1.1213992834091187, 'epoch': 0.96} + 96%|█████████▌| 651/681 [27:40<01:18, 2.61s/it] 96%|█████████▌| 652/681 [27:42<01:15, 2.61s/it] {'loss': 1.1248, 'grad_norm': 32.868282318115234, 'learning_rate': 2.9586319796851555e-09, 'fcm_dpo/beta': 0.000932047376409173, 'fcm_dpo/q_t': 0.411517471075058, 'fcm_dpo/delta': -0.0005056131631135941, 'fcm_dpo/margin': 429.6570129394531, 'margin_dpo/margin_mean': 429.6570129394531, 'margin_dpo/margin_std': 717.76123046875, 'logps/chosen': -713.67041015625, 'logps/rejected': -1192.5184326171875, 'logps/ref_chosen': -62.57680892944336, 'logps/ref_rejected': -111.76779174804688, 'KL/chosen_KL_mean': -651.0936279296875, 'KL/rejected_KL_mean': -1080.7506103515625, 'KL/mean': -865.922119140625, 'KL/std': 616.6910400390625, 'logits/chosen': -1.0609800815582275, 'logits/rejected': -1.087823748588562, 'epoch': 0.96} + 96%|█████████▌| 652/681 [27:42<01:15, 2.61s/it] 96%|█████████▌| 653/681 [27:45<01:12, 2.58s/it] {'loss': 1.153, 'grad_norm': 35.157344818115234, 'learning_rate': 2.7650355656892166e-09, 'fcm_dpo/beta': 0.0009377988171763718, 'fcm_dpo/q_t': 0.4204859137535095, 'fcm_dpo/delta': 0.03901583328843117, 'fcm_dpo/margin': 386.3274841308594, 'margin_dpo/margin_mean': 386.3274841308594, 'margin_dpo/margin_std': 692.9034423828125, 'logps/chosen': -864.7777709960938, 'logps/rejected': -1293.241943359375, 'logps/ref_chosen': -61.11295700073242, 'logps/ref_rejected': -103.24960327148438, 'KL/chosen_KL_mean': -803.664794921875, 'KL/rejected_KL_mean': -1189.9921875, 'KL/mean': -996.8284912109375, 'KL/std': 658.72021484375, 'logits/chosen': -1.138892412185669, 'logits/rejected': -1.1634893417358398, 'epoch': 0.96} + 96%|█████████▌| 653/681 [27:45<01:12, 2.58s/it] 96%|█████████▌| 654/681 [27:47<01:09, 2.58s/it] {'loss': 1.1429, 'grad_norm': 39.73265075683594, 'learning_rate': 2.577954022936174e-09, 'fcm_dpo/beta': 0.0009442999726161361, 'fcm_dpo/q_t': 0.4215119779109955, 'fcm_dpo/delta': 0.04640195518732071, 'fcm_dpo/margin': 376.21954345703125, 'margin_dpo/margin_mean': 376.21954345703125, 'margin_dpo/margin_std': 627.8160400390625, 'logps/chosen': -786.0355224609375, 'logps/rejected': -1199.30078125, 'logps/ref_chosen': -61.7281379699707, 'logps/ref_rejected': -98.7738037109375, 'KL/chosen_KL_mean': -724.307373046875, 'KL/rejected_KL_mean': -1100.52685546875, 'KL/mean': -912.4171142578125, 'KL/std': 534.213134765625, 'logits/chosen': -1.0920642614364624, 'logits/rejected': -1.1086204051971436, 'epoch': 0.96} + 96%|█████████▌| 654/681 [27:47<01:09, 2.58s/it] 96%|█████████▌| 655/681 [27:50<01:06, 2.55s/it] {'loss': 1.1326, 'grad_norm': 28.9021053314209, 'learning_rate': 2.397392281198729e-09, 'fcm_dpo/beta': 0.0009508723160251975, 'fcm_dpo/q_t': 0.416969895362854, 'fcm_dpo/delta': 0.028146151453256607, 'fcm_dpo/margin': 392.17303466796875, 'margin_dpo/margin_mean': 392.17303466796875, 'margin_dpo/margin_std': 639.540771484375, 'logps/chosen': -714.6474609375, 'logps/rejected': -1155.535400390625, 'logps/ref_chosen': -49.576812744140625, 'logps/ref_rejected': -98.29183197021484, 'KL/chosen_KL_mean': -665.0706787109375, 'KL/rejected_KL_mean': -1057.24365234375, 'KL/mean': -861.1571044921875, 'KL/std': 536.1744384765625, 'logits/chosen': -1.0546410083770752, 'logits/rejected': -1.0955651998519897, 'epoch': 0.96} + 96%|█████████▌| 655/681 [27:50<01:06, 2.55s/it] 96%|█████████▋| 656/681 [27:52<01:05, 2.61s/it] {'loss': 0.9623, 'grad_norm': 87.20188903808594, 'learning_rate': 2.223355098446622e-09, 'fcm_dpo/beta': 0.0009240615181624889, 'fcm_dpo/q_t': 0.3659891188144684, 'fcm_dpo/delta': -0.2268456667661667, 'fcm_dpo/margin': 663.841796875, 'margin_dpo/margin_mean': 663.841796875, 'margin_dpo/margin_std': 737.1829833984375, 'logps/chosen': -781.2054443359375, 'logps/rejected': -1506.1724853515625, 'logps/ref_chosen': -52.54943084716797, 'logps/ref_rejected': -113.67464447021484, 'KL/chosen_KL_mean': -728.656005859375, 'KL/rejected_KL_mean': -1392.497802734375, 'KL/mean': -1060.576904296875, 'KL/std': 723.1806030273438, 'logits/chosen': -0.9737097024917603, 'logits/rejected': -1.0463311672210693, 'epoch': 0.96} + 96%|█████████▋| 656/681 [27:52<01:05, 2.61s/it] 96%|█████████▋| 657/681 [27:55<00:59, 2.49s/it] {'loss': 1.0554, 'grad_norm': 39.09135437011719, 'learning_rate': 2.055847060721566e-09, 'fcm_dpo/beta': 0.0008973278454504907, 'fcm_dpo/q_t': 0.39242735505104065, 'fcm_dpo/delta': -0.08321470022201538, 'fcm_dpo/margin': 533.8377075195312, 'margin_dpo/margin_mean': 533.8377075195312, 'margin_dpo/margin_std': 719.7000122070312, 'logps/chosen': -726.372802734375, 'logps/rejected': -1311.4248046875, 'logps/ref_chosen': -46.700538635253906, 'logps/ref_rejected': -97.91487121582031, 'KL/chosen_KL_mean': -679.6723022460938, 'KL/rejected_KL_mean': -1213.510009765625, 'KL/mean': -946.5911865234375, 'KL/std': 687.6986083984375, 'logits/chosen': -1.1246776580810547, 'logits/rejected': -1.1695971488952637, 'epoch': 0.96} + 96%|█████████▋| 657/681 [27:55<00:59, 2.49s/it] 97%|█████████▋| 658/681 [27:57<00:56, 2.45s/it] {'loss': 1.1161, 'grad_norm': 36.563560485839844, 'learning_rate': 1.8948725820160662e-09, 'fcm_dpo/beta': 0.0008927997550927103, 'fcm_dpo/q_t': 0.4129961133003235, 'fcm_dpo/delta': 0.020825423300266266, 'fcm_dpo/margin': 425.004638671875, 'margin_dpo/margin_mean': 425.004638671875, 'margin_dpo/margin_std': 614.8718872070312, 'logps/chosen': -800.3736572265625, 'logps/rejected': -1260.359619140625, 'logps/ref_chosen': -60.95820999145508, 'logps/ref_rejected': -95.93949127197266, 'KL/chosen_KL_mean': -739.4154663085938, 'KL/rejected_KL_mean': -1164.420166015625, 'KL/mean': -951.917724609375, 'KL/std': 541.0897216796875, 'logits/chosen': -1.074481725692749, 'logits/rejected': -1.1052826642990112, 'epoch': 0.97} + 97%|█████████▋| 658/681 [27:57<00:56, 2.45s/it] 97%|█████████▋| 659/681 [28:00<00:55, 2.53s/it] {'loss': 1.1178, 'grad_norm': 42.586883544921875, 'learning_rate': 1.7404359041573723e-09, 'fcm_dpo/beta': 0.0009003398008644581, 'fcm_dpo/q_t': 0.41639554500579834, 'fcm_dpo/delta': 0.03149181231856346, 'fcm_dpo/margin': 410.4945068359375, 'margin_dpo/margin_mean': 410.4945068359375, 'margin_dpo/margin_std': 598.6842041015625, 'logps/chosen': -725.3404541015625, 'logps/rejected': -1146.56298828125, 'logps/ref_chosen': -76.74298095703125, 'logps/ref_rejected': -87.4709701538086, 'KL/chosen_KL_mean': -648.5974731445312, 'KL/rejected_KL_mean': -1059.092041015625, 'KL/mean': -853.8447265625, 'KL/std': 544.5018310546875, 'logits/chosen': -0.9929611086845398, 'logits/rejected': -0.9660124778747559, 'epoch': 0.97} + 97%|█████████▋| 659/681 [28:00<00:55, 2.53s/it] 97%|█████████▋| 660/681 [28:02<00:52, 2.50s/it] {'loss': 1.0481, 'grad_norm': 36.593955993652344, 'learning_rate': 1.592541096695571e-09, 'fcm_dpo/beta': 0.0008946568705141544, 'fcm_dpo/q_t': 0.3920001983642578, 'fcm_dpo/delta': -0.0832086056470871, 'fcm_dpo/margin': 535.7288818359375, 'margin_dpo/margin_mean': 535.7288818359375, 'margin_dpo/margin_std': 685.7335205078125, 'logps/chosen': -762.4723510742188, 'logps/rejected': -1315.11328125, 'logps/ref_chosen': -59.04788589477539, 'logps/ref_rejected': -75.96005249023438, 'KL/chosen_KL_mean': -703.4244384765625, 'KL/rejected_KL_mean': -1239.1533203125, 'KL/mean': -971.2889404296875, 'KL/std': 633.0969848632812, 'logits/chosen': -1.0897400379180908, 'logits/rejected': -1.1135540008544922, 'epoch': 0.97} + 97%|█████████▋| 660/681 [28:02<00:52, 2.50s/it] 97%|█████████▋| 661/681 [28:04<00:48, 2.41s/it] {'loss': 1.089, 'grad_norm': 50.08948516845703, 'learning_rate': 1.4511920567963908e-09, 'fcm_dpo/beta': 0.0008907719748094678, 'fcm_dpo/q_t': 0.4084845185279846, 'fcm_dpo/delta': -0.015506003051996231, 'fcm_dpo/margin': 465.50323486328125, 'margin_dpo/margin_mean': 465.5032958984375, 'margin_dpo/margin_std': 672.30322265625, 'logps/chosen': -671.4685668945312, 'logps/rejected': -1172.303466796875, 'logps/ref_chosen': -50.673973083496094, 'logps/ref_rejected': -86.00569152832031, 'KL/chosen_KL_mean': -620.7945556640625, 'KL/rejected_KL_mean': -1086.2978515625, 'KL/mean': -853.5462646484375, 'KL/std': 685.778564453125, 'logits/chosen': -1.1007812023162842, 'logits/rejected': -1.116791009902954, 'epoch': 0.97} + 97%|█████████▋| 661/681 [28:04<00:48, 2.41s/it] 97%|█████████▋| 662/681 [28:07<00:48, 2.56s/it] {'loss': 1.1725, 'grad_norm': 29.727699279785156, 'learning_rate': 1.3163925091384532e-09, 'fcm_dpo/beta': 0.0008955647936090827, 'fcm_dpo/q_t': 0.4247671663761139, 'fcm_dpo/delta': 0.06570842862129211, 'fcm_dpo/margin': 375.7431640625, 'margin_dpo/margin_mean': 375.74310302734375, 'margin_dpo/margin_std': 715.9241333007812, 'logps/chosen': -793.4810791015625, 'logps/rejected': -1189.01904296875, 'logps/ref_chosen': -69.26106262207031, 'logps/ref_rejected': -89.05593872070312, 'KL/chosen_KL_mean': -724.219970703125, 'KL/rejected_KL_mean': -1099.963134765625, 'KL/mean': -912.091552734375, 'KL/std': 585.681884765625, 'logits/chosen': -1.0112884044647217, 'logits/rejected': -1.012375831604004, 'epoch': 0.97} + 97%|█████████▋| 662/681 [28:07<00:48, 2.56s/it] 97%|█████████▋| 663/681 [28:10<00:48, 2.68s/it] {'loss': 1.125, 'grad_norm': 25.254993438720703, 'learning_rate': 1.1881460058152382e-09, 'fcm_dpo/beta': 0.0008970214985311031, 'fcm_dpo/q_t': 0.4119381904602051, 'fcm_dpo/delta': -0.0017335359007120132, 'fcm_dpo/margin': 447.7265930175781, 'margin_dpo/margin_mean': 447.7265930175781, 'margin_dpo/margin_std': 758.81591796875, 'logps/chosen': -742.0482177734375, 'logps/rejected': -1238.8212890625, 'logps/ref_chosen': -64.87890625, 'logps/ref_rejected': -113.92536926269531, 'KL/chosen_KL_mean': -677.1693115234375, 'KL/rejected_KL_mean': -1124.8958740234375, 'KL/mean': -901.0325927734375, 'KL/std': 655.4639892578125, 'logits/chosen': -1.0759081840515137, 'logits/rejected': -1.1006672382354736, 'epoch': 0.97} + 97%|█████████▋| 663/681 [28:10<00:48, 2.68s/it] 98%|█████████▊| 664/681 [28:13<00:45, 2.67s/it] {'loss': 1.0717, 'grad_norm': 27.473468780517578, 'learning_rate': 1.066455926241383e-09, 'fcm_dpo/beta': 0.0008872643811628222, 'fcm_dpo/q_t': 0.39906027913093567, 'fcm_dpo/delta': -0.05684386566281319, 'fcm_dpo/margin': 511.424560546875, 'margin_dpo/margin_mean': 511.424560546875, 'margin_dpo/margin_std': 705.2711181640625, 'logps/chosen': -793.5599365234375, 'logps/rejected': -1349.61767578125, 'logps/ref_chosen': -60.88847351074219, 'logps/ref_rejected': -105.521728515625, 'KL/chosen_KL_mean': -732.6714477539062, 'KL/rejected_KL_mean': -1244.095947265625, 'KL/mean': -988.3836669921875, 'KL/std': 662.098876953125, 'logits/chosen': -1.0546207427978516, 'logits/rejected': -1.0900723934173584, 'epoch': 0.98} + 98%|█████████▊| 664/681 [28:13<00:45, 2.67s/it] 98%|█████████▊| 665/681 [28:15<00:41, 2.57s/it] {'loss': 1.0953, 'grad_norm': 48.51000213623047, 'learning_rate': 9.513254770636137e-10, 'fcm_dpo/beta': 0.0008908901363611221, 'fcm_dpo/q_t': 0.4121313691139221, 'fcm_dpo/delta': 0.021186400204896927, 'fcm_dpo/margin': 426.11090087890625, 'margin_dpo/margin_mean': 426.11090087890625, 'margin_dpo/margin_std': 529.1512451171875, 'logps/chosen': -695.8072509765625, 'logps/rejected': -1146.162841796875, 'logps/ref_chosen': -60.56413269042969, 'logps/ref_rejected': -84.80882263183594, 'KL/chosen_KL_mean': -635.2431640625, 'KL/rejected_KL_mean': -1061.35400390625, 'KL/mean': -848.298583984375, 'KL/std': 531.826416015625, 'logits/chosen': -1.1572396755218506, 'logits/rejected': -1.1837971210479736, 'epoch': 0.98} + 98%|█████████▊| 665/681 [28:15<00:41, 2.57s/it] 98%|█████████▊| 666/681 [28:18<00:39, 2.61s/it] {'loss': 1.1128, 'grad_norm': 34.465694427490234, 'learning_rate': 8.427576920763956e-10, 'fcm_dpo/beta': 0.0008939065737649798, 'fcm_dpo/q_t': 0.4139803946018219, 'fcm_dpo/delta': 0.020892852917313576, 'fcm_dpo/margin': 424.9931335449219, 'margin_dpo/margin_mean': 424.9931640625, 'margin_dpo/margin_std': 604.1651611328125, 'logps/chosen': -751.3270263671875, 'logps/rejected': -1207.791748046875, 'logps/ref_chosen': -64.41996002197266, 'logps/ref_rejected': -95.8916244506836, 'KL/chosen_KL_mean': -686.9071044921875, 'KL/rejected_KL_mean': -1111.900146484375, 'KL/mean': -899.4036865234375, 'KL/std': 544.5043334960938, 'logits/chosen': -0.9737902283668518, 'logits/rejected': -0.9849244356155396, 'epoch': 0.98} + 98%|█████████▊| 666/681 [28:18<00:39, 2.61s/it] 98%|█████████▊| 667/681 [28:21<00:36, 2.62s/it] {'loss': 1.067, 'grad_norm': 44.31229782104492, 'learning_rate': 7.407554321417764e-10, 'fcm_dpo/beta': 0.0008860268862918019, 'fcm_dpo/q_t': 0.3982255458831787, 'fcm_dpo/delta': -0.05220697447657585, 'fcm_dpo/margin': 507.36602783203125, 'margin_dpo/margin_mean': 507.36602783203125, 'margin_dpo/margin_std': 670.2469482421875, 'logps/chosen': -848.5464477539062, 'logps/rejected': -1374.470947265625, 'logps/ref_chosen': -69.27702331542969, 'logps/ref_rejected': -87.83549499511719, 'KL/chosen_KL_mean': -779.2694091796875, 'KL/rejected_KL_mean': -1286.635498046875, 'KL/mean': -1032.952392578125, 'KL/std': 609.7968139648438, 'logits/chosen': -1.008927822113037, 'logits/rejected': -1.0107920169830322, 'epoch': 0.98} + 98%|█████████▊| 667/681 [28:21<00:36, 2.62s/it] 98%|█████████▊| 668/681 [28:23<00:34, 2.63s/it] {'loss': 1.2118, 'grad_norm': 51.814239501953125, 'learning_rate': 6.453213851142225e-10, 'fcm_dpo/beta': 0.0009021821897476912, 'fcm_dpo/q_t': 0.43241050839424133, 'fcm_dpo/delta': 0.08765879273414612, 'fcm_dpo/margin': 348.3375244140625, 'margin_dpo/margin_mean': 348.3375244140625, 'margin_dpo/margin_std': 782.4864501953125, 'logps/chosen': -902.2957763671875, 'logps/rejected': -1281.768310546875, 'logps/ref_chosen': -72.60400390625, 'logps/ref_rejected': -103.73905944824219, 'KL/chosen_KL_mean': -829.6917724609375, 'KL/rejected_KL_mean': -1178.029296875, 'KL/mean': -1003.8605346679688, 'KL/std': 655.914794921875, 'logits/chosen': -1.082472324371338, 'logits/rejected': -1.0882298946380615, 'epoch': 0.98} + 98%|█████████▊| 668/681 [28:23<00:34, 2.63s/it] 98%|█████████▊| 669/681 [28:26<00:31, 2.66s/it] {'loss': 1.0658, 'grad_norm': 27.12503433227539, 'learning_rate': 5.564580657695939e-10, 'fcm_dpo/beta': 0.0008957190439105034, 'fcm_dpo/q_t': 0.3966999053955078, 'fcm_dpo/delta': -0.05677647516131401, 'fcm_dpo/margin': 507.1175537109375, 'margin_dpo/margin_mean': 507.1175537109375, 'margin_dpo/margin_std': 671.839599609375, 'logps/chosen': -678.6981811523438, 'logps/rejected': -1217.623779296875, 'logps/ref_chosen': -46.116416931152344, 'logps/ref_rejected': -77.92434692382812, 'KL/chosen_KL_mean': -632.581787109375, 'KL/rejected_KL_mean': -1139.6993408203125, 'KL/mean': -886.140625, 'KL/std': 600.3204345703125, 'logits/chosen': -1.0696676969528198, 'logits/rejected': -1.0817254781723022, 'epoch': 0.98} + 98%|█████████▊| 669/681 [28:26<00:31, 2.66s/it] 98%|█████████▊| 670/681 [28:29<00:29, 2.65s/it] {'loss': 1.066, 'grad_norm': 23.987220764160156, 'learning_rate': 4.741678157389739e-10, 'fcm_dpo/beta': 0.0008871153695508838, 'fcm_dpo/q_t': 0.3957204818725586, 'fcm_dpo/delta': -0.06361524760723114, 'fcm_dpo/margin': 519.2064819335938, 'margin_dpo/margin_mean': 519.2064819335938, 'margin_dpo/margin_std': 689.0188598632812, 'logps/chosen': -664.168701171875, 'logps/rejected': -1217.969970703125, 'logps/ref_chosen': -62.34575271606445, 'logps/ref_rejected': -96.9405517578125, 'KL/chosen_KL_mean': -601.8228759765625, 'KL/rejected_KL_mean': -1121.0294189453125, 'KL/mean': -861.4261474609375, 'KL/std': 569.886962890625, 'logits/chosen': -0.9869112968444824, 'logits/rejected': -1.0059700012207031, 'epoch': 0.98} + 98%|█████████▊| 670/681 [28:29<00:29, 2.65s/it] 99%|█████████▊| 671/681 [28:31<00:25, 2.56s/it] {'loss': 1.1536, 'grad_norm': 37.049495697021484, 'learning_rate': 3.9845280344705245e-10, 'fcm_dpo/beta': 0.0008903343696147203, 'fcm_dpo/q_t': 0.41793012619018555, 'fcm_dpo/delta': 0.047325365245342255, 'fcm_dpo/margin': 397.4279479980469, 'margin_dpo/margin_mean': 397.427978515625, 'margin_dpo/margin_std': 692.941650390625, 'logps/chosen': -818.516357421875, 'logps/rejected': -1251.763427734375, 'logps/ref_chosen': -48.00010681152344, 'logps/ref_rejected': -83.81932067871094, 'KL/chosen_KL_mean': -770.5162353515625, 'KL/rejected_KL_mean': -1167.944091796875, 'KL/mean': -969.230224609375, 'KL/std': 568.30517578125, 'logits/chosen': -1.0835880041122437, 'logits/rejected': -1.1127347946166992, 'epoch': 0.99} + 99%|█████████▊| 671/681 [28:31<00:25, 2.56s/it] 99%|█████████▊| 672/681 [28:33<00:22, 2.54s/it] {'loss': 1.1557, 'grad_norm': 60.62648010253906, 'learning_rate': 3.293150240547549e-10, 'fcm_dpo/beta': 0.0008913551573641598, 'fcm_dpo/q_t': 0.41862136125564575, 'fcm_dpo/delta': 0.03129229322075844, 'fcm_dpo/margin': 414.9320983886719, 'margin_dpo/margin_mean': 414.9320983886719, 'margin_dpo/margin_std': 755.1729125976562, 'logps/chosen': -913.0614013671875, 'logps/rejected': -1362.55029296875, 'logps/ref_chosen': -58.58328628540039, 'logps/ref_rejected': -93.14015197753906, 'KL/chosen_KL_mean': -854.4781494140625, 'KL/rejected_KL_mean': -1269.41015625, 'KL/mean': -1061.944091796875, 'KL/std': 686.4140625, 'logits/chosen': -1.1662323474884033, 'logits/rejected': -1.1740036010742188, 'epoch': 0.99} + 99%|█████████▊| 672/681 [28:33<00:22, 2.54s/it] 99%|█████████▉| 673/681 [28:36<00:19, 2.47s/it] {'loss': 1.1464, 'grad_norm': 42.15021514892578, 'learning_rate': 2.6675629940689504e-10, 'fcm_dpo/beta': 0.0009015346877276897, 'fcm_dpo/q_t': 0.42215287685394287, 'fcm_dpo/delta': 0.054041508585214615, 'fcm_dpo/margin': 385.8390808105469, 'margin_dpo/margin_mean': 385.8390808105469, 'margin_dpo/margin_std': 660.7847900390625, 'logps/chosen': -800.53515625, 'logps/rejected': -1224.947265625, 'logps/ref_chosen': -46.72320556640625, 'logps/ref_rejected': -85.29623413085938, 'KL/chosen_KL_mean': -753.8118896484375, 'KL/rejected_KL_mean': -1139.651123046875, 'KL/mean': -946.7315063476562, 'KL/std': 579.491455078125, 'logits/chosen': -1.1026825904846191, 'logits/rejected': -1.1077499389648438, 'epoch': 0.99} + 99%|█████████▉| 673/681 [28:36<00:19, 2.47s/it] 99%|█████████▉| 674/681 [28:39<00:17, 2.56s/it] {'loss': 1.0617, 'grad_norm': 36.43729782104492, 'learning_rate': 2.1077827798404725e-10, 'fcm_dpo/beta': 0.0008984719170257449, 'fcm_dpo/q_t': 0.3981897830963135, 'fcm_dpo/delta': -0.061865366995334625, 'fcm_dpo/margin': 510.90618896484375, 'margin_dpo/margin_mean': 510.90618896484375, 'margin_dpo/margin_std': 678.5731811523438, 'logps/chosen': -675.9693603515625, 'logps/rejected': -1211.475830078125, 'logps/ref_chosen': -45.445526123046875, 'logps/ref_rejected': -70.04593658447266, 'KL/chosen_KL_mean': -630.5238037109375, 'KL/rejected_KL_mean': -1141.429931640625, 'KL/mean': -885.9769287109375, 'KL/std': 571.075439453125, 'logits/chosen': -0.9947335720062256, 'logits/rejected': -1.015453577041626, 'epoch': 0.99} + 99%|█████████▉| 674/681 [28:39<00:17, 2.56s/it] 99%|█████████▉| 675/681 [28:41<00:15, 2.55s/it] {'loss': 1.0676, 'grad_norm': 25.439342498779297, 'learning_rate': 1.6138243485910863e-10, 'fcm_dpo/beta': 0.0008785349782556295, 'fcm_dpo/q_t': 0.39929330348968506, 'fcm_dpo/delta': -0.060169536620378494, 'fcm_dpo/margin': 518.9458618164062, 'margin_dpo/margin_mean': 518.9458618164062, 'margin_dpo/margin_std': 681.2320556640625, 'logps/chosen': -753.0992431640625, 'logps/rejected': -1301.9608154296875, 'logps/ref_chosen': -44.17628479003906, 'logps/ref_rejected': -74.09197998046875, 'KL/chosen_KL_mean': -708.9229736328125, 'KL/rejected_KL_mean': -1227.868896484375, 'KL/mean': -968.3958740234375, 'KL/std': 633.1185302734375, 'logits/chosen': -1.0798540115356445, 'logits/rejected': -1.094804286956787, 'epoch': 0.99} + 99%|█████████▉| 675/681 [28:41<00:15, 2.55s/it] 99%|█████████▉| 676/681 [28:44<00:13, 2.60s/it] {'loss': 1.065, 'grad_norm': 26.6938419342041, 'learning_rate': 1.1857007165852472e-10, 'fcm_dpo/beta': 0.0008790518622845411, 'fcm_dpo/q_t': 0.4009990990161896, 'fcm_dpo/delta': -0.03571845218539238, 'fcm_dpo/margin': 493.9117736816406, 'margin_dpo/margin_mean': 493.9117736816406, 'margin_dpo/margin_std': 605.2867431640625, 'logps/chosen': -821.196044921875, 'logps/rejected': -1332.0679931640625, 'logps/ref_chosen': -71.39852905273438, 'logps/ref_rejected': -88.3587646484375, 'KL/chosen_KL_mean': -749.7974853515625, 'KL/rejected_KL_mean': -1243.709228515625, 'KL/mean': -996.75341796875, 'KL/std': 598.0169067382812, 'logits/chosen': -1.0037989616394043, 'logits/rejected': -1.0201971530914307, 'epoch': 0.99} + 99%|█████████▉| 676/681 [28:44<00:13, 2.60s/it] 99%|█████████▉| 677/681 [28:46<00:10, 2.52s/it] {'loss': 1.1107, 'grad_norm': 31.511207580566406, 'learning_rate': 8.23423165278725e-11, 'fcm_dpo/beta': 0.0008738588076084852, 'fcm_dpo/q_t': 0.4123176634311676, 'fcm_dpo/delta': -0.009816518053412437, 'fcm_dpo/margin': 468.474609375, 'margin_dpo/margin_mean': 468.4746398925781, 'margin_dpo/margin_std': 753.8814697265625, 'logps/chosen': -821.289794921875, 'logps/rejected': -1311.4635009765625, 'logps/ref_chosen': -56.527435302734375, 'logps/ref_rejected': -78.22654724121094, 'KL/chosen_KL_mean': -764.7623291015625, 'KL/rejected_KL_mean': -1233.237060546875, 'KL/mean': -998.9996337890625, 'KL/std': 625.9779052734375, 'logits/chosen': -1.093052864074707, 'logits/rejected': -1.0892189741134644, 'epoch': 0.99} + 99%|█████████▉| 677/681 [28:46<00:10, 2.52s/it] 100%|█████████▉| 678/681 [28:49<00:07, 2.49s/it] {'loss': 1.0583, 'grad_norm': 28.7852840423584, 'learning_rate': 5.270012410216185e-11, 'fcm_dpo/beta': 0.0008661206811666489, 'fcm_dpo/q_t': 0.39175188541412354, 'fcm_dpo/delta': -0.09516976773738861, 'fcm_dpo/margin': 566.3224487304688, 'margin_dpo/margin_mean': 566.3224487304688, 'margin_dpo/margin_std': 791.4454345703125, 'logps/chosen': -690.8457641601562, 'logps/rejected': -1291.638427734375, 'logps/ref_chosen': -46.13447570800781, 'logps/ref_rejected': -80.60462951660156, 'KL/chosen_KL_mean': -644.7113037109375, 'KL/rejected_KL_mean': -1211.03369140625, 'KL/mean': -927.87255859375, 'KL/std': 692.720458984375, 'logits/chosen': -1.0311825275421143, 'logits/rejected': -1.070950984954834, 'epoch': 1.0} + 100%|█████████▉| 678/681 [28:49<00:07, 2.49s/it] 100%|█████████▉| 679/681 [28:51<00:05, 2.57s/it] {'loss': 1.1507, 'grad_norm': 39.39456558227539, 'learning_rate': 2.9644275480772416e-11, 'fcm_dpo/beta': 0.0008657841826789081, 'fcm_dpo/q_t': 0.4249575436115265, 'fcm_dpo/delta': 0.06911883503198624, 'fcm_dpo/margin': 384.8945617675781, 'margin_dpo/margin_mean': 384.8945617675781, 'margin_dpo/margin_std': 635.107666015625, 'logps/chosen': -785.1641845703125, 'logps/rejected': -1196.3619384765625, 'logps/ref_chosen': -50.294921875, 'logps/ref_rejected': -76.59813690185547, 'KL/chosen_KL_mean': -734.8692626953125, 'KL/rejected_KL_mean': -1119.763916015625, 'KL/mean': -927.3165283203125, 'KL/std': 549.86669921875, 'logits/chosen': -1.0478802919387817, 'logits/rejected': -1.0363208055496216, 'epoch': 1.0} + 100%|█████████▉| 679/681 [28:51<00:05, 2.57s/it] 100%|█████████▉| 680/681 [28:54<00:02, 2.58s/it] {'loss': 1.0929, 'grad_norm': 38.68387985229492, 'learning_rate': 1.31753782067201e-11, 'fcm_dpo/beta': 0.0008600302971899509, 'fcm_dpo/q_t': 0.39925122261047363, 'fcm_dpo/delta': -0.04386995732784271, 'fcm_dpo/margin': 513.0979614257812, 'margin_dpo/margin_mean': 513.0979614257812, 'margin_dpo/margin_std': 767.623046875, 'logps/chosen': -825.6535034179688, 'logps/rejected': -1374.220458984375, 'logps/ref_chosen': -76.91569519042969, 'logps/ref_rejected': -112.384765625, 'KL/chosen_KL_mean': -748.73779296875, 'KL/rejected_KL_mean': -1261.835693359375, 'KL/mean': -1005.2867431640625, 'KL/std': 711.034912109375, 'logits/chosen': -1.037444829940796, 'logits/rejected': -1.0661684274673462, 'epoch': 1.0} + 100%|█████████▉| 680/681 [28:54<00:02, 2.58s/it] 100%|██████████| 681/681 [28:56<00:00, 2.57s/it] {'loss': 1.1462, 'grad_norm': 35.66378402709961, 'learning_rate': 3.2938662507808745e-12, 'fcm_dpo/beta': 0.0008584200404584408, 'fcm_dpo/q_t': 0.4208639860153198, 'fcm_dpo/delta': -0.04448072612285614, 'fcm_dpo/margin': 404.02093505859375, 'margin_dpo/margin_mean': 404.0209655761719, 'margin_dpo/margin_std': 639.079833984375, 'logps/chosen': -798.5409545898438, 'logps/rejected': -1230.16259765625, 'logps/ref_chosen': -60.957279205322266, 'logps/ref_rejected': -88.55797576904297, 'KL/chosen_KL_mean': -737.5836791992188, 'KL/rejected_KL_mean': -1141.6046142578125, 'KL/mean': -939.5941772460938, 'KL/std': 583.9456176757812, 'logits/chosen': -1.1038322448730469, 'logits/rejected': -1.1210821866989136, 'epoch': 1.0} + 100%|██████████| 681/681 [28:56<00:00, 2.57s/it][INFO|trainer.py:2681] 2026-04-29 17:14:09,833 >> + +Training completed. Do not forget to share your model on huggingface.co/models =) + + + {'train_runtime': 1736.9515, 'train_samples_per_second': 25.1, 'train_steps_per_second': 0.392, 'train_loss': 1.093637848565582, 'epoch': 1.0} + 100%|██████████| 681/681 [28:56<00:00, 2.57s/it] 100%|██████████| 681/681 [28:56<00:00, 2.55s/it] +***** train metrics ***** + epoch = 1.0 + total_flos = 0GF + train_loss = 1.0936 + train_runtime = 0:28:56.95 + train_samples = 43598 + train_samples_per_second = 25.1 + train_steps_per_second = 0.392 +2026-04-29 17:14:09 - INFO - __main__ - *** Training complete *** +2026-04-29 17:14:09 - INFO - __main__ - *** Save model *** +[INFO|configuration_utils.py:419] 2026-04-29 17:14:42,749 >> Configuration saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/config.json +[INFO|configuration_utils.py:911] 2026-04-29 17:14:42,751 >> Configuration saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/generation_config.json +[INFO|modeling_utils.py:3580] 2026-04-29 17:15:54,224 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 7 checkpoint shards. You can find where each parameters has been saved in the index located at /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/model.safetensors.index.json. +[INFO|tokenization_utils_base.py:2510] 2026-04-29 17:15:54,231 >> tokenizer config file saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/tokenizer_config.json +[INFO|tokenization_utils_base.py:2519] 2026-04-29 17:15:54,233 >> Special tokens file saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/special_tokens_map.json +2026-04-29 17:15:54 - INFO - __main__ - Saved HF-compatible model artifacts to /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449 +[INFO|modelcard.py:450] 2026-04-29 17:15:55,854 >> Dropping the following result as it does not have all the necessary fields: +{'dataset': {'name': 'Anthropic/hh-rlhf', 'type': 'Anthropic/hh-rlhf'}} +[INFO|configuration_utils.py:419] 2026-04-29 17:15:55,861 >> Configuration saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/config.json +2026-04-29 17:15:55 - INFO - __main__ - Skipping margin dataset upload because push_margin_dataset is false. +2026-04-29 17:15:55 - INFO - __main__ - *** Training complete! *** +wandb: - 0.011 MB of 0.011 MB uploaded wandb: \ 0.011 MB of 0.011 MB uploaded wandb: | 0.011 MB of 0.011 MB uploaded wandb: / 0.011 MB of 0.640 MB uploaded wandb: - 0.012 MB of 0.640 MB uploaded wandb: \ 0.014 MB of 0.640 MB uploaded wandb: | 0.014 MB of 0.640 MB uploaded wandb: / 0.014 MB of 0.640 MB uploaded wandb: - 0.640 MB of 0.640 MB uploaded wandb: +wandb: Run history: +wandb: train/KL/chosen_KL_mean ████████▇▇▇▇▇▆▆▆▆▅▅▅▅▆▅▅▅▅▄▂▂▃▃▃▃▁▁▂▁▁▁▁ +wandb: train/KL/mean ████████▇▇▇▇▇▆▆▆▆▅▆▅▅▅▅▅▅▅▄▂▃▃▃▃▂▂▂▃▂▁▁▁ +wandb: train/KL/rejected_KL_mean ████████▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▃▃▃▄▃▂▃▂▃▂▁▁▂ +wandb: train/KL/std ▁▁▁▁▁▁▁▂▂▂▂▂▂▃▂▃▃▃▃▃▃▃▃▄▄▄▅▇▆▆▅▆▇▆▆▆▆███ +wandb: train/epoch ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███ +wandb: train/fcm_dpo/beta ██▇▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +wandb: train/fcm_dpo/delta ▆▆▂▁▁▅▄▆▆▅▅▆▆█▅▆▅▆▆▆█▄▇▄▅▅▅▅▇▅▇▄▃█▇▆▅▄▂▅ +wandb: train/fcm_dpo/margin ▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▂▃▃▃▄▃▅▄▄▅▅▅▅▄▆▇▄▅▅▆▇█▆ +wandb: train/fcm_dpo/q_t █▆▁▁▁▃▃▄▄▃▃▄▃▅▃▃▃▅▄▃▅▂▄▂▃▃▃▄▄▃▄▃▂▅▄▄▃▃▂▃ +wandb: train/global_step ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███ +wandb: train/grad_norm ██▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▂▁ +wandb: train/learning_rate ▂▃▅▇██████▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁ +wandb: train/logits/chosen ▆▆▆▅▅▄▄▆▇▇██▇▇█▇█▇▇▇▆▆▆▆▅▅▄▂▂▂▃▂▂▁▁▁▁▂▂▁ +wandb: train/logits/rejected ▆▆▇▆▅▅▅▆▇▇▇█▇▇█▇█▇█▇▇▆▆▆▅▆▄▂▂▂▃▂▂▁▁▁▁▂▂▁ +wandb: train/logps/chosen ███████▇▇▇▇▇▆▆▆▆▆▅▅▅▅▆▅▅▅▅▄▁▂▃▃▃▃▁▂▂▁▁▁▁ +wandb: train/logps/ref_chosen ▅█▆▆▄▃▆▆▅▄▇▃▄▃█▅▅▄▇▅▃█▇▅▅▃▆▁▇▅▃▇▅▆▇▆▃▆▆▂ +wandb: train/logps/ref_rejected ▇▄█▅▅▃▆█▇▄▅▃▅▆▅▅▄▅█▅█▄▆▃▅▄▃▁▆▃▄▆▃▄█▅▄▂▂▂ +wandb: train/logps/rejected ████████▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▂▃▃▄▃▂▃▃▃▂▁▁▂ +wandb: train/loss █▆▁▁▁▄▃▄▄▃▃▄▃▄▂▃▃▅▃▃▄▂▄▂▂▂▂▄▄▃▄▃▂▅▅▃▃▃▂▃ +wandb: train/margin_dpo/margin_mean ▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▂▃▃▃▄▃▅▄▄▅▅▅▅▄▆▇▄▅▅▆▇█▆ +wandb: train/margin_dpo/margin_std ▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▃▄▇▅▆▅▇▇▆█▅▆█▇█ +wandb: +wandb: Run summary: +wandb: total_flos 0.0 +wandb: train/KL/chosen_KL_mean -737.58368 +wandb: train/KL/mean -939.59418 +wandb: train/KL/rejected_KL_mean -1141.60461 +wandb: train/KL/std 583.94562 +wandb: train/epoch 1.0 +wandb: train/fcm_dpo/beta 0.00086 +wandb: train/fcm_dpo/delta -0.04448 +wandb: train/fcm_dpo/margin 404.02094 +wandb: train/fcm_dpo/q_t 0.42086 +wandb: train/global_step 681 +wandb: train/grad_norm 35.66378 +wandb: train/learning_rate 0.0 +wandb: train/logits/chosen -1.10383 +wandb: train/logits/rejected -1.12108 +wandb: train/logps/chosen -798.54095 +wandb: train/logps/ref_chosen -60.95728 +wandb: train/logps/ref_rejected -88.55798 +wandb: train/logps/rejected -1230.1626 +wandb: train/loss 1.1462 +wandb: train/margin_dpo/margin_mean 404.02097 +wandb: train/margin_dpo/margin_std 639.07983 +wandb: train_loss 1.09364 +wandb: train_runtime 1736.9515 +wandb: train_samples_per_second 25.1 +wandb: train_steps_per_second 0.392 +wandb: +wandb: 🚀 View run llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449 at: https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep/runs/ypz8eup0 +wandb: ⭐️ View project at: https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep +wandb: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s) +wandb: Find logs at: ./wandb/wandb/run-20260429_164429-ypz8eup0/logs +wandb: WARNING The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require("core")`! See https://wandb.me/wandb-core for more information. diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..4fd2a77 --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 0.0, + "train_loss": 1.093637848565582, + "train_runtime": 1736.9515, + "train_samples": 43598, + "train_samples_per_second": 25.1, + "train_steps_per_second": 0.392 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..f9a27cd --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,15706 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 200, + "global_step": 681, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "KL/chosen_KL_mean": 0.00527191162109375, + "KL/mean": 0.016706019639968872, + "KL/rejected_KL_mean": 0.028141021728515625, + "KL/std": 0.272699236869812, + "epoch": 0.0014684287812041115, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.02287006378173828, + "fcm_dpo/q_t": 0.5027250051498413, + "grad_norm": 420.2432861328125, + "learning_rate": 0.0, + "logits/chosen": -0.4974287748336792, + "logits/rejected": -0.43299180269241333, + "logps/chosen": -50.1435661315918, + "logps/ref_chosen": -50.14883804321289, + "logps/ref_rejected": -74.1280517578125, + "logps/rejected": -74.09991455078125, + "loss": 1.4087, + "margin_dpo/margin_mean": -0.02287048101425171, + "margin_dpo/margin_std": 0.41920793056488037, + "step": 1 + }, + { + "KL/chosen_KL_mean": -0.03498649597167969, + "KL/mean": -0.00212840735912323, + "KL/rejected_KL_mean": 0.030735015869140625, + "KL/std": 0.24797174334526062, + "epoch": 0.002936857562408223, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.06572261452674866, + "fcm_dpo/q_t": 0.5081548094749451, + "grad_norm": 364.62652587890625, + "learning_rate": 7.246376811594203e-09, + "logits/chosen": -0.49536412954330444, + "logits/rejected": -0.4594460427761078, + "logps/chosen": -52.65568923950195, + "logps/ref_chosen": -52.620704650878906, + "logps/ref_rejected": -75.30413818359375, + "logps/rejected": -75.27340698242188, + "loss": 1.4271, + "margin_dpo/margin_mean": -0.06572240591049194, + "margin_dpo/margin_std": 0.35048407316207886, + "step": 2 + }, + { + "KL/chosen_KL_mean": 0.052303314208984375, + "KL/mean": 0.017774119973182678, + "KL/rejected_KL_mean": -0.016756057739257812, + "KL/std": 0.28824305534362793, + "epoch": 0.004405286343612335, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.06905469298362732, + "fcm_dpo/q_t": 0.4915676712989807, + "grad_norm": 347.2252197265625, + "learning_rate": 1.4492753623188406e-08, + "logits/chosen": -0.4816562235355377, + "logits/rejected": -0.44209641218185425, + "logps/chosen": -60.929290771484375, + "logps/ref_chosen": -60.981597900390625, + "logps/ref_rejected": -68.67259216308594, + "logps/rejected": -68.6893539428711, + "loss": 1.362, + "margin_dpo/margin_mean": 0.06905469298362732, + "margin_dpo/margin_std": 0.3988131284713745, + "step": 3 + }, + { + "KL/chosen_KL_mean": -0.021808624267578125, + "KL/mean": -0.011183008551597595, + "KL/rejected_KL_mean": -0.000560760498046875, + "KL/std": 0.250108003616333, + "epoch": 0.005873715124816446, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.021249920129776, + "fcm_dpo/q_t": 0.5027137994766235, + "grad_norm": 359.3165588378906, + "learning_rate": 2.1739130434782606e-08, + "logits/chosen": -0.4682066738605499, + "logits/rejected": -0.44051969051361084, + "logps/chosen": -56.789520263671875, + "logps/ref_chosen": -56.7677116394043, + "logps/ref_rejected": -86.64710998535156, + "logps/rejected": -86.64767456054688, + "loss": 1.4044, + "margin_dpo/margin_mean": -0.02125033736228943, + "margin_dpo/margin_std": 0.33959275484085083, + "step": 4 + }, + { + "KL/chosen_KL_mean": 0.040134429931640625, + "KL/mean": 0.021857306361198425, + "KL/rejected_KL_mean": 0.003582000732421875, + "KL/std": 0.26523804664611816, + "epoch": 0.007342143906020558, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.036554425954818726, + "fcm_dpo/q_t": 0.4954211413860321, + "grad_norm": 448.5081481933594, + "learning_rate": 2.898550724637681e-08, + "logits/chosen": -0.49668556451797485, + "logits/rejected": -0.45167264342308044, + "logps/chosen": -53.81924057006836, + "logps/ref_chosen": -53.859375, + "logps/ref_rejected": -84.14918518066406, + "logps/rejected": -84.14559936523438, + "loss": 1.3765, + "margin_dpo/margin_mean": 0.03655460476875305, + "margin_dpo/margin_std": 0.3572620153427124, + "step": 5 + }, + { + "KL/chosen_KL_mean": -0.011350631713867188, + "KL/mean": 0.008662402629852295, + "KL/rejected_KL_mean": 0.028675079345703125, + "KL/std": 0.28275883197784424, + "epoch": 0.00881057268722467, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.04002267122268677, + "fcm_dpo/q_t": 0.5049124956130981, + "grad_norm": 474.76165771484375, + "learning_rate": 3.6231884057971014e-08, + "logits/chosen": -0.5011003613471985, + "logits/rejected": -0.4586023688316345, + "logps/chosen": -63.018836975097656, + "logps/ref_chosen": -63.007484436035156, + "logps/ref_rejected": -92.64534759521484, + "logps/rejected": -92.61666870117188, + "loss": 1.4172, + "margin_dpo/margin_mean": -0.04002311825752258, + "margin_dpo/margin_std": 0.41552552580833435, + "step": 6 + }, + { + "KL/chosen_KL_mean": 0.017522811889648438, + "KL/mean": -0.0009044557809829712, + "KL/rejected_KL_mean": -0.0193328857421875, + "KL/std": 0.27743956446647644, + "epoch": 0.010279001468428781, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.0368523895740509, + "fcm_dpo/q_t": 0.49542200565338135, + "grad_norm": 406.9675598144531, + "learning_rate": 4.347826086956521e-08, + "logits/chosen": -0.5030827522277832, + "logits/rejected": -0.4692496657371521, + "logps/chosen": -57.75729751586914, + "logps/ref_chosen": -57.774818420410156, + "logps/ref_rejected": -103.92059326171875, + "logps/rejected": -103.93992614746094, + "loss": 1.3777, + "margin_dpo/margin_mean": 0.03685298562049866, + "margin_dpo/margin_std": 0.3953211307525635, + "step": 7 + }, + { + "KL/chosen_KL_mean": -0.03408622741699219, + "KL/mean": -0.005419567227363586, + "KL/rejected_KL_mean": 0.023250579833984375, + "KL/std": 0.2861067056655884, + "epoch": 0.011747430249632892, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.05733850598335266, + "fcm_dpo/q_t": 0.5070033073425293, + "grad_norm": 401.7236328125, + "learning_rate": 5.0724637681159424e-08, + "logits/chosen": -0.5170855522155762, + "logits/rejected": -0.4922248125076294, + "logps/chosen": -58.7501220703125, + "logps/ref_chosen": -58.716033935546875, + "logps/ref_rejected": -79.3114242553711, + "logps/rejected": -79.28817749023438, + "loss": 1.4225, + "margin_dpo/margin_mean": -0.05733811855316162, + "margin_dpo/margin_std": 0.3359847962856293, + "step": 8 + }, + { + "KL/chosen_KL_mean": -0.017595291137695312, + "KL/mean": -0.022840231657028198, + "KL/rejected_KL_mean": -0.02808380126953125, + "KL/std": 0.28952154517173767, + "epoch": 0.013215859030837005, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.010491371154785156, + "fcm_dpo/q_t": 0.49866122007369995, + "grad_norm": 423.5147705078125, + "learning_rate": 5.797101449275362e-08, + "logits/chosen": -0.5013039708137512, + "logits/rejected": -0.45518267154693604, + "logps/chosen": -69.88443756103516, + "logps/ref_chosen": -69.8668441772461, + "logps/ref_rejected": -99.6026611328125, + "logps/rejected": -99.63075256347656, + "loss": 1.3925, + "margin_dpo/margin_mean": 0.010491013526916504, + "margin_dpo/margin_std": 0.42117273807525635, + "step": 9 + }, + { + "KL/chosen_KL_mean": 0.0170440673828125, + "KL/mean": 0.02462557703256607, + "KL/rejected_KL_mean": 0.0322113037109375, + "KL/std": 0.2662718594074249, + "epoch": 0.014684287812041116, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.015163615345954895, + "fcm_dpo/q_t": 0.5018855333328247, + "grad_norm": 353.2280578613281, + "learning_rate": 6.521739130434782e-08, + "logits/chosen": -0.4687877297401428, + "logits/rejected": -0.42438995838165283, + "logps/chosen": -48.340641021728516, + "logps/ref_chosen": -48.35768508911133, + "logps/ref_rejected": -80.37206268310547, + "logps/rejected": -80.33985137939453, + "loss": 1.4024, + "margin_dpo/margin_mean": -0.015163183212280273, + "margin_dpo/margin_std": 0.35796934366226196, + "step": 10 + }, + { + "KL/chosen_KL_mean": 0.021253585815429688, + "KL/mean": 0.008961886167526245, + "KL/rejected_KL_mean": -0.003330230712890625, + "KL/std": 0.24493196606636047, + "epoch": 0.016152716593245228, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.024578213691711426, + "fcm_dpo/q_t": 0.49689868092536926, + "grad_norm": 344.31915283203125, + "learning_rate": 7.246376811594203e-08, + "logits/chosen": -0.4556809365749359, + "logits/rejected": -0.43051183223724365, + "logps/chosen": -52.995601654052734, + "logps/ref_chosen": -53.01685333251953, + "logps/ref_rejected": -87.78038024902344, + "logps/rejected": -87.78370666503906, + "loss": 1.3821, + "margin_dpo/margin_mean": 0.024578243494033813, + "margin_dpo/margin_std": 0.3401423990726471, + "step": 11 + }, + { + "KL/chosen_KL_mean": -0.0713043212890625, + "KL/mean": -0.07425594329833984, + "KL/rejected_KL_mean": -0.07719802856445312, + "KL/std": 0.2839137315750122, + "epoch": 0.01762114537444934, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.0059018731117248535, + "fcm_dpo/q_t": 0.4991750717163086, + "grad_norm": 445.4076843261719, + "learning_rate": 7.971014492753623e-08, + "logits/chosen": -0.5402117967605591, + "logits/rejected": -0.5041322708129883, + "logps/chosen": -61.876739501953125, + "logps/ref_chosen": -61.80543518066406, + "logps/ref_rejected": -104.8582763671875, + "logps/rejected": -104.93547058105469, + "loss": 1.395, + "margin_dpo/margin_mean": 0.005901157855987549, + "margin_dpo/margin_std": 0.43184518814086914, + "step": 12 + }, + { + "KL/chosen_KL_mean": 0.018360137939453125, + "KL/mean": 0.013540104031562805, + "KL/rejected_KL_mean": 0.00872039794921875, + "KL/std": 0.2952546775341034, + "epoch": 0.01908957415565345, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.009646743535995483, + "fcm_dpo/q_t": 0.4988465905189514, + "grad_norm": 399.35504150390625, + "learning_rate": 8.695652173913042e-08, + "logits/chosen": -0.49472951889038086, + "logits/rejected": -0.46776068210601807, + "logps/chosen": -64.24199676513672, + "logps/ref_chosen": -64.2603530883789, + "logps/ref_rejected": -87.20307922363281, + "logps/rejected": -87.19436645507812, + "loss": 1.3929, + "margin_dpo/margin_mean": 0.009646564722061157, + "margin_dpo/margin_std": 0.4087739586830139, + "step": 13 + }, + { + "KL/chosen_KL_mean": -0.015897750854492188, + "KL/mean": -0.016403615474700928, + "KL/rejected_KL_mean": -0.016910552978515625, + "KL/std": 0.29813089966773987, + "epoch": 0.020558002936857563, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.0010128617286682129, + "fcm_dpo/q_t": 0.49986132979393005, + "grad_norm": 423.43255615234375, + "learning_rate": 9.420289855072464e-08, + "logits/chosen": -0.4713672995567322, + "logits/rejected": -0.4317617416381836, + "logps/chosen": -58.12610626220703, + "logps/ref_chosen": -58.11021041870117, + "logps/ref_rejected": -104.04708099365234, + "logps/rejected": -104.06399536132812, + "loss": 1.3972, + "margin_dpo/margin_mean": 0.0010128915309906006, + "margin_dpo/margin_std": 0.4278063476085663, + "step": 14 + }, + { + "KL/chosen_KL_mean": -0.029178619384765625, + "KL/mean": -0.043839290738105774, + "KL/rejected_KL_mean": -0.058498382568359375, + "KL/std": 0.21881349384784698, + "epoch": 0.022026431718061675, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.029320567846298218, + "fcm_dpo/q_t": 0.4963420033454895, + "grad_norm": 320.57830810546875, + "learning_rate": 1.0144927536231885e-07, + "logits/chosen": -0.4776439368724823, + "logits/rejected": -0.45821863412857056, + "logps/chosen": -56.99608612060547, + "logps/ref_chosen": -56.96691131591797, + "logps/ref_rejected": -80.80863952636719, + "logps/rejected": -80.86714172363281, + "loss": 1.3803, + "margin_dpo/margin_mean": 0.029320329427719116, + "margin_dpo/margin_std": 0.3670857548713684, + "step": 15 + }, + { + "KL/chosen_KL_mean": 0.03498077392578125, + "KL/mean": 0.0047643184661865234, + "KL/rejected_KL_mean": -0.025447845458984375, + "KL/std": 0.22115886211395264, + "epoch": 0.023494860499265784, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.06042605638504028, + "fcm_dpo/q_t": 0.49249696731567383, + "grad_norm": 413.9620056152344, + "learning_rate": 1.0869565217391303e-07, + "logits/chosen": -0.5402108430862427, + "logits/rejected": -0.5008047223091125, + "logps/chosen": -61.70491027832031, + "logps/ref_chosen": -61.739891052246094, + "logps/ref_rejected": -84.36947631835938, + "logps/rejected": -84.3949203491211, + "loss": 1.3621, + "margin_dpo/margin_mean": 0.06042572855949402, + "margin_dpo/margin_std": 0.28903117775917053, + "step": 16 + }, + { + "KL/chosen_KL_mean": 0.032032012939453125, + "KL/mean": -0.021739423274993896, + "KL/rejected_KL_mean": -0.07551193237304688, + "KL/std": 0.28327155113220215, + "epoch": 0.024963289280469897, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.10754328966140747, + "fcm_dpo/q_t": 0.4866830110549927, + "grad_norm": 378.045166015625, + "learning_rate": 1.1594202898550725e-07, + "logits/chosen": -0.5082837343215942, + "logits/rejected": -0.4719049036502838, + "logps/chosen": -67.67829895019531, + "logps/ref_chosen": -67.71033477783203, + "logps/ref_rejected": -85.37865447998047, + "logps/rejected": -85.45416259765625, + "loss": 1.3408, + "margin_dpo/margin_mean": 0.10754308104515076, + "margin_dpo/margin_std": 0.33711522817611694, + "step": 17 + }, + { + "KL/chosen_KL_mean": -0.004711151123046875, + "KL/mean": -0.020610541105270386, + "KL/rejected_KL_mean": -0.0365142822265625, + "KL/std": 0.24805116653442383, + "epoch": 0.02643171806167401, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.03179961442947388, + "fcm_dpo/q_t": 0.49600082635879517, + "grad_norm": 400.0282897949219, + "learning_rate": 1.2318840579710146e-07, + "logits/chosen": -0.4709518253803253, + "logits/rejected": -0.41293156147003174, + "logps/chosen": -47.74420166015625, + "logps/ref_chosen": -47.7394905090332, + "logps/ref_rejected": -75.4722900390625, + "logps/rejected": -75.50880432128906, + "loss": 1.3788, + "margin_dpo/margin_mean": 0.0317995548248291, + "margin_dpo/margin_std": 0.3525484800338745, + "step": 18 + }, + { + "KL/chosen_KL_mean": 0.040874481201171875, + "KL/mean": -0.014696747064590454, + "KL/rejected_KL_mean": -0.07026290893554688, + "KL/std": 0.2666897773742676, + "epoch": 0.027900146842878122, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.11113607883453369, + "fcm_dpo/q_t": 0.48626208305358887, + "grad_norm": 357.18487548828125, + "learning_rate": 1.3043478260869563e-07, + "logits/chosen": -0.485554575920105, + "logits/rejected": -0.4352126121520996, + "logps/chosen": -70.16448974609375, + "logps/ref_chosen": -70.20536041259766, + "logps/ref_rejected": -89.7575912475586, + "logps/rejected": -89.82785034179688, + "loss": 1.3391, + "margin_dpo/margin_mean": 0.11113619804382324, + "margin_dpo/margin_std": 0.3337096571922302, + "step": 19 + }, + { + "KL/chosen_KL_mean": -0.010358810424804688, + "KL/mean": -0.048069894313812256, + "KL/rejected_KL_mean": -0.08577346801757812, + "KL/std": 0.24265292286872864, + "epoch": 0.02936857562408223, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.0754203200340271, + "fcm_dpo/q_t": 0.4906235337257385, + "grad_norm": 360.1549377441406, + "learning_rate": 1.3768115942028986e-07, + "logits/chosen": -0.5490742325782776, + "logits/rejected": -0.4924872815608978, + "logps/chosen": -50.8135986328125, + "logps/ref_chosen": -50.80324172973633, + "logps/ref_rejected": -78.82334899902344, + "logps/rejected": -78.90911865234375, + "loss": 1.3552, + "margin_dpo/margin_mean": 0.07542020082473755, + "margin_dpo/margin_std": 0.3174728751182556, + "step": 20 + }, + { + "KL/chosen_KL_mean": -0.0028514862060546875, + "KL/mean": -0.034926123917102814, + "KL/rejected_KL_mean": -0.0670013427734375, + "KL/std": 0.2600030303001404, + "epoch": 0.030837004405286344, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.06414835155010223, + "fcm_dpo/q_t": 0.4920843839645386, + "grad_norm": 378.02789306640625, + "learning_rate": 1.4492753623188405e-07, + "logits/chosen": -0.5145970582962036, + "logits/rejected": -0.4921773076057434, + "logps/chosen": -50.06586837768555, + "logps/ref_chosen": -50.063018798828125, + "logps/ref_rejected": -77.86878967285156, + "logps/rejected": -77.935791015625, + "loss": 1.3604, + "margin_dpo/margin_mean": 0.06414888799190521, + "margin_dpo/margin_std": 0.30178433656692505, + "step": 21 + }, + { + "KL/chosen_KL_mean": 0.03511619567871094, + "KL/mean": -0.06825144588947296, + "KL/rejected_KL_mean": -0.17161941528320312, + "KL/std": 0.30668485164642334, + "epoch": 0.032305433186490456, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.20674064755439758, + "fcm_dpo/q_t": 0.4745423197746277, + "grad_norm": 388.7802429199219, + "learning_rate": 1.5217391304347825e-07, + "logits/chosen": -0.4853633642196655, + "logits/rejected": -0.4417203366756439, + "logps/chosen": -59.02252197265625, + "logps/ref_chosen": -59.05763626098633, + "logps/ref_rejected": -97.50466918945312, + "logps/rejected": -97.6762924194336, + "loss": 1.2978, + "margin_dpo/margin_mean": 0.206741064786911, + "margin_dpo/margin_std": 0.43350815773010254, + "step": 22 + }, + { + "KL/chosen_KL_mean": 0.09302711486816406, + "KL/mean": -0.042252302169799805, + "KL/rejected_KL_mean": -0.17752456665039062, + "KL/std": 0.31162387132644653, + "epoch": 0.033773861967694566, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.27054840326309204, + "fcm_dpo/q_t": 0.4666573405265808, + "grad_norm": 364.82421875, + "learning_rate": 1.5942028985507245e-07, + "logits/chosen": -0.5055208206176758, + "logits/rejected": -0.4839291274547577, + "logps/chosen": -59.98466873168945, + "logps/ref_chosen": -60.07769775390625, + "logps/ref_rejected": -81.13955688476562, + "logps/rejected": -81.31707763671875, + "loss": 1.2662, + "margin_dpo/margin_mean": 0.2705477774143219, + "margin_dpo/margin_std": 0.40643441677093506, + "step": 23 + }, + { + "KL/chosen_KL_mean": 0.04521942138671875, + "KL/mean": -0.0763748288154602, + "KL/rejected_KL_mean": -0.19797515869140625, + "KL/std": 0.28322041034698486, + "epoch": 0.03524229074889868, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.24319219589233398, + "fcm_dpo/q_t": 0.4699401259422302, + "grad_norm": 391.4231872558594, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.5012839436531067, + "logits/rejected": -0.4848848581314087, + "logps/chosen": -44.24581527709961, + "logps/ref_chosen": -44.29103469848633, + "logps/ref_rejected": -99.12521362304688, + "logps/rejected": -99.32318878173828, + "loss": 1.2773, + "margin_dpo/margin_mean": 0.24319320917129517, + "margin_dpo/margin_std": 0.3720870018005371, + "step": 24 + }, + { + "KL/chosen_KL_mean": 0.05419921875, + "KL/mean": -0.06559216976165771, + "KL/rejected_KL_mean": -0.18538284301757812, + "KL/std": 0.3871203064918518, + "epoch": 0.03671071953010279, + "fcm_dpo/beta": 0.5, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.2395843267440796, + "fcm_dpo/q_t": 0.4704943895339966, + "grad_norm": 349.39923095703125, + "learning_rate": 1.7391304347826085e-07, + "logits/chosen": -0.5076802968978882, + "logits/rejected": -0.47814005613327026, + "logps/chosen": -52.482852935791016, + "logps/ref_chosen": -52.537052154541016, + "logps/ref_rejected": -89.34219360351562, + "logps/rejected": -89.52757263183594, + "loss": 1.2852, + "margin_dpo/margin_mean": 0.2395840287208557, + "margin_dpo/margin_std": 0.49307340383529663, + "step": 25 + }, + { + "KL/chosen_KL_mean": 0.06473541259765625, + "KL/mean": -0.1246185302734375, + "KL/rejected_KL_mean": -0.31397247314453125, + "KL/std": 0.4182741940021515, + "epoch": 0.0381791483113069, + "fcm_dpo/beta": 0.5046226978302002, + "fcm_dpo/delta": 0.09160952270030975, + "fcm_dpo/margin": 0.37870925664901733, + "fcm_dpo/q_t": 0.4540257155895233, + "grad_norm": 383.2384948730469, + "learning_rate": 1.8115942028985507e-07, + "logits/chosen": -0.5243556499481201, + "logits/rejected": -0.4925326108932495, + "logps/chosen": -53.858070373535156, + "logps/ref_chosen": -53.92280578613281, + "logps/ref_rejected": -103.35971069335938, + "logps/rejected": -103.67369079589844, + "loss": 1.2223, + "margin_dpo/margin_mean": 0.37870919704437256, + "margin_dpo/margin_std": 0.5341597199440002, + "step": 26 + }, + { + "KL/chosen_KL_mean": 0.1428699493408203, + "KL/mean": -0.11810372769832611, + "KL/rejected_KL_mean": -0.3790779113769531, + "KL/std": 0.44679516553878784, + "epoch": 0.039647577092511016, + "fcm_dpo/beta": 0.5186325311660767, + "fcm_dpo/delta": 0.13306188583374023, + "fcm_dpo/margin": 0.5219477415084839, + "fcm_dpo/q_t": 0.435089111328125, + "grad_norm": 404.0160217285156, + "learning_rate": 1.8840579710144927e-07, + "logits/chosen": -0.5184708833694458, + "logits/rejected": -0.4820369780063629, + "logps/chosen": -42.75566101074219, + "logps/ref_chosen": -42.898529052734375, + "logps/ref_rejected": -98.72419738769531, + "logps/rejected": -99.10327911376953, + "loss": 1.1512, + "margin_dpo/margin_mean": 0.5219472646713257, + "margin_dpo/margin_std": 0.5090110301971436, + "step": 27 + }, + { + "KL/chosen_KL_mean": 0.03517341613769531, + "KL/mean": -0.1448913812637329, + "KL/rejected_KL_mean": -0.32495880126953125, + "KL/std": 0.41489964723587036, + "epoch": 0.041116005873715125, + "fcm_dpo/beta": 0.5273520350456238, + "fcm_dpo/delta": 0.08291557431221008, + "fcm_dpo/margin": 0.36012983322143555, + "fcm_dpo/q_t": 0.45420730113983154, + "grad_norm": 340.0865173339844, + "learning_rate": 1.9565217391304347e-07, + "logits/chosen": -0.5174187421798706, + "logits/rejected": -0.4631088972091675, + "logps/chosen": -60.52132797241211, + "logps/ref_chosen": -60.55650329589844, + "logps/ref_rejected": -91.40111541748047, + "logps/rejected": -91.72607421875, + "loss": 1.2296, + "margin_dpo/margin_mean": 0.36013028025627136, + "margin_dpo/margin_std": 0.5909340381622314, + "step": 28 + }, + { + "KL/chosen_KL_mean": 0.09375, + "KL/mean": -0.18463768064975739, + "KL/rejected_KL_mean": -0.4630241394042969, + "KL/std": 0.4841146767139435, + "epoch": 0.042584434654919234, + "fcm_dpo/beta": 0.5401861667633057, + "fcm_dpo/delta": 0.10225643217563629, + "fcm_dpo/margin": 0.5567755699157715, + "fcm_dpo/q_t": 0.42827117443084717, + "grad_norm": 398.7171936035156, + "learning_rate": 2.028985507246377e-07, + "logits/chosen": -0.5421187877655029, + "logits/rejected": -0.49508053064346313, + "logps/chosen": -57.71403503417969, + "logps/ref_chosen": -57.80778503417969, + "logps/ref_rejected": -97.39434814453125, + "logps/rejected": -97.85737609863281, + "loss": 1.1301, + "margin_dpo/margin_mean": 0.5567758679389954, + "margin_dpo/margin_std": 0.5642556548118591, + "step": 29 + }, + { + "KL/chosen_KL_mean": 0.15998458862304688, + "KL/mean": -0.19396524131298065, + "KL/rejected_KL_mean": -0.5479164123535156, + "KL/std": 0.6062048673629761, + "epoch": 0.04405286343612335, + "fcm_dpo/beta": 0.542646050453186, + "fcm_dpo/delta": 0.01639886572957039, + "fcm_dpo/margin": 0.7079055309295654, + "fcm_dpo/q_t": 0.40854281187057495, + "grad_norm": 362.9571533203125, + "learning_rate": 2.1014492753623187e-07, + "logits/chosen": -0.482383131980896, + "logits/rejected": -0.451177716255188, + "logps/chosen": -52.417388916015625, + "logps/ref_chosen": -52.577369689941406, + "logps/ref_rejected": -98.48920440673828, + "logps/rejected": -99.03712463378906, + "loss": 1.0681, + "margin_dpo/margin_mean": 0.7079058289527893, + "margin_dpo/margin_std": 0.6428213119506836, + "step": 30 + }, + { + "KL/chosen_KL_mean": 0.10284805297851562, + "KL/mean": -0.1433902531862259, + "KL/rejected_KL_mean": -0.3896293640136719, + "KL/std": 0.5450563430786133, + "epoch": 0.04552129221732746, + "fcm_dpo/beta": 0.5562627911567688, + "fcm_dpo/delta": 0.12924844026565552, + "fcm_dpo/margin": 0.4924760162830353, + "fcm_dpo/q_t": 0.43538039922714233, + "grad_norm": 309.9827575683594, + "learning_rate": 2.1739130434782607e-07, + "logits/chosen": -0.48639383912086487, + "logits/rejected": -0.4393838047981262, + "logps/chosen": -63.704071044921875, + "logps/ref_chosen": -63.806922912597656, + "logps/ref_rejected": -72.89400482177734, + "logps/rejected": -73.28363037109375, + "loss": 1.1624, + "margin_dpo/margin_mean": 0.492476224899292, + "margin_dpo/margin_std": 0.6477472186088562, + "step": 31 + }, + { + "KL/chosen_KL_mean": 0.1891956329345703, + "KL/mean": -0.19269661605358124, + "KL/rejected_KL_mean": -0.5745887756347656, + "KL/std": 0.7516759634017944, + "epoch": 0.04698972099853157, + "fcm_dpo/beta": 0.5558298230171204, + "fcm_dpo/delta": -0.025696825236082077, + "fcm_dpo/margin": 0.7637799978256226, + "fcm_dpo/q_t": 0.4026581645011902, + "grad_norm": 343.0960998535156, + "learning_rate": 2.2463768115942027e-07, + "logits/chosen": -0.5173541307449341, + "logits/rejected": -0.4762566089630127, + "logps/chosen": -62.550331115722656, + "logps/ref_chosen": -62.739524841308594, + "logps/ref_rejected": -89.3175048828125, + "logps/rejected": -89.89208984375, + "loss": 1.0627, + "margin_dpo/margin_mean": 0.7637800574302673, + "margin_dpo/margin_std": 0.9043101668357849, + "step": 32 + }, + { + "KL/chosen_KL_mean": 0.09556961059570312, + "KL/mean": -0.20322665572166443, + "KL/rejected_KL_mean": -0.5020217895507812, + "KL/std": 0.5590921640396118, + "epoch": 0.048458149779735685, + "fcm_dpo/beta": 0.562440037727356, + "fcm_dpo/delta": 0.0659160241484642, + "fcm_dpo/margin": 0.5975915789604187, + "fcm_dpo/q_t": 0.4198671281337738, + "grad_norm": 328.4997253417969, + "learning_rate": 2.318840579710145e-07, + "logits/chosen": -0.5058610439300537, + "logits/rejected": -0.48015594482421875, + "logps/chosen": -53.1654052734375, + "logps/ref_chosen": -53.26097106933594, + "logps/ref_rejected": -87.8851318359375, + "logps/rejected": -88.38716125488281, + "loss": 1.1085, + "margin_dpo/margin_mean": 0.5975916385650635, + "margin_dpo/margin_std": 0.6252603530883789, + "step": 33 + }, + { + "KL/chosen_KL_mean": 0.09558486938476562, + "KL/mean": -0.3123227655887604, + "KL/rejected_KL_mean": -0.7202377319335938, + "KL/std": 0.7326186895370483, + "epoch": 0.049926578560939794, + "fcm_dpo/beta": 0.5575153827667236, + "fcm_dpo/delta": -0.057598959654569626, + "fcm_dpo/margin": 0.8158173561096191, + "fcm_dpo/q_t": 0.39417457580566406, + "grad_norm": 312.8501892089844, + "learning_rate": 2.391304347826087e-07, + "logits/chosen": -0.4918326139450073, + "logits/rejected": -0.47446727752685547, + "logps/chosen": -50.72174072265625, + "logps/ref_chosen": -50.81732940673828, + "logps/ref_rejected": -101.92184448242188, + "logps/rejected": -102.64208221435547, + "loss": 1.0336, + "margin_dpo/margin_mean": 0.8158169984817505, + "margin_dpo/margin_std": 0.8447773456573486, + "step": 34 + }, + { + "KL/chosen_KL_mean": 0.12476348876953125, + "KL/mean": -0.46371960639953613, + "KL/rejected_KL_mean": -1.0522003173828125, + "KL/std": 0.9688708782196045, + "epoch": 0.0513950073421439, + "fcm_dpo/beta": 0.5353924036026001, + "fcm_dpo/delta": -0.24643680453300476, + "fcm_dpo/margin": 1.1769663095474243, + "fcm_dpo/q_t": 0.35741329193115234, + "grad_norm": 279.2007751464844, + "learning_rate": 2.463768115942029e-07, + "logits/chosen": -0.5165481567382812, + "logits/rejected": -0.47960567474365234, + "logps/chosen": -50.89972686767578, + "logps/ref_chosen": -51.02449035644531, + "logps/ref_rejected": -106.82443237304688, + "logps/rejected": -107.87663269042969, + "loss": 0.9218, + "margin_dpo/margin_mean": 1.1769659519195557, + "margin_dpo/margin_std": 1.0974863767623901, + "step": 35 + }, + { + "KL/chosen_KL_mean": 0.055484771728515625, + "KL/mean": -0.4918856918811798, + "KL/rejected_KL_mean": -1.0392494201660156, + "KL/std": 1.049076795578003, + "epoch": 0.05286343612334802, + "fcm_dpo/beta": 0.5205714702606201, + "fcm_dpo/delta": -0.1801259070634842, + "fcm_dpo/margin": 1.09473717212677, + "fcm_dpo/q_t": 0.37265270948410034, + "grad_norm": 232.2285919189453, + "learning_rate": 2.536231884057971e-07, + "logits/chosen": -0.5617629885673523, + "logits/rejected": -0.5254461765289307, + "logps/chosen": -51.936004638671875, + "logps/ref_chosen": -51.991493225097656, + "logps/ref_rejected": -86.0406265258789, + "logps/rejected": -87.07987976074219, + "loss": 0.9947, + "margin_dpo/margin_mean": 1.094736099243164, + "margin_dpo/margin_std": 1.23842453956604, + "step": 36 + }, + { + "KL/chosen_KL_mean": 0.0018596649169921875, + "KL/mean": -0.5054476261138916, + "KL/rejected_KL_mean": -1.01275634765625, + "KL/std": 1.0474822521209717, + "epoch": 0.05433186490455213, + "fcm_dpo/beta": 0.49882519245147705, + "fcm_dpo/delta": -0.11387760937213898, + "fcm_dpo/margin": 1.0146119594573975, + "fcm_dpo/q_t": 0.3891563415527344, + "grad_norm": 219.2965850830078, + "learning_rate": 2.6086956521739126e-07, + "logits/chosen": -0.4967535734176636, + "logits/rejected": -0.4519627094268799, + "logps/chosen": -62.80524826049805, + "logps/ref_chosen": -62.807106018066406, + "logps/ref_rejected": -77.89507293701172, + "logps/rejected": -78.90782928466797, + "loss": 1.0432, + "margin_dpo/margin_mean": 1.0146114826202393, + "margin_dpo/margin_std": 1.3467109203338623, + "step": 37 + }, + { + "KL/chosen_KL_mean": 0.1434650421142578, + "KL/mean": -0.5286348462104797, + "KL/rejected_KL_mean": -1.2007369995117188, + "KL/std": 1.3252570629119873, + "epoch": 0.055800293685756244, + "fcm_dpo/beta": 0.4817589521408081, + "fcm_dpo/delta": -0.2642138600349426, + "fcm_dpo/margin": 1.3442010879516602, + "fcm_dpo/q_t": 0.3633432388305664, + "grad_norm": 220.9096221923828, + "learning_rate": 2.681159420289855e-07, + "logits/chosen": -0.5368313789367676, + "logits/rejected": -0.5042594075202942, + "logps/chosen": -48.24705505371094, + "logps/ref_chosen": -48.39051818847656, + "logps/ref_rejected": -97.91244506835938, + "logps/rejected": -99.11317443847656, + "loss": 0.9599, + "margin_dpo/margin_mean": 1.344200849533081, + "margin_dpo/margin_std": 1.6261742115020752, + "step": 38 + }, + { + "KL/chosen_KL_mean": 0.07047653198242188, + "KL/mean": -0.7256151437759399, + "KL/rejected_KL_mean": -1.5217018127441406, + "KL/std": 1.2704432010650635, + "epoch": 0.05726872246696035, + "fcm_dpo/beta": 0.44901108741760254, + "fcm_dpo/delta": -0.3407745361328125, + "fcm_dpo/margin": 1.592177152633667, + "fcm_dpo/q_t": 0.3372858464717865, + "grad_norm": 206.50027465820312, + "learning_rate": 2.753623188405797e-07, + "logits/chosen": -0.5508787631988525, + "logits/rejected": -0.5106680989265442, + "logps/chosen": -50.679996490478516, + "logps/ref_chosen": -50.75047302246094, + "logps/ref_rejected": -78.56951141357422, + "logps/rejected": -80.09121704101562, + "loss": 0.8796, + "margin_dpo/margin_mean": 1.5921775102615356, + "margin_dpo/margin_std": 1.383728265762329, + "step": 39 + }, + { + "KL/chosen_KL_mean": 0.18862533569335938, + "KL/mean": -0.5789550542831421, + "KL/rejected_KL_mean": -1.3465385437011719, + "KL/std": 1.350581407546997, + "epoch": 0.05873715124816446, + "fcm_dpo/beta": 0.4270463287830353, + "fcm_dpo/delta": -0.2727539539337158, + "fcm_dpo/margin": 1.5351669788360596, + "fcm_dpo/q_t": 0.3570956885814667, + "grad_norm": 164.9803009033203, + "learning_rate": 2.8260869565217386e-07, + "logits/chosen": -0.4705943763256073, + "logits/rejected": -0.4372428059577942, + "logps/chosen": -57.79644012451172, + "logps/ref_chosen": -57.985069274902344, + "logps/ref_rejected": -74.3000717163086, + "logps/rejected": -75.6466064453125, + "loss": 0.9362, + "margin_dpo/margin_mean": 1.5351676940917969, + "margin_dpo/margin_std": 1.6188992261886597, + "step": 40 + }, + { + "KL/chosen_KL_mean": 0.003345489501953125, + "KL/mean": -0.9243937730789185, + "KL/rejected_KL_mean": -1.8521308898925781, + "KL/std": 1.8027801513671875, + "epoch": 0.06020558002936858, + "fcm_dpo/beta": 0.3977815508842468, + "fcm_dpo/delta": -0.3646352291107178, + "fcm_dpo/margin": 1.8554785251617432, + "fcm_dpo/q_t": 0.3413015604019165, + "grad_norm": 177.38894653320312, + "learning_rate": 2.898550724637681e-07, + "logits/chosen": -0.5352627038955688, + "logits/rejected": -0.4983564019203186, + "logps/chosen": -62.69247055053711, + "logps/ref_chosen": -62.69581604003906, + "logps/ref_rejected": -97.02352905273438, + "logps/rejected": -98.87565612792969, + "loss": 0.9018, + "margin_dpo/margin_mean": 1.8554792404174805, + "margin_dpo/margin_std": 1.9582023620605469, + "step": 41 + }, + { + "KL/chosen_KL_mean": 0.2212810516357422, + "KL/mean": -1.0126826763153076, + "KL/rejected_KL_mean": -2.24664306640625, + "KL/std": 2.0452983379364014, + "epoch": 0.06167400881057269, + "fcm_dpo/beta": 0.3601230978965759, + "fcm_dpo/delta": -0.5366164445877075, + "fcm_dpo/margin": 2.467926502227783, + "fcm_dpo/q_t": 0.3121350407600403, + "grad_norm": 159.704833984375, + "learning_rate": 2.971014492753623e-07, + "logits/chosen": -0.5344812870025635, + "logits/rejected": -0.4876905083656311, + "logps/chosen": -58.745147705078125, + "logps/ref_chosen": -58.966426849365234, + "logps/ref_rejected": -109.90837097167969, + "logps/rejected": -112.15501403808594, + "loss": 0.8128, + "margin_dpo/margin_mean": 2.467926502227783, + "margin_dpo/margin_std": 2.2979540824890137, + "step": 42 + }, + { + "KL/chosen_KL_mean": 0.4799919128417969, + "KL/mean": -0.6912780404090881, + "KL/rejected_KL_mean": -1.862548828125, + "KL/std": 1.731245756149292, + "epoch": 0.0631424375917768, + "fcm_dpo/beta": 0.32807010412216187, + "fcm_dpo/delta": -0.4021656811237335, + "fcm_dpo/margin": 2.342538356781006, + "fcm_dpo/q_t": 0.325826495885849, + "grad_norm": 152.13230895996094, + "learning_rate": 3.043478260869565e-07, + "logits/chosen": -0.5518888235092163, + "logits/rejected": -0.5275447368621826, + "logps/chosen": -53.676002502441406, + "logps/ref_chosen": -54.15599822998047, + "logps/ref_rejected": -96.48019409179688, + "logps/rejected": -98.34274291992188, + "loss": 0.8349, + "margin_dpo/margin_mean": 2.342538356781006, + "margin_dpo/margin_std": 1.8504887819290161, + "step": 43 + }, + { + "KL/chosen_KL_mean": 0.25227928161621094, + "KL/mean": -1.1076438426971436, + "KL/rejected_KL_mean": -2.4675674438476562, + "KL/std": 2.1824231147766113, + "epoch": 0.06461086637298091, + "fcm_dpo/beta": 0.3006964921951294, + "fcm_dpo/delta": -0.4574472904205322, + "fcm_dpo/margin": 2.719846248626709, + "fcm_dpo/q_t": 0.31683629751205444, + "grad_norm": 148.4615020751953, + "learning_rate": 3.115942028985507e-07, + "logits/chosen": -0.49776938557624817, + "logits/rejected": -0.4771164655685425, + "logps/chosen": -49.82621765136719, + "logps/ref_chosen": -50.07849884033203, + "logps/ref_rejected": -108.78376007080078, + "logps/rejected": -111.25132751464844, + "loss": 0.8114, + "margin_dpo/margin_mean": 2.719846248626709, + "margin_dpo/margin_std": 2.149664878845215, + "step": 44 + }, + { + "KL/chosen_KL_mean": 0.12762832641601562, + "KL/mean": -0.9766333103179932, + "KL/rejected_KL_mean": -2.0808982849121094, + "KL/std": 1.9980565309524536, + "epoch": 0.06607929515418502, + "fcm_dpo/beta": 0.2850699722766876, + "fcm_dpo/delta": -0.2445305585861206, + "fcm_dpo/margin": 2.2085297107696533, + "fcm_dpo/q_t": 0.3639563322067261, + "grad_norm": 119.1253890991211, + "learning_rate": 3.188405797101449e-07, + "logits/chosen": -0.47408968210220337, + "logits/rejected": -0.46131014823913574, + "logps/chosen": -48.28730010986328, + "logps/ref_chosen": -48.4149284362793, + "logps/ref_rejected": -77.93643188476562, + "logps/rejected": -80.01732635498047, + "loss": 0.9552, + "margin_dpo/margin_mean": 2.208528518676758, + "margin_dpo/margin_std": 2.4762864112854004, + "step": 45 + }, + { + "KL/chosen_KL_mean": 0.2157917022705078, + "KL/mean": -1.1610474586486816, + "KL/rejected_KL_mean": -2.5378875732421875, + "KL/std": 2.4480698108673096, + "epoch": 0.06754772393538913, + "fcm_dpo/beta": 0.26706546545028687, + "fcm_dpo/delta": -0.361088365316391, + "fcm_dpo/margin": 2.753678798675537, + "fcm_dpo/q_t": 0.34314534068107605, + "grad_norm": 128.59988403320312, + "learning_rate": 3.260869565217391e-07, + "logits/chosen": -0.5371255278587341, + "logits/rejected": -0.486606240272522, + "logps/chosen": -55.783634185791016, + "logps/ref_chosen": -55.999427795410156, + "logps/ref_rejected": -95.652587890625, + "logps/rejected": -98.19047546386719, + "loss": 0.9049, + "margin_dpo/margin_mean": 2.753678798675537, + "margin_dpo/margin_std": 2.9481449127197266, + "step": 46 + }, + { + "KL/chosen_KL_mean": 0.42566680908203125, + "KL/mean": -0.9785783290863037, + "KL/rejected_KL_mean": -2.3828277587890625, + "KL/std": 2.440776824951172, + "epoch": 0.06901615271659324, + "fcm_dpo/beta": 0.2504443824291229, + "fcm_dpo/delta": -0.3251284062862396, + "fcm_dpo/margin": 2.808493137359619, + "fcm_dpo/q_t": 0.34311166405677795, + "grad_norm": 118.37008666992188, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.5844066143035889, + "logits/rejected": -0.5324996709823608, + "logps/chosen": -57.50041198730469, + "logps/ref_chosen": -57.92607879638672, + "logps/ref_rejected": -94.67920684814453, + "logps/rejected": -97.0620346069336, + "loss": 0.8917, + "margin_dpo/margin_mean": 2.8084943294525146, + "margin_dpo/margin_std": 2.5350003242492676, + "step": 47 + }, + { + "KL/chosen_KL_mean": 0.11053085327148438, + "KL/mean": -1.249524712562561, + "KL/rejected_KL_mean": -2.609577178955078, + "KL/std": 2.3069586753845215, + "epoch": 0.07048458149779736, + "fcm_dpo/beta": 0.23410022258758545, + "fcm_dpo/delta": -0.2554876506328583, + "fcm_dpo/margin": 2.720108985900879, + "fcm_dpo/q_t": 0.35484230518341064, + "grad_norm": 125.24164581298828, + "learning_rate": 3.4057971014492755e-07, + "logits/chosen": -0.5820130109786987, + "logits/rejected": -0.5238351225852966, + "logps/chosen": -57.077545166015625, + "logps/ref_chosen": -57.188072204589844, + "logps/ref_rejected": -88.0166015625, + "logps/rejected": -90.62617492675781, + "loss": 0.9335, + "margin_dpo/margin_mean": 2.720109224319458, + "margin_dpo/margin_std": 2.4631295204162598, + "step": 48 + }, + { + "KL/chosen_KL_mean": 0.39922142028808594, + "KL/mean": -1.2344255447387695, + "KL/rejected_KL_mean": -2.8680648803710938, + "KL/std": 2.883481025695801, + "epoch": 0.07195301027900147, + "fcm_dpo/beta": 0.2220032513141632, + "fcm_dpo/delta": -0.34955620765686035, + "fcm_dpo/margin": 3.2672815322875977, + "fcm_dpo/q_t": 0.3432408273220062, + "grad_norm": 352.78228759765625, + "learning_rate": 3.478260869565217e-07, + "logits/chosen": -0.5358976125717163, + "logits/rejected": -0.47647011280059814, + "logps/chosen": -61.286048889160156, + "logps/ref_chosen": -61.685272216796875, + "logps/ref_rejected": -83.76747131347656, + "logps/rejected": -86.63554382324219, + "loss": 0.8999, + "margin_dpo/margin_mean": 3.2672815322875977, + "margin_dpo/margin_std": 3.3359837532043457, + "step": 49 + }, + { + "KL/chosen_KL_mean": -0.03353118896484375, + "KL/mean": -1.8093570470809937, + "KL/rejected_KL_mean": -3.585174560546875, + "KL/std": 2.991940975189209, + "epoch": 0.07342143906020558, + "fcm_dpo/beta": 0.20502600073814392, + "fcm_dpo/delta": -0.3554917573928833, + "fcm_dpo/margin": 3.5516459941864014, + "fcm_dpo/q_t": 0.33918917179107666, + "grad_norm": 98.57312774658203, + "learning_rate": 3.5507246376811595e-07, + "logits/chosen": -0.5251749157905579, + "logits/rejected": -0.4888863265514374, + "logps/chosen": -58.757667541503906, + "logps/ref_chosen": -58.72413635253906, + "logps/ref_rejected": -96.35814666748047, + "logps/rejected": -99.94332122802734, + "loss": 0.8875, + "margin_dpo/margin_mean": 3.5516457557678223, + "margin_dpo/margin_std": 3.4122915267944336, + "step": 50 + }, + { + "KL/chosen_KL_mean": -0.10815811157226562, + "KL/mean": -1.8943548202514648, + "KL/rejected_KL_mean": -3.6805572509765625, + "KL/std": 3.606013774871826, + "epoch": 0.07488986784140969, + "fcm_dpo/beta": 0.19137313961982727, + "fcm_dpo/delta": -0.30843037366867065, + "fcm_dpo/margin": 3.572404384613037, + "fcm_dpo/q_t": 0.35805124044418335, + "grad_norm": 78.19511413574219, + "learning_rate": 3.6231884057971015e-07, + "logits/chosen": -0.5243328809738159, + "logits/rejected": -0.491935670375824, + "logps/chosen": -61.48182678222656, + "logps/ref_chosen": -61.3736686706543, + "logps/ref_rejected": -76.00199890136719, + "logps/rejected": -79.68255615234375, + "loss": 0.9556, + "margin_dpo/margin_mean": 3.5724036693573, + "margin_dpo/margin_std": 4.397710800170898, + "step": 51 + }, + { + "KL/chosen_KL_mean": 0.5040225982666016, + "KL/mean": -2.2015185356140137, + "KL/rejected_KL_mean": -4.9070587158203125, + "KL/std": 4.136686325073242, + "epoch": 0.0763582966226138, + "fcm_dpo/beta": 0.17365267872810364, + "fcm_dpo/delta": -0.5967501401901245, + "fcm_dpo/margin": 5.411087989807129, + "fcm_dpo/q_t": 0.29616397619247437, + "grad_norm": 78.32229614257812, + "learning_rate": 3.695652173913043e-07, + "logits/chosen": -0.5358279943466187, + "logits/rejected": -0.4792342185974121, + "logps/chosen": -51.833335876464844, + "logps/ref_chosen": -52.33735656738281, + "logps/ref_rejected": -79.97391510009766, + "logps/rejected": -84.88097381591797, + "loss": 0.7675, + "margin_dpo/margin_mean": 5.411087989807129, + "margin_dpo/margin_std": 4.405357360839844, + "step": 52 + }, + { + "KL/chosen_KL_mean": -0.03709983825683594, + "KL/mean": -2.7792229652404785, + "KL/rejected_KL_mean": -5.521343231201172, + "KL/std": 4.680900573730469, + "epoch": 0.07782672540381791, + "fcm_dpo/beta": 0.1579442024230957, + "fcm_dpo/delta": -0.5074787139892578, + "fcm_dpo/margin": 5.484250068664551, + "fcm_dpo/q_t": 0.32274216413497925, + "grad_norm": 76.56503295898438, + "learning_rate": 3.7681159420289855e-07, + "logits/chosen": -0.620309591293335, + "logits/rejected": -0.5990212559700012, + "logps/chosen": -53.35175323486328, + "logps/ref_chosen": -53.31465148925781, + "logps/ref_rejected": -91.78359985351562, + "logps/rejected": -97.30493927001953, + "loss": 0.8413, + "margin_dpo/margin_mean": 5.484249114990234, + "margin_dpo/margin_std": 5.2846550941467285, + "step": 53 + }, + { + "KL/chosen_KL_mean": -0.15765953063964844, + "KL/mean": -2.5494110584259033, + "KL/rejected_KL_mean": -4.941162109375, + "KL/std": 4.4056077003479, + "epoch": 0.07929515418502203, + "fcm_dpo/beta": 0.14552612602710724, + "fcm_dpo/delta": -0.3178091049194336, + "fcm_dpo/margin": 4.783502578735352, + "fcm_dpo/q_t": 0.34551307559013367, + "grad_norm": 68.20849609375, + "learning_rate": 3.8405797101449274e-07, + "logits/chosen": -0.5885263085365295, + "logits/rejected": -0.5346698760986328, + "logps/chosen": -50.84632110595703, + "logps/ref_chosen": -50.68865966796875, + "logps/ref_rejected": -91.71539306640625, + "logps/rejected": -96.65655517578125, + "loss": 0.8929, + "margin_dpo/margin_mean": 4.783502578735352, + "margin_dpo/margin_std": 4.461567401885986, + "step": 54 + }, + { + "KL/chosen_KL_mean": -0.5955238342285156, + "KL/mean": -3.465237617492676, + "KL/rejected_KL_mean": -6.334949493408203, + "KL/std": 5.3761420249938965, + "epoch": 0.08076358296622614, + "fcm_dpo/beta": 0.13501086831092834, + "fcm_dpo/delta": -0.40534526109695435, + "fcm_dpo/margin": 5.739419460296631, + "fcm_dpo/q_t": 0.3375312089920044, + "grad_norm": 66.71011352539062, + "learning_rate": 3.9130434782608694e-07, + "logits/chosen": -0.653782844543457, + "logits/rejected": -0.5924779176712036, + "logps/chosen": -63.210758209228516, + "logps/ref_chosen": -62.615234375, + "logps/ref_rejected": -88.99349975585938, + "logps/rejected": -95.32845306396484, + "loss": 0.9046, + "margin_dpo/margin_mean": 5.739418983459473, + "margin_dpo/margin_std": 6.410279273986816, + "step": 55 + }, + { + "KL/chosen_KL_mean": -0.39885711669921875, + "KL/mean": -3.1298060417175293, + "KL/rejected_KL_mean": -5.860759735107422, + "KL/std": 5.217003345489502, + "epoch": 0.08223201174743025, + "fcm_dpo/beta": 0.12596547603607178, + "fcm_dpo/delta": -0.30908891558647156, + "fcm_dpo/margin": 5.461906433105469, + "fcm_dpo/q_t": 0.3533214330673218, + "grad_norm": 55.40151596069336, + "learning_rate": 3.9855072463768114e-07, + "logits/chosen": -0.5743478536605835, + "logits/rejected": -0.5299459099769592, + "logps/chosen": -58.33158493041992, + "logps/ref_chosen": -57.9327278137207, + "logps/ref_rejected": -94.1744384765625, + "logps/rejected": -100.03520202636719, + "loss": 0.9462, + "margin_dpo/margin_mean": 5.461906433105469, + "margin_dpo/margin_std": 6.3516526222229, + "step": 56 + }, + { + "KL/chosen_KL_mean": -0.4383068084716797, + "KL/mean": -3.5001718997955322, + "KL/rejected_KL_mean": -6.562034606933594, + "KL/std": 5.148193359375, + "epoch": 0.08370044052863436, + "fcm_dpo/beta": 0.11817534267902374, + "fcm_dpo/delta": -0.3478173613548279, + "fcm_dpo/margin": 6.123730659484863, + "fcm_dpo/q_t": 0.3378201723098755, + "grad_norm": 61.338130950927734, + "learning_rate": 4.057971014492754e-07, + "logits/chosen": -0.5510739088058472, + "logits/rejected": -0.5219501256942749, + "logps/chosen": -70.93359375, + "logps/ref_chosen": -70.49528503417969, + "logps/ref_rejected": -95.56546020507812, + "logps/rejected": -102.12748718261719, + "loss": 0.8847, + "margin_dpo/margin_mean": 6.123730182647705, + "margin_dpo/margin_std": 5.595479488372803, + "step": 57 + }, + { + "KL/chosen_KL_mean": -0.48359107971191406, + "KL/mean": -3.9639272689819336, + "KL/rejected_KL_mean": -7.444267272949219, + "KL/std": 5.993289470672607, + "epoch": 0.08516886930983847, + "fcm_dpo/beta": 0.10953576862812042, + "fcm_dpo/delta": -0.3910744786262512, + "fcm_dpo/margin": 6.96067476272583, + "fcm_dpo/q_t": 0.3375468850135803, + "grad_norm": 61.08738708496094, + "learning_rate": 4.1304347826086954e-07, + "logits/chosen": -0.5824143886566162, + "logits/rejected": -0.5054690837860107, + "logps/chosen": -62.61653137207031, + "logps/ref_chosen": -62.13294219970703, + "logps/ref_rejected": -84.61729431152344, + "logps/rejected": -92.06156158447266, + "loss": 0.8919, + "margin_dpo/margin_mean": 6.960675239562988, + "margin_dpo/margin_std": 7.216721534729004, + "step": 58 + }, + { + "KL/chosen_KL_mean": -1.005767822265625, + "KL/mean": -4.686802864074707, + "KL/rejected_KL_mean": -8.367839813232422, + "KL/std": 6.362232208251953, + "epoch": 0.08663729809104258, + "fcm_dpo/beta": 0.1001485139131546, + "fcm_dpo/delta": -0.369930237531662, + "fcm_dpo/margin": 7.362071514129639, + "fcm_dpo/q_t": 0.339927077293396, + "grad_norm": 56.65191650390625, + "learning_rate": 4.2028985507246374e-07, + "logits/chosen": -0.6022584438323975, + "logits/rejected": -0.5596363544464111, + "logps/chosen": -52.93829345703125, + "logps/ref_chosen": -51.932525634765625, + "logps/ref_rejected": -88.88520050048828, + "logps/rejected": -97.25303649902344, + "loss": 0.8958, + "margin_dpo/margin_mean": 7.362071514129639, + "margin_dpo/margin_std": 7.449038505554199, + "step": 59 + }, + { + "KL/chosen_KL_mean": -1.9381217956542969, + "KL/mean": -4.90670919418335, + "KL/rejected_KL_mean": -7.875293731689453, + "KL/std": 5.7876200675964355, + "epoch": 0.0881057268722467, + "fcm_dpo/beta": 0.09618770331144333, + "fcm_dpo/delta": -0.18251214921474457, + "fcm_dpo/margin": 5.937169075012207, + "fcm_dpo/q_t": 0.36946025490760803, + "grad_norm": 63.26620864868164, + "learning_rate": 4.2753623188405794e-07, + "logits/chosen": -0.622028112411499, + "logits/rejected": -0.5631489753723145, + "logps/chosen": -62.88031005859375, + "logps/ref_chosen": -60.94218826293945, + "logps/ref_rejected": -85.39340209960938, + "logps/rejected": -93.26869201660156, + "loss": 0.9865, + "margin_dpo/margin_mean": 5.937168598175049, + "margin_dpo/margin_std": 6.618038177490234, + "step": 60 + }, + { + "KL/chosen_KL_mean": -0.9614639282226562, + "KL/mean": -4.590740203857422, + "KL/rejected_KL_mean": -8.220016479492188, + "KL/std": 7.815638542175293, + "epoch": 0.08957415565345081, + "fcm_dpo/beta": 0.09131693840026855, + "fcm_dpo/delta": -0.28324007987976074, + "fcm_dpo/margin": 7.258551597595215, + "fcm_dpo/q_t": 0.3635759949684143, + "grad_norm": 50.66496658325195, + "learning_rate": 4.3478260869565214e-07, + "logits/chosen": -0.615394115447998, + "logits/rejected": -0.5809042453765869, + "logps/chosen": -61.59498596191406, + "logps/ref_chosen": -60.633522033691406, + "logps/ref_rejected": -89.85249328613281, + "logps/rejected": -98.072509765625, + "loss": 0.9727, + "margin_dpo/margin_mean": 7.258552074432373, + "margin_dpo/margin_std": 9.66031265258789, + "step": 61 + }, + { + "KL/chosen_KL_mean": -1.1735248565673828, + "KL/mean": -3.935976266860962, + "KL/rejected_KL_mean": -6.69842529296875, + "KL/std": 5.688698768615723, + "epoch": 0.09104258443465492, + "fcm_dpo/beta": 0.08945208787918091, + "fcm_dpo/delta": -0.09905220568180084, + "fcm_dpo/margin": 5.52489709854126, + "fcm_dpo/q_t": 0.389636754989624, + "grad_norm": 49.19923782348633, + "learning_rate": 4.420289855072464e-07, + "logits/chosen": -0.5917966365814209, + "logits/rejected": -0.5570877194404602, + "logps/chosen": -57.32429504394531, + "logps/ref_chosen": -56.15077209472656, + "logps/ref_rejected": -75.56619262695312, + "logps/rejected": -82.26461791992188, + "loss": 1.0378, + "margin_dpo/margin_mean": 5.524896621704102, + "margin_dpo/margin_std": 6.8345046043396, + "step": 62 + }, + { + "KL/chosen_KL_mean": -1.9826297760009766, + "KL/mean": -5.725447654724121, + "KL/rejected_KL_mean": -9.468265533447266, + "KL/std": 7.366238594055176, + "epoch": 0.09251101321585903, + "fcm_dpo/beta": 0.08485674113035202, + "fcm_dpo/delta": -0.2533873915672302, + "fcm_dpo/margin": 7.485637664794922, + "fcm_dpo/q_t": 0.3566383123397827, + "grad_norm": 52.68979263305664, + "learning_rate": 4.4927536231884053e-07, + "logits/chosen": -0.5868048667907715, + "logits/rejected": -0.5401818752288818, + "logps/chosen": -75.13002014160156, + "logps/ref_chosen": -73.14739227294922, + "logps/ref_rejected": -97.61006164550781, + "logps/rejected": -107.07832336425781, + "loss": 0.9454, + "margin_dpo/margin_mean": 7.485637664794922, + "margin_dpo/margin_std": 7.758219242095947, + "step": 63 + }, + { + "KL/chosen_KL_mean": -0.6014499664306641, + "KL/mean": -4.980618953704834, + "KL/rejected_KL_mean": -9.359790802001953, + "KL/std": 7.880523204803467, + "epoch": 0.09397944199706314, + "fcm_dpo/beta": 0.0794319286942482, + "fcm_dpo/delta": -0.3237980306148529, + "fcm_dpo/margin": 8.758337020874023, + "fcm_dpo/q_t": 0.34900087118148804, + "grad_norm": 47.52156066894531, + "learning_rate": 4.5652173913043473e-07, + "logits/chosen": -0.5694026947021484, + "logits/rejected": -0.5367158651351929, + "logps/chosen": -54.60005187988281, + "logps/ref_chosen": -53.998600006103516, + "logps/ref_rejected": -93.53019714355469, + "logps/rejected": -102.8899917602539, + "loss": 0.9296, + "margin_dpo/margin_mean": 8.758337020874023, + "margin_dpo/margin_std": 9.623977661132812, + "step": 64 + }, + { + "KL/chosen_KL_mean": -2.417318344116211, + "KL/mean": -6.842019081115723, + "KL/rejected_KL_mean": -11.2667236328125, + "KL/std": 8.855112075805664, + "epoch": 0.09544787077826726, + "fcm_dpo/beta": 0.07528192549943924, + "fcm_dpo/delta": -0.2872685492038727, + "fcm_dpo/margin": 8.849407196044922, + "fcm_dpo/q_t": 0.3515852093696594, + "grad_norm": 46.06278991699219, + "learning_rate": 4.63768115942029e-07, + "logits/chosen": -0.6717353463172913, + "logits/rejected": -0.6576972007751465, + "logps/chosen": -67.2533187866211, + "logps/ref_chosen": -64.83599853515625, + "logps/ref_rejected": -109.94645690917969, + "logps/rejected": -121.21318054199219, + "loss": 0.9428, + "margin_dpo/margin_mean": 8.849407196044922, + "margin_dpo/margin_std": 9.624493598937988, + "step": 65 + }, + { + "KL/chosen_KL_mean": -2.239471435546875, + "KL/mean": -6.359401702880859, + "KL/rejected_KL_mean": -10.479331970214844, + "KL/std": 8.035075187683105, + "epoch": 0.09691629955947137, + "fcm_dpo/beta": 0.07178394496440887, + "fcm_dpo/delta": -0.2061339020729065, + "fcm_dpo/margin": 8.239856719970703, + "fcm_dpo/q_t": 0.3704487979412079, + "grad_norm": 40.174861907958984, + "learning_rate": 4.7101449275362313e-07, + "logits/chosen": -0.6172059774398804, + "logits/rejected": -0.5826204419136047, + "logps/chosen": -53.68299865722656, + "logps/ref_chosen": -51.44352722167969, + "logps/ref_rejected": -75.63629913330078, + "logps/rejected": -86.11563110351562, + "loss": 0.9802, + "margin_dpo/margin_mean": 8.239856719970703, + "margin_dpo/margin_std": 9.627714157104492, + "step": 66 + }, + { + "KL/chosen_KL_mean": -1.9857635498046875, + "KL/mean": -6.205845832824707, + "KL/rejected_KL_mean": -10.425933837890625, + "KL/std": 8.025278091430664, + "epoch": 0.09838472834067548, + "fcm_dpo/beta": 0.06964662671089172, + "fcm_dpo/delta": -0.19918228685855865, + "fcm_dpo/margin": 8.440168380737305, + "fcm_dpo/q_t": 0.3705397844314575, + "grad_norm": 41.403099060058594, + "learning_rate": 4.782608695652174e-07, + "logits/chosen": -0.5965728759765625, + "logits/rejected": -0.5546629428863525, + "logps/chosen": -61.326568603515625, + "logps/ref_chosen": -59.34080505371094, + "logps/ref_rejected": -72.78728485107422, + "logps/rejected": -83.21321868896484, + "loss": 0.9789, + "margin_dpo/margin_mean": 8.440168380737305, + "margin_dpo/margin_std": 9.680936813354492, + "step": 67 + }, + { + "KL/chosen_KL_mean": -2.2271480560302734, + "KL/mean": -6.318003177642822, + "KL/rejected_KL_mean": -10.408863067626953, + "KL/std": 7.332122325897217, + "epoch": 0.09985315712187959, + "fcm_dpo/beta": 0.06731708347797394, + "fcm_dpo/delta": -0.15928582847118378, + "fcm_dpo/margin": 8.181710243225098, + "fcm_dpo/q_t": 0.3742911219596863, + "grad_norm": 39.689701080322266, + "learning_rate": 4.855072463768116e-07, + "logits/chosen": -0.6384579539299011, + "logits/rejected": -0.5809626579284668, + "logps/chosen": -67.4329833984375, + "logps/ref_chosen": -65.2058334350586, + "logps/ref_rejected": -77.20724487304688, + "logps/rejected": -87.6161117553711, + "loss": 0.9771, + "margin_dpo/margin_mean": 8.181710243225098, + "margin_dpo/margin_std": 8.135688781738281, + "step": 68 + }, + { + "KL/chosen_KL_mean": -2.7193145751953125, + "KL/mean": -7.553807258605957, + "KL/rejected_KL_mean": -12.388301849365234, + "KL/std": 8.551678657531738, + "epoch": 0.1013215859030837, + "fcm_dpo/beta": 0.06404094398021698, + "fcm_dpo/delta": -0.23506096005439758, + "fcm_dpo/margin": 9.668986320495605, + "fcm_dpo/q_t": 0.360501229763031, + "grad_norm": 42.04729080200195, + "learning_rate": 4.927536231884058e-07, + "logits/chosen": -0.6100502014160156, + "logits/rejected": -0.5856792330741882, + "logps/chosen": -62.53855514526367, + "logps/ref_chosen": -59.81924057006836, + "logps/ref_rejected": -103.38886260986328, + "logps/rejected": -115.77716064453125, + "loss": 0.937, + "margin_dpo/margin_mean": 9.668987274169922, + "margin_dpo/margin_std": 9.378090858459473, + "step": 69 + }, + { + "KL/chosen_KL_mean": -3.914093017578125, + "KL/mean": -9.15247917175293, + "KL/rejected_KL_mean": -14.390865325927734, + "KL/std": 10.303367614746094, + "epoch": 0.1027900146842878, + "fcm_dpo/beta": 0.061325304210186005, + "fcm_dpo/delta": -0.25857973098754883, + "fcm_dpo/margin": 10.476768493652344, + "fcm_dpo/q_t": 0.3579747676849365, + "grad_norm": 42.36003875732422, + "learning_rate": 5e-07, + "logits/chosen": -0.6354060173034668, + "logits/rejected": -0.6008400917053223, + "logps/chosen": -65.84473419189453, + "logps/ref_chosen": -61.930641174316406, + "logps/ref_rejected": -91.06078338623047, + "logps/rejected": -105.45164489746094, + "loss": 0.9405, + "margin_dpo/margin_mean": 10.476768493652344, + "margin_dpo/margin_std": 10.90849494934082, + "step": 70 + }, + { + "KL/chosen_KL_mean": -3.4942684173583984, + "KL/mean": -9.552824020385742, + "KL/rejected_KL_mean": -15.611381530761719, + "KL/std": 10.604869842529297, + "epoch": 0.10425844346549193, + "fcm_dpo/beta": 0.057439714670181274, + "fcm_dpo/delta": -0.3188778758049011, + "fcm_dpo/margin": 12.117112159729004, + "fcm_dpo/q_t": 0.34699490666389465, + "grad_norm": 38.4710807800293, + "learning_rate": 4.999967061337492e-07, + "logits/chosen": -0.6384104490280151, + "logits/rejected": -0.5962811708450317, + "logps/chosen": -65.2446060180664, + "logps/ref_chosen": -61.750335693359375, + "logps/ref_rejected": -97.33662414550781, + "logps/rejected": -112.947998046875, + "loss": 0.9015, + "margin_dpo/margin_mean": 12.117112159729004, + "margin_dpo/margin_std": 11.577495574951172, + "step": 71 + }, + { + "KL/chosen_KL_mean": -4.108892440795898, + "KL/mean": -10.48940658569336, + "KL/rejected_KL_mean": -16.869918823242188, + "KL/std": 11.388107299804688, + "epoch": 0.10572687224669604, + "fcm_dpo/beta": 0.05391976609826088, + "fcm_dpo/delta": -0.31035923957824707, + "fcm_dpo/margin": 12.761024475097656, + "fcm_dpo/q_t": 0.3477667570114136, + "grad_norm": 38.119319915771484, + "learning_rate": 4.999868246217933e-07, + "logits/chosen": -0.6678510904312134, + "logits/rejected": -0.6335985660552979, + "logps/chosen": -70.16230773925781, + "logps/ref_chosen": -66.05341339111328, + "logps/ref_rejected": -95.2869873046875, + "logps/rejected": -112.15690612792969, + "loss": 0.9156, + "margin_dpo/margin_mean": 12.761024475097656, + "margin_dpo/margin_std": 13.06039810180664, + "step": 72 + }, + { + "KL/chosen_KL_mean": -5.835565567016602, + "KL/mean": -12.30703067779541, + "KL/rejected_KL_mean": -18.77849578857422, + "KL/std": 14.999626159667969, + "epoch": 0.10719530102790015, + "fcm_dpo/beta": 0.05082736164331436, + "fcm_dpo/delta": -0.27768224477767944, + "fcm_dpo/margin": 12.9429292678833, + "fcm_dpo/q_t": 0.36679285764694214, + "grad_norm": 37.15599822998047, + "learning_rate": 4.999703557245192e-07, + "logits/chosen": -0.6618713736534119, + "logits/rejected": -0.6186869144439697, + "logps/chosen": -72.09184265136719, + "logps/ref_chosen": -66.25627136230469, + "logps/ref_rejected": -90.45613098144531, + "logps/rejected": -109.23463439941406, + "loss": 1.0098, + "margin_dpo/margin_mean": 12.942928314208984, + "margin_dpo/margin_std": 18.527137756347656, + "step": 73 + }, + { + "KL/chosen_KL_mean": -6.543451309204102, + "KL/mean": -13.558289527893066, + "KL/rejected_KL_mean": -20.573123931884766, + "KL/std": 16.691537857055664, + "epoch": 0.10866372980910426, + "fcm_dpo/beta": 0.048035770654678345, + "fcm_dpo/delta": -0.2947743535041809, + "fcm_dpo/margin": 14.029674530029297, + "fcm_dpo/q_t": 0.3622450828552246, + "grad_norm": 38.12542724609375, + "learning_rate": 4.999472998758977e-07, + "logits/chosen": -0.6320587396621704, + "logits/rejected": -0.6205792427062988, + "logps/chosen": -59.96833419799805, + "logps/ref_chosen": -53.42488098144531, + "logps/ref_rejected": -95.94693756103516, + "logps/rejected": -116.52006530761719, + "loss": 0.9874, + "margin_dpo/margin_mean": 14.029674530029297, + "margin_dpo/margin_std": 20.53775405883789, + "step": 74 + }, + { + "KL/chosen_KL_mean": -6.452543258666992, + "KL/mean": -16.478172302246094, + "KL/rejected_KL_mean": -26.503795623779297, + "KL/std": 17.552452087402344, + "epoch": 0.11013215859030837, + "fcm_dpo/beta": 0.04416520893573761, + "fcm_dpo/delta": -0.5302780866622925, + "fcm_dpo/margin": 20.05126190185547, + "fcm_dpo/q_t": 0.31639227271080017, + "grad_norm": 34.176265716552734, + "learning_rate": 4.999176576834721e-07, + "logits/chosen": -0.6823216676712036, + "logits/rejected": -0.6725906133651733, + "logps/chosen": -58.314205169677734, + "logps/ref_chosen": -51.861663818359375, + "logps/ref_rejected": -111.25398254394531, + "logps/rejected": -137.75778198242188, + "loss": 0.833, + "margin_dpo/margin_mean": 20.05126190185547, + "margin_dpo/margin_std": 19.639450073242188, + "step": 75 + }, + { + "KL/chosen_KL_mean": -8.126792907714844, + "KL/mean": -14.718840599060059, + "KL/rejected_KL_mean": -21.31088638305664, + "KL/std": 14.556184768676758, + "epoch": 0.11160058737151249, + "fcm_dpo/beta": 0.04169227182865143, + "fcm_dpo/delta": -0.15833759307861328, + "fcm_dpo/margin": 13.184097290039062, + "fcm_dpo/q_t": 0.3749655485153198, + "grad_norm": 33.384490966796875, + "learning_rate": 4.998814299283415e-07, + "logits/chosen": -0.692663848400116, + "logits/rejected": -0.6493555307388306, + "logps/chosen": -61.39282989501953, + "logps/ref_chosen": -53.26603698730469, + "logps/ref_rejected": -78.21662902832031, + "logps/rejected": -99.52751159667969, + "loss": 1.0048, + "margin_dpo/margin_mean": 13.184097290039062, + "margin_dpo/margin_std": 15.775751113891602, + "step": 76 + }, + { + "KL/chosen_KL_mean": -7.239437103271484, + "KL/mean": -17.858009338378906, + "KL/rejected_KL_mean": -28.476581573486328, + "KL/std": 19.998626708984375, + "epoch": 0.1130690161527166, + "fcm_dpo/beta": 0.03837820887565613, + "fcm_dpo/delta": -0.4561229944229126, + "fcm_dpo/margin": 21.237140655517578, + "fcm_dpo/q_t": 0.3239055275917053, + "grad_norm": 34.94086456298828, + "learning_rate": 4.998386175651409e-07, + "logits/chosen": -0.6894793510437012, + "logits/rejected": -0.6502680778503418, + "logps/chosen": -65.33612060546875, + "logps/ref_chosen": -58.0966796875, + "logps/ref_rejected": -93.77361297607422, + "logps/rejected": -122.25019836425781, + "loss": 0.8744, + "margin_dpo/margin_mean": 21.237140655517578, + "margin_dpo/margin_std": 21.865249633789062, + "step": 77 + }, + { + "KL/chosen_KL_mean": -7.513151168823242, + "KL/mean": -15.731389999389648, + "KL/rejected_KL_mean": -23.949626922607422, + "KL/std": 16.417797088623047, + "epoch": 0.1145374449339207, + "fcm_dpo/beta": 0.03666268289089203, + "fcm_dpo/delta": -0.21514025330543518, + "fcm_dpo/margin": 16.436477661132812, + "fcm_dpo/q_t": 0.3673900067806244, + "grad_norm": 31.626035690307617, + "learning_rate": 4.997892217220159e-07, + "logits/chosen": -0.6481041312217712, + "logits/rejected": -0.6217755079269409, + "logps/chosen": -63.12693786621094, + "logps/ref_chosen": -55.61378479003906, + "logps/ref_rejected": -84.93436431884766, + "logps/rejected": -108.88398742675781, + "loss": 0.9745, + "margin_dpo/margin_mean": 16.436477661132812, + "margin_dpo/margin_std": 18.627796173095703, + "step": 78 + }, + { + "KL/chosen_KL_mean": -7.894931793212891, + "KL/mean": -16.924148559570312, + "KL/rejected_KL_mean": -25.95336151123047, + "KL/std": 18.74026870727539, + "epoch": 0.11600587371512482, + "fcm_dpo/beta": 0.03483927622437477, + "fcm_dpo/delta": -0.24468708038330078, + "fcm_dpo/margin": 18.058425903320312, + "fcm_dpo/q_t": 0.36786949634552, + "grad_norm": 27.80043601989746, + "learning_rate": 4.997332437005931e-07, + "logits/chosen": -0.6485146284103394, + "logits/rejected": -0.6170614957809448, + "logps/chosen": -63.345420837402344, + "logps/ref_chosen": -55.45048522949219, + "logps/ref_rejected": -87.64756774902344, + "logps/rejected": -113.60092163085938, + "loss": 0.9866, + "margin_dpo/margin_mean": 18.05842399597168, + "margin_dpo/margin_std": 22.871458053588867, + "step": 79 + }, + { + "KL/chosen_KL_mean": -10.773929595947266, + "KL/mean": -19.173215866088867, + "KL/rejected_KL_mean": -27.572498321533203, + "KL/std": 19.74143409729004, + "epoch": 0.11747430249632893, + "fcm_dpo/beta": 0.03327310085296631, + "fcm_dpo/delta": -0.17127852141857147, + "fcm_dpo/margin": 16.79857063293457, + "fcm_dpo/q_t": 0.38316744565963745, + "grad_norm": 29.989072799682617, + "learning_rate": 4.996706849759452e-07, + "logits/chosen": -0.7313661575317383, + "logits/rejected": -0.688393235206604, + "logps/chosen": -69.29322052001953, + "logps/ref_chosen": -58.519290924072266, + "logps/ref_rejected": -87.54750061035156, + "logps/rejected": -115.1199951171875, + "loss": 1.0327, + "margin_dpo/margin_mean": 16.79857063293457, + "margin_dpo/margin_std": 22.803003311157227, + "step": 80 + }, + { + "KL/chosen_KL_mean": -9.863761901855469, + "KL/mean": -21.608978271484375, + "KL/rejected_KL_mean": -33.35419464111328, + "KL/std": 23.825132369995117, + "epoch": 0.11894273127753303, + "fcm_dpo/beta": 0.03129996731877327, + "fcm_dpo/delta": -0.36527884006500244, + "fcm_dpo/margin": 23.49043083190918, + "fcm_dpo/q_t": 0.34688568115234375, + "grad_norm": 30.72429847717285, + "learning_rate": 4.996015471965529e-07, + "logits/chosen": -0.7000492811203003, + "logits/rejected": -0.6688964366912842, + "logps/chosen": -76.31262969970703, + "logps/ref_chosen": -66.44886779785156, + "logps/ref_rejected": -129.66270446777344, + "logps/rejected": -163.01690673828125, + "loss": 0.9305, + "margin_dpo/margin_mean": 23.490432739257812, + "margin_dpo/margin_std": 28.210830688476562, + "step": 81 + }, + { + "KL/chosen_KL_mean": -12.267969131469727, + "KL/mean": -21.549884796142578, + "KL/rejected_KL_mean": -30.831790924072266, + "KL/std": 21.572769165039062, + "epoch": 0.12041116005873716, + "fcm_dpo/beta": 0.030443139374256134, + "fcm_dpo/delta": -0.17542892694473267, + "fcm_dpo/margin": 18.563825607299805, + "fcm_dpo/q_t": 0.3823755085468292, + "grad_norm": 33.41337203979492, + "learning_rate": 4.995258321842611e-07, + "logits/chosen": -0.6645326614379883, + "logits/rejected": -0.6522207260131836, + "logps/chosen": -64.50035095214844, + "logps/ref_chosen": -52.232383728027344, + "logps/ref_rejected": -90.74325561523438, + "logps/rejected": -121.57504272460938, + "loss": 1.074, + "margin_dpo/margin_mean": 18.563827514648438, + "margin_dpo/margin_std": 29.59918975830078, + "step": 82 + }, + { + "KL/chosen_KL_mean": -12.634248733520508, + "KL/mean": -23.630840301513672, + "KL/rejected_KL_mean": -34.627437591552734, + "KL/std": 22.750579833984375, + "epoch": 0.12187958883994127, + "fcm_dpo/beta": 0.028682120144367218, + "fcm_dpo/delta": -0.24740472435951233, + "fcm_dpo/margin": 21.993192672729492, + "fcm_dpo/q_t": 0.36408424377441406, + "grad_norm": 28.262123107910156, + "learning_rate": 4.994435419342304e-07, + "logits/chosen": -0.6717352867126465, + "logits/rejected": -0.6345555782318115, + "logps/chosen": -68.46163940429688, + "logps/ref_chosen": -55.82738494873047, + "logps/ref_rejected": -103.71589660644531, + "logps/rejected": -138.3433380126953, + "loss": 0.9787, + "margin_dpo/margin_mean": 21.993192672729492, + "margin_dpo/margin_std": 26.84136390686035, + "step": 83 + }, + { + "KL/chosen_KL_mean": -11.635591506958008, + "KL/mean": -21.24039649963379, + "KL/rejected_KL_mean": -30.845199584960938, + "KL/std": 19.57217788696289, + "epoch": 0.12334801762114538, + "fcm_dpo/beta": 0.027607331052422523, + "fcm_dpo/delta": -0.14001153409481049, + "fcm_dpo/margin": 19.20960235595703, + "fcm_dpo/q_t": 0.37740713357925415, + "grad_norm": 27.846446990966797, + "learning_rate": 4.993546786148857e-07, + "logits/chosen": -0.6538140177726746, + "logits/rejected": -0.6141324639320374, + "logps/chosen": -78.81175994873047, + "logps/ref_chosen": -67.1761703491211, + "logps/ref_rejected": -87.29859924316406, + "logps/rejected": -118.143798828125, + "loss": 0.9978, + "margin_dpo/margin_mean": 19.20960235595703, + "margin_dpo/margin_std": 19.999858856201172, + "step": 84 + }, + { + "KL/chosen_KL_mean": -12.274053573608398, + "KL/mean": -22.126728057861328, + "KL/rejected_KL_mean": -31.97940444946289, + "KL/std": 20.159584045410156, + "epoch": 0.12481644640234948, + "fcm_dpo/beta": 0.027254024520516396, + "fcm_dpo/delta": -0.1449553519487381, + "fcm_dpo/margin": 19.705352783203125, + "fcm_dpo/q_t": 0.38130825757980347, + "grad_norm": 27.484092712402344, + "learning_rate": 4.992592445678582e-07, + "logits/chosen": -0.6424893140792847, + "logits/rejected": -0.6098573207855225, + "logps/chosen": -70.68067169189453, + "logps/ref_chosen": -58.4066162109375, + "logps/ref_rejected": -78.63880157470703, + "logps/rejected": -110.61820983886719, + "loss": 1.0228, + "margin_dpo/margin_mean": 19.705352783203125, + "margin_dpo/margin_std": 24.122146606445312, + "step": 85 + }, + { + "KL/chosen_KL_mean": -16.05596351623535, + "KL/mean": -26.548908233642578, + "KL/rejected_KL_mean": -37.041839599609375, + "KL/std": 27.3221492767334, + "epoch": 0.1262848751835536, + "fcm_dpo/beta": 0.026313815265893936, + "fcm_dpo/delta": -0.16080215573310852, + "fcm_dpo/margin": 20.985877990722656, + "fcm_dpo/q_t": 0.3926513195037842, + "grad_norm": 31.304685592651367, + "learning_rate": 4.991572423079235e-07, + "logits/chosen": -0.6901407241821289, + "logits/rejected": -0.6772359609603882, + "logps/chosen": -72.19342803955078, + "logps/ref_chosen": -56.13746643066406, + "logps/ref_rejected": -88.12165069580078, + "logps/rejected": -125.16349029541016, + "loss": 1.1027, + "margin_dpo/margin_mean": 20.985881805419922, + "margin_dpo/margin_std": 37.65880584716797, + "step": 86 + }, + { + "KL/chosen_KL_mean": -15.858743667602539, + "KL/mean": -28.08181381225586, + "KL/rejected_KL_mean": -40.30488204956055, + "KL/std": 27.242843627929688, + "epoch": 0.1277533039647577, + "fcm_dpo/beta": 0.024988306686282158, + "fcm_dpo/delta": -0.22651353478431702, + "fcm_dpo/margin": 24.446142196655273, + "fcm_dpo/q_t": 0.3679496645927429, + "grad_norm": 26.263225555419922, + "learning_rate": 4.990486745229364e-07, + "logits/chosen": -0.7118654847145081, + "logits/rejected": -0.6854358911514282, + "logps/chosen": -71.49484252929688, + "logps/ref_chosen": -55.63609313964844, + "logps/ref_rejected": -95.46757507324219, + "logps/rejected": -135.7724609375, + "loss": 1.0137, + "margin_dpo/margin_mean": 24.446144104003906, + "margin_dpo/margin_std": 32.91810607910156, + "step": 87 + }, + { + "KL/chosen_KL_mean": -20.068950653076172, + "KL/mean": -29.328380584716797, + "KL/rejected_KL_mean": -38.58780288696289, + "KL/std": 28.398073196411133, + "epoch": 0.12922173274596183, + "fcm_dpo/beta": 0.0244886577129364, + "fcm_dpo/delta": -0.057016439735889435, + "fcm_dpo/margin": 18.518863677978516, + "fcm_dpo/q_t": 0.40317296981811523, + "grad_norm": 27.875059127807617, + "learning_rate": 4.989335440737586e-07, + "logits/chosen": -0.699777364730835, + "logits/rejected": -0.6927889585494995, + "logps/chosen": -93.74009704589844, + "logps/ref_chosen": -73.67115020751953, + "logps/ref_rejected": -106.70849609375, + "logps/rejected": -145.29629516601562, + "loss": 1.1165, + "margin_dpo/margin_mean": 18.518863677978516, + "margin_dpo/margin_std": 29.796024322509766, + "step": 88 + }, + { + "KL/chosen_KL_mean": -12.274129867553711, + "KL/mean": -22.63742446899414, + "KL/rejected_KL_mean": -33.0007209777832, + "KL/std": 24.061918258666992, + "epoch": 0.13069016152716592, + "fcm_dpo/beta": 0.024275628849864006, + "fcm_dpo/delta": -0.10855366289615631, + "fcm_dpo/margin": 20.72658920288086, + "fcm_dpo/q_t": 0.3883446455001831, + "grad_norm": 25.647180557250977, + "learning_rate": 4.988118539941847e-07, + "logits/chosen": -0.7325412631034851, + "logits/rejected": -0.7000647783279419, + "logps/chosen": -72.8990478515625, + "logps/ref_chosen": -60.624916076660156, + "logps/ref_rejected": -82.08354949951172, + "logps/rejected": -115.08427429199219, + "loss": 1.0399, + "margin_dpo/margin_mean": 20.72658920288086, + "margin_dpo/margin_std": 27.33102798461914, + "step": 89 + }, + { + "KL/chosen_KL_mean": -15.752443313598633, + "KL/mean": -29.865989685058594, + "KL/rejected_KL_mean": -43.97953414916992, + "KL/std": 32.87799835205078, + "epoch": 0.13215859030837004, + "fcm_dpo/beta": 0.02329513430595398, + "fcm_dpo/delta": -0.2749367952346802, + "fcm_dpo/margin": 28.22708511352539, + "fcm_dpo/q_t": 0.3700242340564728, + "grad_norm": 27.188581466674805, + "learning_rate": 4.986836074908615e-07, + "logits/chosen": -0.6703182458877563, + "logits/rejected": -0.681124210357666, + "logps/chosen": -69.03775024414062, + "logps/ref_chosen": -53.285308837890625, + "logps/ref_rejected": -111.54470825195312, + "logps/rejected": -155.5242462158203, + "loss": 1.0186, + "margin_dpo/margin_mean": 28.22708511352539, + "margin_dpo/margin_std": 40.7965087890625, + "step": 90 + }, + { + "KL/chosen_KL_mean": -16.609392166137695, + "KL/mean": -28.263938903808594, + "KL/rejected_KL_mean": -39.91849899291992, + "KL/std": 26.464740753173828, + "epoch": 0.13362701908957417, + "fcm_dpo/beta": 0.022392991930246353, + "fcm_dpo/delta": -0.12847986817359924, + "fcm_dpo/margin": 23.309101104736328, + "fcm_dpo/q_t": 0.3886939287185669, + "grad_norm": 25.46695899963379, + "learning_rate": 4.985488079432037e-07, + "logits/chosen": -0.6979262828826904, + "logits/rejected": -0.6650443077087402, + "logps/chosen": -78.412353515625, + "logps/ref_chosen": -61.802955627441406, + "logps/ref_rejected": -87.87395477294922, + "logps/rejected": -127.79244995117188, + "loss": 1.0595, + "margin_dpo/margin_mean": 23.309099197387695, + "margin_dpo/margin_std": 34.23745346069336, + "step": 91 + }, + { + "KL/chosen_KL_mean": -15.063024520874023, + "KL/mean": -26.865272521972656, + "KL/rejected_KL_mean": -38.66752624511719, + "KL/std": 27.186147689819336, + "epoch": 0.13509544787077826, + "fcm_dpo/beta": 0.021884029731154442, + "fcm_dpo/delta": -0.12284786254167557, + "fcm_dpo/margin": 23.604501724243164, + "fcm_dpo/q_t": 0.38834255933761597, + "grad_norm": 23.927978515625, + "learning_rate": 4.984074589033043e-07, + "logits/chosen": -0.7328395247459412, + "logits/rejected": -0.7109937071800232, + "logps/chosen": -66.70379638671875, + "logps/ref_chosen": -51.640769958496094, + "logps/ref_rejected": -77.88117980957031, + "logps/rejected": -116.5487060546875, + "loss": 1.0512, + "margin_dpo/margin_mean": 23.60449981689453, + "margin_dpo/margin_std": 32.96575164794922, + "step": 92 + }, + { + "KL/chosen_KL_mean": -16.824951171875, + "KL/mean": -28.743083953857422, + "KL/rejected_KL_mean": -40.661216735839844, + "KL/std": 25.748863220214844, + "epoch": 0.13656387665198239, + "fcm_dpo/beta": 0.021284889429807663, + "fcm_dpo/delta": -0.11301136016845703, + "fcm_dpo/margin": 23.836261749267578, + "fcm_dpo/q_t": 0.3872656226158142, + "grad_norm": 24.41376495361328, + "learning_rate": 4.982595640958425e-07, + "logits/chosen": -0.7215616703033447, + "logits/rejected": -0.6693962812423706, + "logps/chosen": -69.35418701171875, + "logps/ref_chosen": -52.529239654541016, + "logps/ref_rejected": -77.16075134277344, + "logps/rejected": -117.82196044921875, + "loss": 1.0264, + "margin_dpo/margin_mean": 23.836261749267578, + "margin_dpo/margin_std": 29.672298431396484, + "step": 93 + }, + { + "KL/chosen_KL_mean": -18.283151626586914, + "KL/mean": -31.948049545288086, + "KL/rejected_KL_mean": -45.61294937133789, + "KL/std": 29.418785095214844, + "epoch": 0.13803230543318648, + "fcm_dpo/beta": 0.02049822360277176, + "fcm_dpo/delta": -0.17131651937961578, + "fcm_dpo/margin": 27.329792022705078, + "fcm_dpo/q_t": 0.37480291724205017, + "grad_norm": 24.020462036132812, + "learning_rate": 4.98105127417984e-07, + "logits/chosen": -0.6669220924377441, + "logits/rejected": -0.6510493755340576, + "logps/chosen": -79.50576782226562, + "logps/ref_chosen": -61.22261047363281, + "logps/ref_rejected": -99.59902954101562, + "logps/rejected": -145.21197509765625, + "loss": 0.9924, + "margin_dpo/margin_mean": 27.329792022705078, + "margin_dpo/margin_std": 30.807952880859375, + "step": 94 + }, + { + "KL/chosen_KL_mean": -17.35989761352539, + "KL/mean": -28.47588539123535, + "KL/rejected_KL_mean": -39.591880798339844, + "KL/std": 28.130035400390625, + "epoch": 0.1395007342143906, + "fcm_dpo/beta": 0.02020413801074028, + "fcm_dpo/delta": -0.051920242607593536, + "fcm_dpo/margin": 22.231979370117188, + "fcm_dpo/q_t": 0.3971063494682312, + "grad_norm": 22.573118209838867, + "learning_rate": 4.979441529392784e-07, + "logits/chosen": -0.6930748224258423, + "logits/rejected": -0.6638180017471313, + "logps/chosen": -69.883544921875, + "logps/ref_chosen": -52.523643493652344, + "logps/ref_rejected": -75.8803482055664, + "logps/rejected": -115.47222900390625, + "loss": 1.0693, + "margin_dpo/margin_mean": 22.231979370117188, + "margin_dpo/margin_std": 29.545318603515625, + "step": 95 + }, + { + "KL/chosen_KL_mean": -16.8060245513916, + "KL/mean": -31.907100677490234, + "KL/rejected_KL_mean": -47.0081787109375, + "KL/std": 32.275535583496094, + "epoch": 0.14096916299559473, + "fcm_dpo/beta": 0.01948397234082222, + "fcm_dpo/delta": -0.20268620550632477, + "fcm_dpo/margin": 30.20215606689453, + "fcm_dpo/q_t": 0.3715973496437073, + "grad_norm": 23.293750762939453, + "learning_rate": 4.977766449015534e-07, + "logits/chosen": -0.7053156495094299, + "logits/rejected": -0.6753140091896057, + "logps/chosen": -78.96299743652344, + "logps/ref_chosen": -62.15697479248047, + "logps/ref_rejected": -96.59601593017578, + "logps/rejected": -143.60418701171875, + "loss": 0.9826, + "margin_dpo/margin_mean": 30.20215606689453, + "margin_dpo/margin_std": 35.575836181640625, + "step": 96 + }, + { + "KL/chosen_KL_mean": -18.12676429748535, + "KL/mean": -29.81930923461914, + "KL/rejected_KL_mean": -41.51185989379883, + "KL/std": 26.801036834716797, + "epoch": 0.14243759177679882, + "fcm_dpo/beta": 0.019416380673646927, + "fcm_dpo/delta": -0.057599060237407684, + "fcm_dpo/margin": 23.38509750366211, + "fcm_dpo/q_t": 0.3948373794555664, + "grad_norm": 23.826488494873047, + "learning_rate": 4.976026077188012e-07, + "logits/chosen": -0.6331626176834106, + "logits/rejected": -0.5884179472923279, + "logps/chosen": -72.77313232421875, + "logps/ref_chosen": -54.646366119384766, + "logps/ref_rejected": -76.96475219726562, + "logps/rejected": -118.47660827636719, + "loss": 1.0527, + "margin_dpo/margin_mean": 23.38509750366211, + "margin_dpo/margin_std": 26.815166473388672, + "step": 97 + }, + { + "KL/chosen_KL_mean": -22.460552215576172, + "KL/mean": -35.82654571533203, + "KL/rejected_KL_mean": -49.192535400390625, + "KL/std": 30.553295135498047, + "epoch": 0.14390602055800295, + "fcm_dpo/beta": 0.018851084634661674, + "fcm_dpo/delta": -0.10970290005207062, + "fcm_dpo/margin": 26.731998443603516, + "fcm_dpo/q_t": 0.38528013229370117, + "grad_norm": 24.53318214416504, + "learning_rate": 4.974220459770639e-07, + "logits/chosen": -0.6960965394973755, + "logits/rejected": -0.6801573038101196, + "logps/chosen": -87.71917724609375, + "logps/ref_chosen": -65.25862884521484, + "logps/ref_rejected": -96.5274887084961, + "logps/rejected": -145.72003173828125, + "loss": 1.0547, + "margin_dpo/margin_mean": 26.731998443603516, + "margin_dpo/margin_std": 36.594322204589844, + "step": 98 + }, + { + "KL/chosen_KL_mean": -17.805437088012695, + "KL/mean": -34.056640625, + "KL/rejected_KL_mean": -50.307838439941406, + "KL/std": 33.01419448852539, + "epoch": 0.14537444933920704, + "fcm_dpo/beta": 0.018170353025197983, + "fcm_dpo/delta": -0.20326459407806396, + "fcm_dpo/margin": 32.502403259277344, + "fcm_dpo/q_t": 0.3738780617713928, + "grad_norm": 21.70009994506836, + "learning_rate": 4.972349644343108e-07, + "logits/chosen": -0.6636701822280884, + "logits/rejected": -0.6640149354934692, + "logps/chosen": -63.44392395019531, + "logps/ref_chosen": -45.638484954833984, + "logps/ref_rejected": -86.43793487548828, + "logps/rejected": -136.7457733154297, + "loss": 0.993, + "margin_dpo/margin_mean": 32.502403259277344, + "margin_dpo/margin_std": 41.165550231933594, + "step": 99 + }, + { + "KL/chosen_KL_mean": -19.931947708129883, + "KL/mean": -29.551782608032227, + "KL/rejected_KL_mean": -39.17161178588867, + "KL/std": 26.748775482177734, + "epoch": 0.14684287812041116, + "fcm_dpo/beta": 0.018238741904497147, + "fcm_dpo/delta": 0.05035646632313728, + "fcm_dpo/margin": 19.239667892456055, + "fcm_dpo/q_t": 0.42096006870269775, + "grad_norm": 24.107803344726562, + "learning_rate": 4.970413680203148e-07, + "logits/chosen": -0.6563955545425415, + "logits/rejected": -0.6127746105194092, + "logps/chosen": -77.52592468261719, + "logps/ref_chosen": -57.59397888183594, + "logps/ref_rejected": -74.06021118164062, + "logps/rejected": -113.23182678222656, + "loss": 1.1626, + "margin_dpo/margin_mean": 19.239667892456055, + "margin_dpo/margin_std": 34.46852111816406, + "step": 100 + }, + { + "KL/chosen_KL_mean": -24.661067962646484, + "KL/mean": -36.265132904052734, + "KL/rejected_KL_mean": -47.869197845458984, + "KL/std": 32.89160919189453, + "epoch": 0.14831130690161526, + "fcm_dpo/beta": 0.0180535688996315, + "fcm_dpo/delta": -0.020610351115465164, + "fcm_dpo/margin": 23.2081298828125, + "fcm_dpo/q_t": 0.4103754460811615, + "grad_norm": 23.844804763793945, + "learning_rate": 4.968412618365215e-07, + "logits/chosen": -0.6886243224143982, + "logits/rejected": -0.6581400632858276, + "logps/chosen": -86.30992126464844, + "logps/ref_chosen": -61.64885330200195, + "logps/ref_rejected": -83.18968200683594, + "logps/rejected": -131.0588836669922, + "loss": 1.1263, + "margin_dpo/margin_mean": 23.2081298828125, + "margin_dpo/margin_std": 39.541419982910156, + "step": 101 + }, + { + "KL/chosen_KL_mean": -26.75433921813965, + "KL/mean": -35.32197570800781, + "KL/rejected_KL_mean": -43.889610290527344, + "KL/std": 31.045133590698242, + "epoch": 0.14977973568281938, + "fcm_dpo/beta": 0.018092244863510132, + "fcm_dpo/delta": -0.025346608832478523, + "fcm_dpo/margin": 17.135272979736328, + "fcm_dpo/q_t": 0.43100807070732117, + "grad_norm": 26.781410217285156, + "learning_rate": 4.966346511559149e-07, + "logits/chosen": -0.6860433220863342, + "logits/rejected": -0.6402877569198608, + "logps/chosen": -90.83322143554688, + "logps/ref_chosen": -64.0788803100586, + "logps/ref_rejected": -68.18707275390625, + "logps/rejected": -112.07669067382812, + "loss": 1.2068, + "margin_dpo/margin_mean": 17.13527488708496, + "margin_dpo/margin_std": 36.66783905029297, + "step": 102 + }, + { + "KL/chosen_KL_mean": -20.93372344970703, + "KL/mean": -38.12938690185547, + "KL/rejected_KL_mean": -55.3250617980957, + "KL/std": 34.48286437988281, + "epoch": 0.1512481644640235, + "fcm_dpo/beta": 0.01744980737566948, + "fcm_dpo/delta": -0.21340087056159973, + "fcm_dpo/margin": 34.39133834838867, + "fcm_dpo/q_t": 0.3697454333305359, + "grad_norm": 22.851566314697266, + "learning_rate": 4.964215414228785e-07, + "logits/chosen": -0.6631127595901489, + "logits/rejected": -0.6278681755065918, + "logps/chosen": -82.23300170898438, + "logps/ref_chosen": -61.299278259277344, + "logps/ref_rejected": -93.57270812988281, + "logps/rejected": -148.89776611328125, + "loss": 0.9817, + "margin_dpo/margin_mean": 34.39133834838867, + "margin_dpo/margin_std": 41.43703079223633, + "step": 103 + }, + { + "KL/chosen_KL_mean": -23.079187393188477, + "KL/mean": -39.357391357421875, + "KL/rejected_KL_mean": -55.635589599609375, + "KL/std": 38.08613967895508, + "epoch": 0.1527165932452276, + "fcm_dpo/beta": 0.01691918447613716, + "fcm_dpo/delta": -0.15939825773239136, + "fcm_dpo/margin": 32.55640411376953, + "fcm_dpo/q_t": 0.3846844732761383, + "grad_norm": 22.517627716064453, + "learning_rate": 4.96201938253052e-07, + "logits/chosen": -0.7116140127182007, + "logits/rejected": -0.6817853450775146, + "logps/chosen": -77.45196533203125, + "logps/ref_chosen": -54.372772216796875, + "logps/ref_rejected": -89.5647201538086, + "logps/rejected": -145.2003173828125, + "loss": 1.041, + "margin_dpo/margin_mean": 32.5564079284668, + "margin_dpo/margin_std": 46.39738082885742, + "step": 104 + }, + { + "KL/chosen_KL_mean": -21.710777282714844, + "KL/mean": -44.809566497802734, + "KL/rejected_KL_mean": -67.90835571289062, + "KL/std": 37.22575378417969, + "epoch": 0.15418502202643172, + "fcm_dpo/beta": 0.015895074233412743, + "fcm_dpo/delta": -0.36052238941192627, + "fcm_dpo/margin": 46.197574615478516, + "fcm_dpo/q_t": 0.3344946503639221, + "grad_norm": 22.841474533081055, + "learning_rate": 4.959758474331832e-07, + "logits/chosen": -0.6764773726463318, + "logits/rejected": -0.6534477472305298, + "logps/chosen": -76.34972381591797, + "logps/ref_chosen": -54.638946533203125, + "logps/ref_rejected": -97.97351837158203, + "logps/rejected": -165.88186645507812, + "loss": 0.8712, + "margin_dpo/margin_mean": 46.19757843017578, + "margin_dpo/margin_std": 40.87889099121094, + "step": 105 + }, + { + "KL/chosen_KL_mean": -24.2728328704834, + "KL/mean": -38.736602783203125, + "KL/rejected_KL_mean": -53.20037078857422, + "KL/std": 32.29327392578125, + "epoch": 0.15565345080763582, + "fcm_dpo/beta": 0.015443746000528336, + "fcm_dpo/delta": -0.04908674955368042, + "fcm_dpo/margin": 28.927536010742188, + "fcm_dpo/q_t": 0.3971561789512634, + "grad_norm": 21.783618927001953, + "learning_rate": 4.957432749209755e-07, + "logits/chosen": -0.6298633217811584, + "logits/rejected": -0.5977374315261841, + "logps/chosen": -79.10572814941406, + "logps/ref_chosen": -54.83289337158203, + "logps/ref_rejected": -85.22461700439453, + "logps/rejected": -138.42498779296875, + "loss": 1.0575, + "margin_dpo/margin_mean": 28.927536010742188, + "margin_dpo/margin_std": 35.089813232421875, + "step": 106 + }, + { + "KL/chosen_KL_mean": -28.940296173095703, + "KL/mean": -45.31855773925781, + "KL/rejected_KL_mean": -61.69682312011719, + "KL/std": 39.5135498046875, + "epoch": 0.15712187958883994, + "fcm_dpo/beta": 0.015136872418224812, + "fcm_dpo/delta": -0.10162399709224701, + "fcm_dpo/margin": 32.756526947021484, + "fcm_dpo/q_t": 0.3875572979450226, + "grad_norm": 21.18216323852539, + "learning_rate": 4.955042268449307e-07, + "logits/chosen": -0.697156548500061, + "logits/rejected": -0.6531878113746643, + "logps/chosen": -98.64810943603516, + "logps/ref_chosen": -69.70780944824219, + "logps/ref_rejected": -94.73950958251953, + "logps/rejected": -156.43634033203125, + "loss": 1.0427, + "margin_dpo/margin_mean": 32.75652313232422, + "margin_dpo/margin_std": 41.57597351074219, + "step": 107 + }, + { + "KL/chosen_KL_mean": -26.22125816345215, + "KL/mean": -45.80976486206055, + "KL/rejected_KL_mean": -65.39826965332031, + "KL/std": 43.5434684753418, + "epoch": 0.15859030837004406, + "fcm_dpo/beta": 0.014710919000208378, + "fcm_dpo/delta": -0.1870792806148529, + "fcm_dpo/margin": 39.17702102661133, + "fcm_dpo/q_t": 0.38031500577926636, + "grad_norm": 21.38556671142578, + "learning_rate": 4.952587095041881e-07, + "logits/chosen": -0.6746413707733154, + "logits/rejected": -0.6529253721237183, + "logps/chosen": -82.23114013671875, + "logps/ref_chosen": -56.0098876953125, + "logps/ref_rejected": -95.79601287841797, + "logps/rejected": -161.19427490234375, + "loss": 1.0372, + "margin_dpo/margin_mean": 39.17702102661133, + "margin_dpo/margin_std": 56.45673370361328, + "step": 108 + }, + { + "KL/chosen_KL_mean": -24.449106216430664, + "KL/mean": -45.4000358581543, + "KL/rejected_KL_mean": -66.35096740722656, + "KL/std": 42.009151458740234, + "epoch": 0.16005873715124816, + "fcm_dpo/beta": 0.014084616675972939, + "fcm_dpo/delta": -0.2030661702156067, + "fcm_dpo/margin": 41.90185546875, + "fcm_dpo/q_t": 0.36869800090789795, + "grad_norm": 21.996633529663086, + "learning_rate": 4.95006729368358e-07, + "logits/chosen": -0.6177815198898315, + "logits/rejected": -0.5968196392059326, + "logps/chosen": -87.3345947265625, + "logps/ref_chosen": -62.88549041748047, + "logps/ref_rejected": -98.68573760986328, + "logps/rejected": -165.03671264648438, + "loss": 0.9883, + "margin_dpo/margin_mean": 41.901859283447266, + "margin_dpo/margin_std": 49.483802795410156, + "step": 109 + }, + { + "KL/chosen_KL_mean": -25.081512451171875, + "KL/mean": -43.997459411621094, + "KL/rejected_KL_mean": -62.91341018676758, + "KL/std": 41.81461715698242, + "epoch": 0.16152716593245228, + "fcm_dpo/beta": 0.013565946370363235, + "fcm_dpo/delta": -0.12388351559638977, + "fcm_dpo/margin": 37.83190155029297, + "fcm_dpo/q_t": 0.386931836605072, + "grad_norm": 19.18947982788086, + "learning_rate": 4.947482930773511e-07, + "logits/chosen": -0.6101734638214111, + "logits/rejected": -0.5731357932090759, + "logps/chosen": -83.83519744873047, + "logps/ref_chosen": -58.753684997558594, + "logps/ref_rejected": -79.75001525878906, + "logps/rejected": -142.66342163085938, + "loss": 1.0508, + "margin_dpo/margin_mean": 37.83190155029297, + "margin_dpo/margin_std": 50.25776672363281, + "step": 110 + }, + { + "KL/chosen_KL_mean": -28.801855087280273, + "KL/mean": -50.240867614746094, + "KL/rejected_KL_mean": -71.67987823486328, + "KL/std": 47.443092346191406, + "epoch": 0.16299559471365638, + "fcm_dpo/beta": 0.01321389153599739, + "fcm_dpo/delta": -0.17823287844657898, + "fcm_dpo/margin": 42.87803649902344, + "fcm_dpo/q_t": 0.3765709400177002, + "grad_norm": 22.14818000793457, + "learning_rate": 4.944834074412042e-07, + "logits/chosen": -0.6801958084106445, + "logits/rejected": -0.6596289873123169, + "logps/chosen": -97.42596435546875, + "logps/ref_chosen": -68.62410736083984, + "logps/ref_rejected": -98.42886352539062, + "logps/rejected": -170.10873413085938, + "loss": 1.0313, + "margin_dpo/margin_mean": 42.87803268432617, + "margin_dpo/margin_std": 58.28101348876953, + "step": 111 + }, + { + "KL/chosen_KL_mean": -26.53110122680664, + "KL/mean": -40.25721740722656, + "KL/rejected_KL_mean": -53.983333587646484, + "KL/std": 33.112037658691406, + "epoch": 0.1644640234948605, + "fcm_dpo/beta": 0.013187635689973831, + "fcm_dpo/delta": 0.039192065596580505, + "fcm_dpo/margin": 27.45223617553711, + "fcm_dpo/q_t": 0.41697975993156433, + "grad_norm": 20.016754150390625, + "learning_rate": 4.942120794399002e-07, + "logits/chosen": -0.6283408999443054, + "logits/rejected": -0.5890357494354248, + "logps/chosen": -76.78074645996094, + "logps/ref_chosen": -50.24964141845703, + "logps/ref_rejected": -64.77442932128906, + "logps/rejected": -118.75776672363281, + "loss": 1.1236, + "margin_dpo/margin_mean": 27.452232360839844, + "margin_dpo/margin_std": 39.88359069824219, + "step": 112 + }, + { + "KL/chosen_KL_mean": -33.448238372802734, + "KL/mean": -48.20136642456055, + "KL/rejected_KL_mean": -62.954490661621094, + "KL/std": 34.57713317871094, + "epoch": 0.16593245227606462, + "fcm_dpo/beta": 0.013288527727127075, + "fcm_dpo/delta": 0.008226404897868633, + "fcm_dpo/margin": 29.506244659423828, + "fcm_dpo/q_t": 0.4095924198627472, + "grad_norm": 20.27177619934082, + "learning_rate": 4.939343162231841e-07, + "logits/chosen": -0.6053781509399414, + "logits/rejected": -0.5618308782577515, + "logps/chosen": -100.16119384765625, + "logps/ref_chosen": -66.71295166015625, + "logps/ref_rejected": -77.96870422363281, + "logps/rejected": -140.92318725585938, + "loss": 1.0924, + "margin_dpo/margin_mean": 29.506242752075195, + "margin_dpo/margin_std": 38.19648742675781, + "step": 113 + }, + { + "KL/chosen_KL_mean": -30.597002029418945, + "KL/mean": -54.11078643798828, + "KL/rejected_KL_mean": -77.62455749511719, + "KL/std": 51.99464797973633, + "epoch": 0.16740088105726872, + "fcm_dpo/beta": 0.012787006795406342, + "fcm_dpo/delta": -0.21655288338661194, + "fcm_dpo/margin": 47.027565002441406, + "fcm_dpo/q_t": 0.37409037351608276, + "grad_norm": 21.58888053894043, + "learning_rate": 4.936501251103751e-07, + "logits/chosen": -0.6126964092254639, + "logits/rejected": -0.5797730684280396, + "logps/chosen": -88.382080078125, + "logps/ref_chosen": -57.78507995605469, + "logps/ref_rejected": -87.10966491699219, + "logps/rejected": -164.73422241210938, + "loss": 0.9983, + "margin_dpo/margin_mean": 47.027557373046875, + "margin_dpo/margin_std": 63.04608154296875, + "step": 114 + }, + { + "KL/chosen_KL_mean": -40.25880813598633, + "KL/mean": -56.40677261352539, + "KL/rejected_KL_mean": -72.55473327636719, + "KL/std": 49.674827575683594, + "epoch": 0.16886930983847284, + "fcm_dpo/beta": 0.012744484469294548, + "fcm_dpo/delta": -0.012245994061231613, + "fcm_dpo/margin": 32.29592514038086, + "fcm_dpo/q_t": 0.4138960838317871, + "grad_norm": 28.310956954956055, + "learning_rate": 4.933595135901732e-07, + "logits/chosen": -0.6323011517524719, + "logits/rejected": -0.6113392114639282, + "logps/chosen": -105.84144592285156, + "logps/ref_chosen": -65.5826416015625, + "logps/ref_rejected": -98.56552124023438, + "logps/rejected": -171.12025451660156, + "loss": 1.1653, + "margin_dpo/margin_mean": 32.29592514038086, + "margin_dpo/margin_std": 65.29641723632812, + "step": 115 + }, + { + "KL/chosen_KL_mean": -30.069929122924805, + "KL/mean": -47.952064514160156, + "KL/rejected_KL_mean": -65.83419799804688, + "KL/std": 41.692527770996094, + "epoch": 0.17033773861967694, + "fcm_dpo/beta": 0.012649480253458023, + "fcm_dpo/delta": -0.055032700300216675, + "fcm_dpo/margin": 35.764259338378906, + "fcm_dpo/q_t": 0.39788171648979187, + "grad_norm": 22.138639450073242, + "learning_rate": 4.930624893204624e-07, + "logits/chosen": -0.6037485599517822, + "logits/rejected": -0.5925810933113098, + "logps/chosen": -81.47024536132812, + "logps/ref_chosen": -51.40031433105469, + "logps/ref_rejected": -80.5218505859375, + "logps/rejected": -146.35604858398438, + "loss": 1.0586, + "margin_dpo/margin_mean": 35.76426315307617, + "margin_dpo/margin_std": 45.07810974121094, + "step": 116 + }, + { + "KL/chosen_KL_mean": -38.60306930541992, + "KL/mean": -53.26951217651367, + "KL/rejected_KL_mean": -67.93594360351562, + "KL/std": 44.515228271484375, + "epoch": 0.17180616740088106, + "fcm_dpo/beta": 0.012619540095329285, + "fcm_dpo/delta": 0.030971404165029526, + "fcm_dpo/margin": 29.332874298095703, + "fcm_dpo/q_t": 0.41701966524124146, + "grad_norm": 27.81166648864746, + "learning_rate": 4.927590601281083e-07, + "logits/chosen": -0.5994728803634644, + "logits/rejected": -0.5634751319885254, + "logps/chosen": -107.90147399902344, + "logps/ref_chosen": -69.29840850830078, + "logps/ref_rejected": -66.583984375, + "logps/rejected": -134.51992797851562, + "loss": 1.1423, + "margin_dpo/margin_mean": 29.332870483398438, + "margin_dpo/margin_std": 50.427330017089844, + "step": 117 + }, + { + "KL/chosen_KL_mean": -30.75716781616211, + "KL/mean": -48.66988754272461, + "KL/rejected_KL_mean": -66.58258819580078, + "KL/std": 40.41304397583008, + "epoch": 0.17327459618208516, + "fcm_dpo/beta": 0.01254614070057869, + "fcm_dpo/delta": -0.05177786946296692, + "fcm_dpo/margin": 35.82543182373047, + "fcm_dpo/q_t": 0.3989246189594269, + "grad_norm": 21.09397315979004, + "learning_rate": 4.924492340087524e-07, + "logits/chosen": -0.6358990669250488, + "logits/rejected": -0.6165393590927124, + "logps/chosen": -86.39814758300781, + "logps/ref_chosen": -55.6409797668457, + "logps/ref_rejected": -75.66905975341797, + "logps/rejected": -142.25164794921875, + "loss": 1.0623, + "margin_dpo/margin_mean": 35.82543182373047, + "margin_dpo/margin_std": 46.26477813720703, + "step": 118 + }, + { + "KL/chosen_KL_mean": -42.31685256958008, + "KL/mean": -60.063663482666016, + "KL/rejected_KL_mean": -77.81047058105469, + "KL/std": 45.433780670166016, + "epoch": 0.17474302496328928, + "fcm_dpo/beta": 0.012348956428468227, + "fcm_dpo/delta": -0.04121140390634537, + "fcm_dpo/margin": 35.49361801147461, + "fcm_dpo/q_t": 0.40484434366226196, + "grad_norm": 23.629602432250977, + "learning_rate": 4.92133019126601e-07, + "logits/chosen": -0.6368391513824463, + "logits/rejected": -0.6250006556510925, + "logps/chosen": -115.8270492553711, + "logps/ref_chosen": -73.51019287109375, + "logps/ref_rejected": -102.977294921875, + "logps/rejected": -180.7877655029297, + "loss": 1.1009, + "margin_dpo/margin_mean": 35.49361801147461, + "margin_dpo/margin_std": 55.28799057006836, + "step": 119 + }, + { + "KL/chosen_KL_mean": -43.77620315551758, + "KL/mean": -68.10749816894531, + "KL/rejected_KL_mean": -92.43879699707031, + "KL/std": 56.72157287597656, + "epoch": 0.1762114537444934, + "fcm_dpo/beta": 0.012037184089422226, + "fcm_dpo/delta": -0.197471484541893, + "fcm_dpo/margin": 48.66258239746094, + "fcm_dpo/q_t": 0.3716173470020294, + "grad_norm": 22.18862533569336, + "learning_rate": 4.918104238142103e-07, + "logits/chosen": -0.614201009273529, + "logits/rejected": -0.5810754299163818, + "logps/chosen": -120.55703735351562, + "logps/ref_chosen": -76.78083801269531, + "logps/ref_rejected": -108.02374267578125, + "logps/rejected": -200.4625244140625, + "loss": 0.9948, + "margin_dpo/margin_mean": 48.66257858276367, + "margin_dpo/margin_std": 59.4996223449707, + "step": 120 + }, + { + "KL/chosen_KL_mean": -42.099632263183594, + "KL/mean": -69.11078643798828, + "KL/rejected_KL_mean": -96.12193298339844, + "KL/std": 55.44459915161133, + "epoch": 0.1776798825256975, + "fcm_dpo/beta": 0.011453816667199135, + "fcm_dpo/delta": -0.2350277304649353, + "fcm_dpo/margin": 54.022308349609375, + "fcm_dpo/q_t": 0.3670397102832794, + "grad_norm": 24.160505294799805, + "learning_rate": 4.91481456572267e-07, + "logits/chosen": -0.5971692800521851, + "logits/rejected": -0.5936212539672852, + "logps/chosen": -103.8895263671875, + "logps/ref_chosen": -61.789894104003906, + "logps/ref_rejected": -109.99456787109375, + "logps/rejected": -206.11651611328125, + "loss": 0.9958, + "margin_dpo/margin_mean": 54.022308349609375, + "margin_dpo/margin_std": 69.530029296875, + "step": 121 + }, + { + "KL/chosen_KL_mean": -38.35765838623047, + "KL/mean": -72.02163696289062, + "KL/rejected_KL_mean": -105.68560791015625, + "KL/std": 63.397361755371094, + "epoch": 0.17914831130690162, + "fcm_dpo/beta": 0.010804468765854836, + "fcm_dpo/delta": -0.35293835401535034, + "fcm_dpo/margin": 67.32794952392578, + "fcm_dpo/q_t": 0.3403121829032898, + "grad_norm": 23.03303337097168, + "learning_rate": 4.911461260693638e-07, + "logits/chosen": -0.5504162311553955, + "logits/rejected": -0.5663501024246216, + "logps/chosen": -85.25987243652344, + "logps/ref_chosen": -46.9022102355957, + "logps/ref_rejected": -106.71418762207031, + "logps/rejected": -212.39981079101562, + "loss": 0.893, + "margin_dpo/margin_mean": 67.32794952392578, + "margin_dpo/margin_std": 66.18780517578125, + "step": 122 + }, + { + "KL/chosen_KL_mean": -42.53881072998047, + "KL/mean": -64.56620025634766, + "KL/rejected_KL_mean": -86.59358215332031, + "KL/std": 54.408897399902344, + "epoch": 0.18061674008810572, + "fcm_dpo/beta": 0.010443691164255142, + "fcm_dpo/delta": -0.06393568962812424, + "fcm_dpo/margin": 44.054771423339844, + "fcm_dpo/q_t": 0.401348352432251, + "grad_norm": 21.118499755859375, + "learning_rate": 4.908044411417711e-07, + "logits/chosen": -0.5548320412635803, + "logits/rejected": -0.537066638469696, + "logps/chosen": -103.87744903564453, + "logps/ref_chosen": -61.33863830566406, + "logps/ref_rejected": -87.775390625, + "logps/rejected": -174.36898803710938, + "loss": 1.1089, + "margin_dpo/margin_mean": 44.054771423339844, + "margin_dpo/margin_std": 73.05723571777344, + "step": 123 + }, + { + "KL/chosen_KL_mean": -48.416603088378906, + "KL/mean": -80.89775085449219, + "KL/rejected_KL_mean": -113.37890625, + "KL/std": 74.14457702636719, + "epoch": 0.18208516886930984, + "fcm_dpo/beta": 0.010025800205767155, + "fcm_dpo/delta": -0.26980358362197876, + "fcm_dpo/margin": 64.96229553222656, + "fcm_dpo/q_t": 0.3692609667778015, + "grad_norm": 26.101245880126953, + "learning_rate": 4.904564107932048e-07, + "logits/chosen": -0.5674383640289307, + "logits/rejected": -0.5700336694717407, + "logps/chosen": -119.86493682861328, + "logps/ref_chosen": -71.44833374023438, + "logps/ref_rejected": -117.58056640625, + "logps/rejected": -230.95947265625, + "loss": 1.0158, + "margin_dpo/margin_mean": 64.96229553222656, + "margin_dpo/margin_std": 93.43049621582031, + "step": 124 + }, + { + "KL/chosen_KL_mean": -39.75640106201172, + "KL/mean": -68.591796875, + "KL/rejected_KL_mean": -97.42720031738281, + "KL/std": 62.443931579589844, + "epoch": 0.18355359765051396, + "fcm_dpo/beta": 0.009690500795841217, + "fcm_dpo/delta": -0.16801846027374268, + "fcm_dpo/margin": 57.67080307006836, + "fcm_dpo/q_t": 0.3799913227558136, + "grad_norm": 19.212617874145508, + "learning_rate": 4.90102044194588e-07, + "logits/chosen": -0.5034211874008179, + "logits/rejected": -0.5046522617340088, + "logps/chosen": -89.89334106445312, + "logps/ref_chosen": -50.136940002441406, + "logps/ref_rejected": -83.98861694335938, + "logps/rejected": -181.4158172607422, + "loss": 1.0263, + "margin_dpo/margin_mean": 57.670806884765625, + "margin_dpo/margin_std": 77.20497131347656, + "step": 125 + }, + { + "KL/chosen_KL_mean": -43.16718292236328, + "KL/mean": -70.08540344238281, + "KL/rejected_KL_mean": -97.00362396240234, + "KL/std": 56.352882385253906, + "epoch": 0.18502202643171806, + "fcm_dpo/beta": 0.009411858394742012, + "fcm_dpo/delta": -0.11267369985580444, + "fcm_dpo/margin": 53.836448669433594, + "fcm_dpo/q_t": 0.38875728845596313, + "grad_norm": 20.403451919555664, + "learning_rate": 4.897413506838102e-07, + "logits/chosen": -0.5015150308609009, + "logits/rejected": -0.4946970045566559, + "logps/chosen": -98.8342514038086, + "logps/ref_chosen": -55.66706848144531, + "logps/ref_rejected": -98.1297607421875, + "logps/rejected": -195.13339233398438, + "loss": 1.043, + "margin_dpo/margin_mean": 53.836448669433594, + "margin_dpo/margin_std": 71.72261810302734, + "step": 126 + }, + { + "KL/chosen_KL_mean": -41.83540344238281, + "KL/mean": -61.58380889892578, + "KL/rejected_KL_mean": -81.33221435546875, + "KL/std": 49.37230682373047, + "epoch": 0.18649045521292218, + "fcm_dpo/beta": 0.009420674294233322, + "fcm_dpo/delta": 0.028969500213861465, + "fcm_dpo/margin": 39.496803283691406, + "fcm_dpo/q_t": 0.4141026735305786, + "grad_norm": 20.757905960083008, + "learning_rate": 4.89374339765481e-07, + "logits/chosen": -0.5394914150238037, + "logits/rejected": -0.5201703310012817, + "logps/chosen": -98.39008331298828, + "logps/ref_chosen": -56.55467987060547, + "logps/ref_rejected": -76.7957763671875, + "logps/rejected": -158.12799072265625, + "loss": 1.1294, + "margin_dpo/margin_mean": 39.496803283691406, + "margin_dpo/margin_std": 62.23881149291992, + "step": 127 + }, + { + "KL/chosen_KL_mean": -44.993072509765625, + "KL/mean": -65.68038177490234, + "KL/rejected_KL_mean": -86.36769104003906, + "KL/std": 58.315940856933594, + "epoch": 0.18795888399412627, + "fcm_dpo/beta": 0.009480522945523262, + "fcm_dpo/delta": 0.007658433169126511, + "fcm_dpo/margin": 41.37461853027344, + "fcm_dpo/q_t": 0.41293513774871826, + "grad_norm": 29.94881248474121, + "learning_rate": 4.890010211106795e-07, + "logits/chosen": -0.5278192162513733, + "logits/rejected": -0.5076951384544373, + "logps/chosen": -103.11402893066406, + "logps/ref_chosen": -58.12095642089844, + "logps/ref_rejected": -76.43896484375, + "logps/rejected": -162.806640625, + "loss": 1.143, + "margin_dpo/margin_mean": 41.37461853027344, + "margin_dpo/margin_std": 72.8564453125, + "step": 128 + }, + { + "KL/chosen_KL_mean": -54.26958465576172, + "KL/mean": -76.15081024169922, + "KL/rejected_KL_mean": -98.03204345703125, + "KL/std": 64.25934600830078, + "epoch": 0.1894273127753304, + "fcm_dpo/beta": 0.009432371705770493, + "fcm_dpo/delta": -0.013336148113012314, + "fcm_dpo/margin": 43.762451171875, + "fcm_dpo/q_t": 0.4137777090072632, + "grad_norm": 20.718914031982422, + "learning_rate": 4.88621404556699e-07, + "logits/chosen": -0.5499258637428284, + "logits/rejected": -0.539750337600708, + "logps/chosen": -121.18595886230469, + "logps/ref_chosen": -66.91637420654297, + "logps/ref_rejected": -96.6422119140625, + "logps/rejected": -194.67425537109375, + "loss": 1.1482, + "margin_dpo/margin_mean": 43.762454986572266, + "margin_dpo/margin_std": 83.50243377685547, + "step": 129 + }, + { + "KL/chosen_KL_mean": -40.19133758544922, + "KL/mean": -73.26377868652344, + "KL/rejected_KL_mean": -106.33623504638672, + "KL/std": 66.14134216308594, + "epoch": 0.19089574155653452, + "fcm_dpo/beta": 0.009208977222442627, + "fcm_dpo/delta": -0.22258631885051727, + "fcm_dpo/margin": 66.1448974609375, + "fcm_dpo/q_t": 0.36926034092903137, + "grad_norm": 21.168210983276367, + "learning_rate": 4.882355001067891e-07, + "logits/chosen": -0.48520392179489136, + "logits/rejected": -0.47909384965896606, + "logps/chosen": -84.85818481445312, + "logps/ref_chosen": -44.66685104370117, + "logps/ref_rejected": -82.78165435791016, + "logps/rejected": -189.11788940429688, + "loss": 0.9951, + "margin_dpo/margin_mean": 66.1448974609375, + "margin_dpo/margin_std": 80.59586334228516, + "step": 130 + }, + { + "KL/chosen_KL_mean": -36.06732177734375, + "KL/mean": -69.21546173095703, + "KL/rejected_KL_mean": -102.36358642578125, + "KL/std": 65.6218490600586, + "epoch": 0.19236417033773862, + "fcm_dpo/beta": 0.008744207210838795, + "fcm_dpo/delta": -0.19108328223228455, + "fcm_dpo/margin": 66.29625701904297, + "fcm_dpo/q_t": 0.3681938648223877, + "grad_norm": 28.092988967895508, + "learning_rate": 4.878433179298909e-07, + "logits/chosen": -0.48882123827934265, + "logits/rejected": -0.4953649342060089, + "logps/chosen": -80.99191284179688, + "logps/ref_chosen": -44.924591064453125, + "logps/ref_rejected": -88.44401550292969, + "logps/rejected": -190.80758666992188, + "loss": 0.9777, + "margin_dpo/margin_mean": 66.29625701904297, + "margin_dpo/margin_std": 71.81926727294922, + "step": 131 + }, + { + "KL/chosen_KL_mean": -48.046173095703125, + "KL/mean": -75.12873840332031, + "KL/rejected_KL_mean": -102.2113037109375, + "KL/std": 65.57552337646484, + "epoch": 0.19383259911894274, + "fcm_dpo/beta": 0.00856691226363182, + "fcm_dpo/delta": -0.06736327707767487, + "fcm_dpo/margin": 54.165130615234375, + "fcm_dpo/q_t": 0.4011274576187134, + "grad_norm": 19.968385696411133, + "learning_rate": 4.874448683603694e-07, + "logits/chosen": -0.5298300385475159, + "logits/rejected": -0.5281400680541992, + "logps/chosen": -107.04725646972656, + "logps/ref_chosen": -59.00108337402344, + "logps/ref_rejected": -87.89215087890625, + "logps/rejected": -190.10345458984375, + "loss": 1.0883, + "margin_dpo/margin_mean": 54.165130615234375, + "margin_dpo/margin_std": 85.23381042480469, + "step": 132 + }, + { + "KL/chosen_KL_mean": -56.799015045166016, + "KL/mean": -80.92729187011719, + "KL/rejected_KL_mean": -105.05557250976562, + "KL/std": 59.583778381347656, + "epoch": 0.19530102790014683, + "fcm_dpo/beta": 0.008522960357367992, + "fcm_dpo/delta": -0.011873488314449787, + "fcm_dpo/margin": 48.256553649902344, + "fcm_dpo/q_t": 0.40982773900032043, + "grad_norm": 25.56277847290039, + "learning_rate": 4.870401618977415e-07, + "logits/chosen": -0.506017804145813, + "logits/rejected": -0.4915475845336914, + "logps/chosen": -123.40351867675781, + "logps/ref_chosen": -66.60449981689453, + "logps/ref_rejected": -96.33355712890625, + "logps/rejected": -201.38912963867188, + "loss": 1.109, + "margin_dpo/margin_mean": 48.256561279296875, + "margin_dpo/margin_std": 75.63455200195312, + "step": 133 + }, + { + "KL/chosen_KL_mean": -44.83940505981445, + "KL/mean": -70.63905334472656, + "KL/rejected_KL_mean": -96.43869018554688, + "KL/std": 57.99497604370117, + "epoch": 0.19676945668135096, + "fcm_dpo/beta": 0.00851006992161274, + "fcm_dpo/delta": -0.040991440415382385, + "fcm_dpo/margin": 51.59928894042969, + "fcm_dpo/q_t": 0.4014682173728943, + "grad_norm": 18.941587448120117, + "learning_rate": 4.866292092063986e-07, + "logits/chosen": -0.49966758489608765, + "logits/rejected": -0.4865725636482239, + "logps/chosen": -96.90866088867188, + "logps/ref_chosen": -52.06925582885742, + "logps/ref_rejected": -87.6545181274414, + "logps/rejected": -184.09320068359375, + "loss": 1.0664, + "margin_dpo/margin_mean": 51.59928894042969, + "margin_dpo/margin_std": 67.03329467773438, + "step": 134 + }, + { + "KL/chosen_KL_mean": -49.720855712890625, + "KL/mean": -86.716064453125, + "KL/rejected_KL_mean": -123.71127319335938, + "KL/std": 76.8663330078125, + "epoch": 0.19823788546255505, + "fcm_dpo/beta": 0.008192040026187897, + "fcm_dpo/delta": -0.21948286890983582, + "fcm_dpo/margin": 73.99042510986328, + "fcm_dpo/q_t": 0.37052643299102783, + "grad_norm": 22.25084686279297, + "learning_rate": 4.862120211153265e-07, + "logits/chosen": -0.4897315800189972, + "logits/rejected": -0.5235172510147095, + "logps/chosen": -100.07471466064453, + "logps/ref_chosen": -50.353858947753906, + "logps/ref_rejected": -115.97975158691406, + "logps/rejected": -239.69102478027344, + "loss": 0.9939, + "margin_dpo/margin_mean": 73.99043273925781, + "margin_dpo/margin_std": 92.94728088378906, + "step": 135 + }, + { + "KL/chosen_KL_mean": -59.06098175048828, + "KL/mean": -83.65766906738281, + "KL/rejected_KL_mean": -108.25436401367188, + "KL/std": 69.63789367675781, + "epoch": 0.19970631424375918, + "fcm_dpo/beta": 0.008057507686316967, + "fcm_dpo/delta": 0.003085322678089142, + "fcm_dpo/margin": 49.19337844848633, + "fcm_dpo/q_t": 0.4188251495361328, + "grad_norm": 20.361692428588867, + "learning_rate": 4.857886086178193e-07, + "logits/chosen": -0.48390525579452515, + "logits/rejected": -0.4752395749092102, + "logps/chosen": -124.13349151611328, + "logps/ref_chosen": -65.072509765625, + "logps/ref_rejected": -96.32122802734375, + "logps/rejected": -204.57559204101562, + "loss": 1.1425, + "margin_dpo/margin_mean": 49.193382263183594, + "margin_dpo/margin_std": 89.2242431640625, + "step": 136 + }, + { + "KL/chosen_KL_mean": -56.935489654541016, + "KL/mean": -95.57328796386719, + "KL/rejected_KL_mean": -134.21109008789062, + "KL/std": 92.69071960449219, + "epoch": 0.2011747430249633, + "fcm_dpo/beta": 0.007838413119316101, + "fcm_dpo/delta": -0.21924690902233124, + "fcm_dpo/margin": 77.27558898925781, + "fcm_dpo/q_t": 0.37673407793045044, + "grad_norm": 18.20696258544922, + "learning_rate": 4.853589828711902e-07, + "logits/chosen": -0.45655950903892517, + "logits/rejected": -0.48352983593940735, + "logps/chosen": -105.69461059570312, + "logps/ref_chosen": -48.759117126464844, + "logps/ref_rejected": -113.86376953125, + "logps/rejected": -248.07485961914062, + "loss": 1.025, + "margin_dpo/margin_mean": 77.27558898925781, + "margin_dpo/margin_std": 110.96575927734375, + "step": 137 + }, + { + "KL/chosen_KL_mean": -59.1551399230957, + "KL/mean": -88.54357147216797, + "KL/rejected_KL_mean": -117.9320068359375, + "KL/std": 70.5914306640625, + "epoch": 0.2026431718061674, + "fcm_dpo/beta": 0.00770821887999773, + "fcm_dpo/delta": -0.055549122393131256, + "fcm_dpo/margin": 58.77688217163086, + "fcm_dpo/q_t": 0.3963842988014221, + "grad_norm": 21.46184730529785, + "learning_rate": 4.849231551964771e-07, + "logits/chosen": -0.4387979507446289, + "logits/rejected": -0.4272313714027405, + "logps/chosen": -119.67478942871094, + "logps/ref_chosen": -60.519649505615234, + "logps/ref_rejected": -93.19694519042969, + "logps/rejected": -211.12896728515625, + "loss": 1.0551, + "margin_dpo/margin_mean": 58.77688217163086, + "margin_dpo/margin_std": 72.2972640991211, + "step": 138 + }, + { + "KL/chosen_KL_mean": -49.92842483520508, + "KL/mean": -85.01356506347656, + "KL/rejected_KL_mean": -120.09870910644531, + "KL/std": 67.0115737915039, + "epoch": 0.20411160058737152, + "fcm_dpo/beta": 0.007536326535046101, + "fcm_dpo/delta": -0.13581906259059906, + "fcm_dpo/margin": 70.17027282714844, + "fcm_dpo/q_t": 0.3828091025352478, + "grad_norm": 18.216188430786133, + "learning_rate": 4.844811370781446e-07, + "logits/chosen": -0.4372691512107849, + "logits/rejected": -0.42744114995002747, + "logps/chosen": -96.81980895996094, + "logps/ref_chosen": -46.89138412475586, + "logps/ref_rejected": -79.72798156738281, + "logps/rejected": -199.82669067382812, + "loss": 1.0186, + "margin_dpo/margin_mean": 70.17028045654297, + "margin_dpo/margin_std": 86.85481262207031, + "step": 139 + }, + { + "KL/chosen_KL_mean": -60.878379821777344, + "KL/mean": -92.82592010498047, + "KL/rejected_KL_mean": -124.77346801757812, + "KL/std": 74.88318634033203, + "epoch": 0.2055800293685756, + "fcm_dpo/beta": 0.0073799854144454, + "fcm_dpo/delta": -0.07513369619846344, + "fcm_dpo/margin": 63.89509582519531, + "fcm_dpo/q_t": 0.3952398896217346, + "grad_norm": 21.68344497680664, + "learning_rate": 4.840329401637809e-07, + "logits/chosen": -0.4220992922782898, + "logits/rejected": -0.40758657455444336, + "logps/chosen": -119.85308837890625, + "logps/ref_chosen": -58.97471618652344, + "logps/ref_rejected": -83.28410339355469, + "logps/rejected": -208.05758666992188, + "loss": 1.0673, + "margin_dpo/margin_mean": 63.89509201049805, + "margin_dpo/margin_std": 89.70909118652344, + "step": 140 + }, + { + "KL/chosen_KL_mean": -68.69772338867188, + "KL/mean": -98.87570190429688, + "KL/rejected_KL_mean": -129.05369567871094, + "KL/std": 82.59878540039062, + "epoch": 0.20704845814977973, + "fcm_dpo/beta": 0.007316044997423887, + "fcm_dpo/delta": -0.04346423223614693, + "fcm_dpo/margin": 60.355979919433594, + "fcm_dpo/q_t": 0.40099745988845825, + "grad_norm": 26.641067504882812, + "learning_rate": 4.83578576263792e-07, + "logits/chosen": -0.43964171409606934, + "logits/rejected": -0.42764222621917725, + "logps/chosen": -143.77337646484375, + "logps/ref_chosen": -75.07566833496094, + "logps/ref_rejected": -98.1922607421875, + "logps/rejected": -227.24595642089844, + "loss": 1.1025, + "margin_dpo/margin_mean": 60.355979919433594, + "margin_dpo/margin_std": 95.37814331054688, + "step": 141 + }, + { + "KL/chosen_KL_mean": -70.7205581665039, + "KL/mean": -105.15746307373047, + "KL/rejected_KL_mean": -139.5943603515625, + "KL/std": 90.38099670410156, + "epoch": 0.20851688693098386, + "fcm_dpo/beta": 0.0072142817080020905, + "fcm_dpo/delta": -0.10188804566860199, + "fcm_dpo/margin": 68.87380981445312, + "fcm_dpo/q_t": 0.39290913939476013, + "grad_norm": 28.223947525024414, + "learning_rate": 4.83118057351089e-07, + "logits/chosen": -0.4045884907245636, + "logits/rejected": -0.40342068672180176, + "logps/chosen": -128.7484893798828, + "logps/ref_chosen": -58.027931213378906, + "logps/ref_rejected": -94.58222961425781, + "logps/rejected": -234.1765899658203, + "loss": 1.0857, + "margin_dpo/margin_mean": 68.87380981445312, + "margin_dpo/margin_std": 106.27733612060547, + "step": 142 + }, + { + "KL/chosen_KL_mean": -73.88005065917969, + "KL/mean": -95.9843521118164, + "KL/rejected_KL_mean": -118.08866119384766, + "KL/std": 79.61582946777344, + "epoch": 0.20998531571218795, + "fcm_dpo/beta": 0.007206078618764877, + "fcm_dpo/delta": 0.08411475270986557, + "fcm_dpo/margin": 44.20860290527344, + "fcm_dpo/q_t": 0.4320971667766571, + "grad_norm": 23.867572784423828, + "learning_rate": 4.826513955607734e-07, + "logits/chosen": -0.40776681900024414, + "logits/rejected": -0.4014623761177063, + "logps/chosen": -131.47650146484375, + "logps/ref_chosen": -57.59645080566406, + "logps/ref_rejected": -78.99957275390625, + "logps/rejected": -197.08822631835938, + "loss": 1.1973, + "margin_dpo/margin_mean": 44.20860290527344, + "margin_dpo/margin_std": 93.17940521240234, + "step": 143 + }, + { + "KL/chosen_KL_mean": -65.05524444580078, + "KL/mean": -92.26278686523438, + "KL/rejected_KL_mean": -119.47032165527344, + "KL/std": 66.92149353027344, + "epoch": 0.21145374449339208, + "fcm_dpo/beta": 0.007265343330800533, + "fcm_dpo/delta": 0.004837200976908207, + "fcm_dpo/margin": 54.41508483886719, + "fcm_dpo/q_t": 0.41054314374923706, + "grad_norm": 20.957622528076172, + "learning_rate": 4.821786031898176e-07, + "logits/chosen": -0.3970365524291992, + "logits/rejected": -0.3828931450843811, + "logps/chosen": -124.96160888671875, + "logps/ref_chosen": -59.90636444091797, + "logps/ref_rejected": -82.00025939941406, + "logps/rejected": -201.4705810546875, + "loss": 1.1054, + "margin_dpo/margin_mean": 54.41508483886719, + "margin_dpo/margin_std": 78.49324035644531, + "step": 144 + }, + { + "KL/chosen_KL_mean": -62.5151252746582, + "KL/mean": -91.44053649902344, + "KL/rejected_KL_mean": -120.36595153808594, + "KL/std": 66.93643188476562, + "epoch": 0.21292217327459617, + "fcm_dpo/beta": 0.007246987894177437, + "fcm_dpo/delta": -0.020106535404920578, + "fcm_dpo/margin": 57.850833892822266, + "fcm_dpo/q_t": 0.40498581528663635, + "grad_norm": 24.766576766967773, + "learning_rate": 4.816996926967401e-07, + "logits/chosen": -0.4346858263015747, + "logits/rejected": -0.4181862473487854, + "logps/chosen": -119.11579132080078, + "logps/ref_chosen": -56.60066604614258, + "logps/ref_rejected": -77.86631774902344, + "logps/rejected": -198.23226928710938, + "loss": 1.0899, + "margin_dpo/margin_mean": 57.850833892822266, + "margin_dpo/margin_std": 81.18896484375, + "step": 145 + }, + { + "KL/chosen_KL_mean": -84.92266082763672, + "KL/mean": -107.23759460449219, + "KL/rejected_KL_mean": -129.55252075195312, + "KL/std": 72.51426696777344, + "epoch": 0.2143906020558003, + "fcm_dpo/beta": 0.007306361570954323, + "fcm_dpo/delta": 0.07646190375089645, + "fcm_dpo/margin": 44.6298828125, + "fcm_dpo/q_t": 0.4260821044445038, + "grad_norm": 26.79239273071289, + "learning_rate": 4.812146767012779e-07, + "logits/chosen": -0.40483713150024414, + "logits/rejected": -0.3770599961280823, + "logps/chosen": -150.92311096191406, + "logps/ref_chosen": -66.00045013427734, + "logps/ref_rejected": -81.70278930664062, + "logps/rejected": -211.25531005859375, + "loss": 1.1855, + "margin_dpo/margin_mean": 44.6298828125, + "margin_dpo/margin_std": 87.585693359375, + "step": 146 + }, + { + "KL/chosen_KL_mean": -62.145790100097656, + "KL/mean": -92.88223266601562, + "KL/rejected_KL_mean": -123.61865997314453, + "KL/std": 73.63186645507812, + "epoch": 0.21585903083700442, + "fcm_dpo/beta": 0.007286765147000551, + "fcm_dpo/delta": -0.050357475876808167, + "fcm_dpo/margin": 61.472869873046875, + "fcm_dpo/q_t": 0.40113240480422974, + "grad_norm": 20.187551498413086, + "learning_rate": 4.807235679840536e-07, + "logits/chosen": -0.4601389765739441, + "logits/rejected": -0.4417986273765564, + "logps/chosen": -115.55127716064453, + "logps/ref_chosen": -53.405487060546875, + "logps/ref_rejected": -71.39060974121094, + "logps/rejected": -195.00927734375, + "loss": 1.0871, + "margin_dpo/margin_mean": 61.472869873046875, + "margin_dpo/margin_std": 91.1969985961914, + "step": 147 + }, + { + "KL/chosen_KL_mean": -60.82073211669922, + "KL/mean": -86.87813568115234, + "KL/rejected_KL_mean": -112.935546875, + "KL/std": 72.8238296508789, + "epoch": 0.2173274596182085, + "fcm_dpo/beta": 0.007221372798085213, + "fcm_dpo/delta": -0.08385775983333588, + "fcm_dpo/margin": 52.114810943603516, + "fcm_dpo/q_t": 0.41665685176849365, + "grad_norm": 18.90130043029785, + "learning_rate": 4.802263794862384e-07, + "logits/chosen": -0.4921185076236725, + "logits/rejected": -0.4849007725715637, + "logps/chosen": -125.7578125, + "logps/ref_chosen": -64.93708038330078, + "logps/ref_rejected": -103.09384155273438, + "logps/rejected": -216.02938842773438, + "loss": 1.1251, + "margin_dpo/margin_mean": 52.114810943603516, + "margin_dpo/margin_std": 76.679443359375, + "step": 148 + }, + { + "KL/chosen_KL_mean": -57.94389724731445, + "KL/mean": -90.5237045288086, + "KL/rejected_KL_mean": -123.103515625, + "KL/std": 65.8777847290039, + "epoch": 0.21879588839941264, + "fcm_dpo/beta": 0.0070372275076806545, + "fcm_dpo/delta": -0.0631796270608902, + "fcm_dpo/margin": 65.15960693359375, + "fcm_dpo/q_t": 0.3951166570186615, + "grad_norm": 18.19388198852539, + "learning_rate": 4.797231243092118e-07, + "logits/chosen": -0.49327534437179565, + "logits/rejected": -0.47827810049057007, + "logps/chosen": -116.41766357421875, + "logps/ref_chosen": -58.47376251220703, + "logps/ref_rejected": -99.31474304199219, + "logps/rejected": -222.4182586669922, + "loss": 1.0541, + "margin_dpo/margin_mean": 65.15960693359375, + "margin_dpo/margin_std": 78.02362060546875, + "step": 149 + }, + { + "KL/chosen_KL_mean": -51.43506622314453, + "KL/mean": -83.17096710205078, + "KL/rejected_KL_mean": -114.90686798095703, + "KL/std": 77.47767639160156, + "epoch": 0.22026431718061673, + "fcm_dpo/beta": 0.006972650997340679, + "fcm_dpo/delta": -0.04591844975948334, + "fcm_dpo/margin": 63.47180938720703, + "fcm_dpo/q_t": 0.4047049582004547, + "grad_norm": 17.938838958740234, + "learning_rate": 4.792138157142157e-07, + "logits/chosen": -0.46106183528900146, + "logits/rejected": -0.4645771384239197, + "logps/chosen": -97.14087677001953, + "logps/ref_chosen": -45.705810546875, + "logps/ref_rejected": -83.34759521484375, + "logps/rejected": -198.25445556640625, + "loss": 1.0829, + "margin_dpo/margin_mean": 63.47180938720703, + "margin_dpo/margin_std": 92.78956604003906, + "step": 150 + }, + { + "KL/chosen_KL_mean": -63.859195709228516, + "KL/mean": -95.80221557617188, + "KL/rejected_KL_mean": -127.74524688720703, + "KL/std": 72.78681945800781, + "epoch": 0.22173274596182085, + "fcm_dpo/beta": 0.006956371478736401, + "fcm_dpo/delta": -0.046492453664541245, + "fcm_dpo/margin": 63.88605499267578, + "fcm_dpo/q_t": 0.398138165473938, + "grad_norm": 19.721174240112305, + "learning_rate": 4.786984671220053e-07, + "logits/chosen": -0.5282187461853027, + "logits/rejected": -0.5002726912498474, + "logps/chosen": -134.43002319335938, + "logps/ref_chosen": -70.57083129882812, + "logps/ref_rejected": -100.46382141113281, + "logps/rejected": -228.2090606689453, + "loss": 1.061, + "margin_dpo/margin_mean": 63.88605499267578, + "margin_dpo/margin_std": 79.7131118774414, + "step": 151 + }, + { + "KL/chosen_KL_mean": -55.69847106933594, + "KL/mean": -94.40074157714844, + "KL/rejected_KL_mean": -133.10302734375, + "KL/std": 75.01636505126953, + "epoch": 0.22320117474302498, + "fcm_dpo/beta": 0.0068196142092347145, + "fcm_dpo/delta": -0.13479407131671906, + "fcm_dpo/margin": 77.40454864501953, + "fcm_dpo/q_t": 0.38243043422698975, + "grad_norm": 20.134836196899414, + "learning_rate": 4.78177092112495e-07, + "logits/chosen": -0.5108896493911743, + "logits/rejected": -0.5100568532943726, + "logps/chosen": -115.86285400390625, + "logps/ref_chosen": -60.16438674926758, + "logps/ref_rejected": -106.14045715332031, + "logps/rejected": -239.24346923828125, + "loss": 1.0175, + "margin_dpo/margin_mean": 77.40455627441406, + "margin_dpo/margin_std": 91.77665710449219, + "step": 152 + }, + { + "KL/chosen_KL_mean": -56.524017333984375, + "KL/mean": -89.48245239257812, + "KL/rejected_KL_mean": -122.4408950805664, + "KL/std": 81.44053649902344, + "epoch": 0.22466960352422907, + "fcm_dpo/beta": 0.0067241257056593895, + "fcm_dpo/delta": -0.04522576555609703, + "fcm_dpo/margin": 65.91687774658203, + "fcm_dpo/q_t": 0.40355831384658813, + "grad_norm": 15.487606048583984, + "learning_rate": 4.776497044244016e-07, + "logits/chosen": -0.4853493571281433, + "logits/rejected": -0.48000335693359375, + "logps/chosen": -112.83929443359375, + "logps/ref_chosen": -56.315277099609375, + "logps/ref_rejected": -85.65583801269531, + "logps/rejected": -208.0967254638672, + "loss": 1.0897, + "margin_dpo/margin_mean": 65.91687774658203, + "margin_dpo/margin_std": 99.65748596191406, + "step": 153 + }, + { + "KL/chosen_KL_mean": -67.8558349609375, + "KL/mean": -99.38180541992188, + "KL/rejected_KL_mean": -130.9077606201172, + "KL/std": 81.84564208984375, + "epoch": 0.2261380323054332, + "fcm_dpo/beta": 0.006699780933558941, + "fcm_dpo/delta": -0.023667776957154274, + "fcm_dpo/margin": 63.05192947387695, + "fcm_dpo/q_t": 0.4071503281593323, + "grad_norm": 18.857498168945312, + "learning_rate": 4.771163179548808e-07, + "logits/chosen": -0.4654182493686676, + "logits/rejected": -0.4673847556114197, + "logps/chosen": -130.59840393066406, + "logps/ref_chosen": -62.74256896972656, + "logps/ref_rejected": -104.24420166015625, + "logps/rejected": -235.15194702148438, + "loss": 1.1211, + "margin_dpo/margin_mean": 63.05193328857422, + "margin_dpo/margin_std": 104.29759216308594, + "step": 154 + }, + { + "KL/chosen_KL_mean": -61.78107452392578, + "KL/mean": -93.73155975341797, + "KL/rejected_KL_mean": -125.68203735351562, + "KL/std": 76.08834838867188, + "epoch": 0.2276064610866373, + "fcm_dpo/beta": 0.006656583398580551, + "fcm_dpo/delta": -0.026573501527309418, + "fcm_dpo/margin": 63.900962829589844, + "fcm_dpo/q_t": 0.4044332206249237, + "grad_norm": 19.28769302368164, + "learning_rate": 4.7657694675916247e-07, + "logits/chosen": -0.48790478706359863, + "logits/rejected": -0.468170702457428, + "logps/chosen": -122.43425750732422, + "logps/ref_chosen": -60.65318298339844, + "logps/ref_rejected": -77.49220275878906, + "logps/rejected": -203.1742401123047, + "loss": 1.0941, + "margin_dpo/margin_mean": 63.900962829589844, + "margin_dpo/margin_std": 94.01338195800781, + "step": 155 + }, + { + "KL/chosen_KL_mean": -86.89419555664062, + "KL/mean": -105.12815856933594, + "KL/rejected_KL_mean": -123.36213684082031, + "KL/std": 80.70298767089844, + "epoch": 0.2290748898678414, + "fcm_dpo/beta": 0.006699825637042522, + "fcm_dpo/delta": 0.053138453513383865, + "fcm_dpo/margin": 36.46794128417969, + "fcm_dpo/q_t": 0.44462156295776367, + "grad_norm": 27.65213966369629, + "learning_rate": 4.7603160505017893e-07, + "logits/chosen": -0.419753760099411, + "logits/rejected": -0.41109997034072876, + "logps/chosen": -156.38607788085938, + "logps/ref_chosen": -69.49188232421875, + "logps/ref_rejected": -77.16929626464844, + "logps/rejected": -200.53143310546875, + "loss": 1.276, + "margin_dpo/margin_mean": 36.46794128417969, + "margin_dpo/margin_std": 106.41633605957031, + "step": 156 + }, + { + "KL/chosen_KL_mean": -78.34427642822266, + "KL/mean": -119.24581909179688, + "KL/rejected_KL_mean": -160.1473388671875, + "KL/std": 88.42298889160156, + "epoch": 0.2305433186490455, + "fcm_dpo/beta": 0.00652310810983181, + "fcm_dpo/delta": -0.14256341755390167, + "fcm_dpo/margin": 81.80308532714844, + "fcm_dpo/q_t": 0.3782350420951843, + "grad_norm": 23.294269561767578, + "learning_rate": 4.7548030719819154e-07, + "logits/chosen": -0.40555089712142944, + "logits/rejected": -0.4124807119369507, + "logps/chosen": -139.71270751953125, + "logps/ref_chosen": -61.368438720703125, + "logps/ref_rejected": -107.64636993408203, + "logps/rejected": -267.793701171875, + "loss": 1.0267, + "margin_dpo/margin_mean": 81.80308532714844, + "margin_dpo/margin_std": 100.67677307128906, + "step": 157 + }, + { + "KL/chosen_KL_mean": -78.94845581054688, + "KL/mean": -122.50050354003906, + "KL/rejected_KL_mean": -166.05255126953125, + "KL/std": 109.67285919189453, + "epoch": 0.23201174743024963, + "fcm_dpo/beta": 0.006357924081385136, + "fcm_dpo/delta": -0.16277411580085754, + "fcm_dpo/margin": 87.10411834716797, + "fcm_dpo/q_t": 0.386168897151947, + "grad_norm": 20.690141677856445, + "learning_rate": 4.7492306773041136e-07, + "logits/chosen": -0.3853977918624878, + "logits/rejected": -0.4026561975479126, + "logps/chosen": -136.56137084960938, + "logps/ref_chosen": -57.612918853759766, + "logps/ref_rejected": -113.6946792602539, + "logps/rejected": -279.74725341796875, + "loss": 1.0524, + "margin_dpo/margin_mean": 87.10411071777344, + "margin_dpo/margin_std": 131.93109130859375, + "step": 158 + }, + { + "KL/chosen_KL_mean": -89.82890319824219, + "KL/mean": -119.66598510742188, + "KL/rejected_KL_mean": -149.5030517578125, + "KL/std": 97.01811218261719, + "epoch": 0.23348017621145375, + "fcm_dpo/beta": 0.0063509754836559296, + "fcm_dpo/delta": 0.02134835720062256, + "fcm_dpo/margin": 59.67415237426758, + "fcm_dpo/q_t": 0.4156301021575928, + "grad_norm": 21.42896270751953, + "learning_rate": 4.743599013306165e-07, + "logits/chosen": -0.4047996401786804, + "logits/rejected": -0.37329649925231934, + "logps/chosen": -171.38925170898438, + "logps/ref_chosen": -81.56034851074219, + "logps/ref_rejected": -88.89871215820312, + "logps/rejected": -238.40176391601562, + "loss": 1.1467, + "margin_dpo/margin_mean": 59.674156188964844, + "margin_dpo/margin_std": 104.58900451660156, + "step": 159 + }, + { + "KL/chosen_KL_mean": -91.19225311279297, + "KL/mean": -130.95059204101562, + "KL/rejected_KL_mean": -170.70892333984375, + "KL/std": 102.61758422851562, + "epoch": 0.23494860499265785, + "fcm_dpo/beta": 0.006208137609064579, + "fcm_dpo/delta": -0.09941543638706207, + "fcm_dpo/margin": 79.51667785644531, + "fcm_dpo/q_t": 0.39641568064689636, + "grad_norm": 22.98140525817871, + "learning_rate": 4.737908228387656e-07, + "logits/chosen": -0.3908793032169342, + "logits/rejected": -0.38329264521598816, + "logps/chosen": -156.92312622070312, + "logps/ref_chosen": -65.73088073730469, + "logps/ref_rejected": -97.21781921386719, + "logps/rejected": -267.9267578125, + "loss": 1.091, + "margin_dpo/margin_mean": 79.51667785644531, + "margin_dpo/margin_std": 128.43882751464844, + "step": 160 + }, + { + "KL/chosen_KL_mean": -78.4635238647461, + "KL/mean": -113.22474670410156, + "KL/rejected_KL_mean": -147.9859619140625, + "KL/std": 81.19598388671875, + "epoch": 0.23641703377386197, + "fcm_dpo/beta": 0.00617564469575882, + "fcm_dpo/delta": -0.03067013993859291, + "fcm_dpo/margin": 69.5224380493164, + "fcm_dpo/q_t": 0.4049830436706543, + "grad_norm": 21.47429084777832, + "learning_rate": 4.7321584725060594e-07, + "logits/chosen": -0.3746282160282135, + "logits/rejected": -0.3739486634731293, + "logps/chosen": -130.89999389648438, + "logps/ref_chosen": -52.43647003173828, + "logps/ref_rejected": -83.43095397949219, + "logps/rejected": -231.41690063476562, + "loss": 1.0944, + "margin_dpo/margin_mean": 69.5224380493164, + "margin_dpo/margin_std": 102.78611755371094, + "step": 161 + }, + { + "KL/chosen_KL_mean": -75.56187438964844, + "KL/mean": -110.21990966796875, + "KL/rejected_KL_mean": -144.87796020507812, + "KL/std": 90.19256591796875, + "epoch": 0.23788546255506607, + "fcm_dpo/beta": 0.006094048731029034, + "fcm_dpo/delta": -0.02500341832637787, + "fcm_dpo/margin": 69.31608581542969, + "fcm_dpo/q_t": 0.4067990183830261, + "grad_norm": 23.615018844604492, + "learning_rate": 4.7263498971727905e-07, + "logits/chosen": -0.4356382191181183, + "logits/rejected": -0.41989463567733765, + "logps/chosen": -138.17245483398438, + "logps/ref_chosen": -62.6105842590332, + "logps/ref_rejected": -89.39057922363281, + "logps/rejected": -234.26853942871094, + "loss": 1.108, + "margin_dpo/margin_mean": 69.31608581542969, + "margin_dpo/margin_std": 106.76126861572266, + "step": 162 + }, + { + "KL/chosen_KL_mean": -85.64208221435547, + "KL/mean": -119.12933349609375, + "KL/rejected_KL_mean": -152.61660766601562, + "KL/std": 90.75747680664062, + "epoch": 0.2393538913362702, + "fcm_dpo/beta": 0.006128158885985613, + "fcm_dpo/delta": -0.010875340551137924, + "fcm_dpo/margin": 66.97450256347656, + "fcm_dpo/q_t": 0.41041916608810425, + "grad_norm": 22.441957473754883, + "learning_rate": 4.720482655449212e-07, + "logits/chosen": -0.32167524099349976, + "logits/rejected": -0.3008124828338623, + "logps/chosen": -140.66371154785156, + "logps/ref_chosen": -55.021629333496094, + "logps/ref_rejected": -75.418212890625, + "logps/rejected": -228.03482055664062, + "loss": 1.1182, + "margin_dpo/margin_mean": 66.97450256347656, + "margin_dpo/margin_std": 109.2835693359375, + "step": 163 + }, + { + "KL/chosen_KL_mean": -78.14186096191406, + "KL/mean": -119.51951599121094, + "KL/rejected_KL_mean": -160.89715576171875, + "KL/std": 89.18498992919922, + "epoch": 0.24082232011747431, + "fcm_dpo/beta": 0.005989897530525923, + "fcm_dpo/delta": -0.10233054310083389, + "fcm_dpo/margin": 82.75530242919922, + "fcm_dpo/q_t": 0.38811802864074707, + "grad_norm": 22.113636016845703, + "learning_rate": 4.714556901942599e-07, + "logits/chosen": -0.3749139904975891, + "logits/rejected": -0.36253267526626587, + "logps/chosen": -133.78253173828125, + "logps/ref_chosen": -55.64066696166992, + "logps/ref_rejected": -79.66463470458984, + "logps/rejected": -240.56179809570312, + "loss": 1.0369, + "margin_dpo/margin_mean": 82.75530242919922, + "margin_dpo/margin_std": 102.402587890625, + "step": 164 + }, + { + "KL/chosen_KL_mean": -83.58428192138672, + "KL/mean": -110.63142395019531, + "KL/rejected_KL_mean": -137.67855834960938, + "KL/std": 75.4303970336914, + "epoch": 0.2422907488986784, + "fcm_dpo/beta": 0.0060513936914503574, + "fcm_dpo/delta": 0.07517173886299133, + "fcm_dpo/margin": 54.094295501708984, + "fcm_dpo/q_t": 0.4266318678855896, + "grad_norm": 23.23404312133789, + "learning_rate": 4.708572792802069e-07, + "logits/chosen": -0.37104758620262146, + "logits/rejected": -0.3426979184150696, + "logps/chosen": -144.8949737548828, + "logps/ref_chosen": -61.310691833496094, + "logps/ref_rejected": -73.67060852050781, + "logps/rejected": -211.34918212890625, + "loss": 1.1723, + "margin_dpo/margin_mean": 54.09429931640625, + "margin_dpo/margin_std": 100.12647247314453, + "step": 165 + }, + { + "KL/chosen_KL_mean": -74.41826629638672, + "KL/mean": -124.09632873535156, + "KL/rejected_KL_mean": -173.77439880371094, + "KL/std": 109.63383483886719, + "epoch": 0.24375917767988253, + "fcm_dpo/beta": 0.005887492559850216, + "fcm_dpo/delta": -0.1977493166923523, + "fcm_dpo/margin": 99.35612487792969, + "fcm_dpo/q_t": 0.38029175996780396, + "grad_norm": 18.252317428588867, + "learning_rate": 4.702530485714461e-07, + "logits/chosen": -0.3085266351699829, + "logits/rejected": -0.3185623288154602, + "logps/chosen": -125.40187072753906, + "logps/ref_chosen": -50.98360061645508, + "logps/ref_rejected": -98.09512329101562, + "logps/rejected": -271.8695068359375, + "loss": 1.0172, + "margin_dpo/margin_mean": 99.35612487792969, + "margin_dpo/margin_std": 138.07797241210938, + "step": 166 + }, + { + "KL/chosen_KL_mean": -74.95281982421875, + "KL/mean": -127.87260437011719, + "KL/rejected_KL_mean": -180.79238891601562, + "KL/std": 100.11784362792969, + "epoch": 0.24522760646108663, + "fcm_dpo/beta": 0.005670108832418919, + "fcm_dpo/delta": -0.21297289431095123, + "fcm_dpo/margin": 105.83956909179688, + "fcm_dpo/q_t": 0.36611077189445496, + "grad_norm": 20.504474639892578, + "learning_rate": 4.6964301399001877e-07, + "logits/chosen": -0.32652735710144043, + "logits/rejected": -0.32873308658599854, + "logps/chosen": -125.37691497802734, + "logps/ref_chosen": -50.424095153808594, + "logps/ref_rejected": -96.03042602539062, + "logps/rejected": -276.82281494140625, + "loss": 0.9685, + "margin_dpo/margin_mean": 105.83956909179688, + "margin_dpo/margin_std": 115.47872924804688, + "step": 167 + }, + { + "KL/chosen_KL_mean": -81.31246948242188, + "KL/mean": -119.5781478881836, + "KL/rejected_KL_mean": -157.84381103515625, + "KL/std": 93.51333618164062, + "epoch": 0.24669603524229075, + "fcm_dpo/beta": 0.00557487178593874, + "fcm_dpo/delta": -0.027986720204353333, + "fcm_dpo/margin": 76.5313491821289, + "fcm_dpo/q_t": 0.40447282791137695, + "grad_norm": 20.050628662109375, + "learning_rate": 4.690271916109034e-07, + "logits/chosen": -0.34561973810195923, + "logits/rejected": -0.33558547496795654, + "logps/chosen": -130.77529907226562, + "logps/ref_chosen": -49.462825775146484, + "logps/ref_rejected": -75.30855560302734, + "logps/rejected": -233.15237426757812, + "loss": 1.0813, + "margin_dpo/margin_mean": 76.53134155273438, + "margin_dpo/margin_std": 104.77227783203125, + "step": 168 + }, + { + "KL/chosen_KL_mean": -83.71910095214844, + "KL/mean": -117.21250915527344, + "KL/rejected_KL_mean": -150.70590209960938, + "KL/std": 92.11152648925781, + "epoch": 0.24816446402349487, + "fcm_dpo/beta": 0.005499421618878841, + "fcm_dpo/delta": -0.07137174159288406, + "fcm_dpo/margin": 66.98680114746094, + "fcm_dpo/q_t": 0.42043811082839966, + "grad_norm": 20.767568588256836, + "learning_rate": 4.6840559766159235e-07, + "logits/chosen": -0.38283443450927734, + "logits/rejected": -0.3673766255378723, + "logps/chosen": -143.52255249023438, + "logps/ref_chosen": -59.803443908691406, + "logps/ref_rejected": -83.34574890136719, + "logps/rejected": -234.05165100097656, + "loss": 1.1616, + "margin_dpo/margin_mean": 66.98680877685547, + "margin_dpo/margin_std": 126.373779296875, + "step": 169 + }, + { + "KL/chosen_KL_mean": -73.89985656738281, + "KL/mean": -113.43084716796875, + "KL/rejected_KL_mean": -152.96185302734375, + "KL/std": 87.49028778076172, + "epoch": 0.24963289280469897, + "fcm_dpo/beta": 0.005440497770905495, + "fcm_dpo/delta": -0.032409437000751495, + "fcm_dpo/margin": 79.06198120117188, + "fcm_dpo/q_t": 0.4014926552772522, + "grad_norm": 18.433393478393555, + "learning_rate": 4.6777824852166437e-07, + "logits/chosen": -0.32298341393470764, + "logits/rejected": -0.3117997348308563, + "logps/chosen": -123.37162780761719, + "logps/ref_chosen": -49.471771240234375, + "logps/ref_rejected": -75.91734313964844, + "logps/rejected": -228.87918090820312, + "loss": 1.0797, + "margin_dpo/margin_mean": 79.06198120117188, + "margin_dpo/margin_std": 104.669189453125, + "step": 170 + }, + { + "KL/chosen_KL_mean": -109.0721435546875, + "KL/mean": -140.8416290283203, + "KL/rejected_KL_mean": -172.61111450195312, + "KL/std": 102.56979370117188, + "epoch": 0.2511013215859031, + "fcm_dpo/beta": 0.005503002088516951, + "fcm_dpo/delta": 0.05219453573226929, + "fcm_dpo/margin": 63.538963317871094, + "fcm_dpo/q_t": 0.42633184790611267, + "grad_norm": 27.85107421875, + "learning_rate": 4.6714516072235273e-07, + "logits/chosen": -0.3782072067260742, + "logits/rejected": -0.36336031556129456, + "logps/chosen": -193.5714569091797, + "logps/ref_chosen": -84.49931335449219, + "logps/ref_rejected": -109.38209533691406, + "logps/rejected": -281.99322509765625, + "loss": 1.1822, + "margin_dpo/margin_mean": 63.538963317871094, + "margin_dpo/margin_std": 132.4144287109375, + "step": 171 + }, + { + "KL/chosen_KL_mean": -95.08564758300781, + "KL/mean": -128.90762329101562, + "KL/rejected_KL_mean": -162.72958374023438, + "KL/std": 98.64192199707031, + "epoch": 0.2525697503671072, + "fcm_dpo/beta": 0.0055364081636071205, + "fcm_dpo/delta": 0.02648979052901268, + "fcm_dpo/margin": 67.64393615722656, + "fcm_dpo/q_t": 0.41614508628845215, + "grad_norm": 19.190082550048828, + "learning_rate": 4.6650635094610966e-07, + "logits/chosen": -0.3858957886695862, + "logits/rejected": -0.36709922552108765, + "logps/chosen": -163.73956298828125, + "logps/ref_chosen": -68.65391540527344, + "logps/ref_rejected": -85.43667602539062, + "logps/rejected": -248.166259765625, + "loss": 1.1328, + "margin_dpo/margin_mean": 67.64393615722656, + "margin_dpo/margin_std": 111.18234252929688, + "step": 172 + }, + { + "KL/chosen_KL_mean": -86.4188461303711, + "KL/mean": -119.80665588378906, + "KL/rejected_KL_mean": -153.1944580078125, + "KL/std": 91.71368408203125, + "epoch": 0.2540381791483113, + "fcm_dpo/beta": 0.005593603476881981, + "fcm_dpo/delta": 0.027135606855154037, + "fcm_dpo/margin": 66.7756118774414, + "fcm_dpo/q_t": 0.4151589870452881, + "grad_norm": 20.20654296875, + "learning_rate": 4.6586183602616687e-07, + "logits/chosen": -0.4084116816520691, + "logits/rejected": -0.38007980585098267, + "logps/chosen": -149.4697265625, + "logps/ref_chosen": -63.050880432128906, + "logps/ref_rejected": -78.68392181396484, + "logps/rejected": -231.87838745117188, + "loss": 1.1132, + "margin_dpo/margin_mean": 66.7756118774414, + "margin_dpo/margin_std": 95.39866638183594, + "step": 173 + }, + { + "KL/chosen_KL_mean": -82.23112487792969, + "KL/mean": -121.97492980957031, + "KL/rejected_KL_mean": -161.71873474121094, + "KL/std": 96.5755615234375, + "epoch": 0.2555066079295154, + "fcm_dpo/beta": 0.005577293690294027, + "fcm_dpo/delta": -0.045830775052309036, + "fcm_dpo/margin": 79.48760986328125, + "fcm_dpo/q_t": 0.40260159969329834, + "grad_norm": 21.535198211669922, + "learning_rate": 4.652116329460919e-07, + "logits/chosen": -0.320295512676239, + "logits/rejected": -0.338248610496521, + "logps/chosen": -135.59408569335938, + "logps/ref_chosen": -53.36296844482422, + "logps/ref_rejected": -101.91120910644531, + "logps/rejected": -263.62994384765625, + "loss": 1.089, + "margin_dpo/margin_mean": 79.48760986328125, + "margin_dpo/margin_std": 115.94807434082031, + "step": 174 + }, + { + "KL/chosen_KL_mean": -75.58938598632812, + "KL/mean": -130.3022003173828, + "KL/rejected_KL_mean": -185.0150146484375, + "KL/std": 103.86154174804688, + "epoch": 0.25697503671071953, + "fcm_dpo/beta": 0.00536087341606617, + "fcm_dpo/delta": -0.19861072301864624, + "fcm_dpo/margin": 109.42562866210938, + "fcm_dpo/q_t": 0.3664923906326294, + "grad_norm": 27.933094024658203, + "learning_rate": 4.645557588393406e-07, + "logits/chosen": -0.31206628680229187, + "logits/rejected": -0.29794448614120483, + "logps/chosen": -121.00715637207031, + "logps/ref_chosen": -45.417762756347656, + "logps/ref_rejected": -89.50579833984375, + "logps/rejected": -274.52081298828125, + "loss": 0.959, + "margin_dpo/margin_mean": 109.42562866210938, + "margin_dpo/margin_std": 109.13046264648438, + "step": 175 + }, + { + "KL/chosen_KL_mean": -80.81350708007812, + "KL/mean": -126.73405456542969, + "KL/rejected_KL_mean": -172.65460205078125, + "KL/std": 101.8584976196289, + "epoch": 0.25844346549192365, + "fcm_dpo/beta": 0.0052553461864590645, + "fcm_dpo/delta": -0.08676035702228546, + "fcm_dpo/margin": 91.84110260009766, + "fcm_dpo/q_t": 0.3937104344367981, + "grad_norm": 19.47554588317871, + "learning_rate": 4.638942309888058e-07, + "logits/chosen": -0.3381134867668152, + "logits/rejected": -0.35593676567077637, + "logps/chosen": -131.26634216308594, + "logps/ref_chosen": -50.452842712402344, + "logps/ref_rejected": -95.5589599609375, + "logps/rejected": -268.21356201171875, + "loss": 1.0458, + "margin_dpo/margin_mean": 91.84110260009766, + "margin_dpo/margin_std": 117.94707489013672, + "step": 176 + }, + { + "KL/chosen_KL_mean": -95.21261596679688, + "KL/mean": -140.65003967285156, + "KL/rejected_KL_mean": -186.08746337890625, + "KL/std": 111.35872650146484, + "epoch": 0.2599118942731278, + "fcm_dpo/beta": 0.005180859938263893, + "fcm_dpo/delta": -0.07426586002111435, + "fcm_dpo/margin": 90.87483215332031, + "fcm_dpo/q_t": 0.39563536643981934, + "grad_norm": 30.431970596313477, + "learning_rate": 4.6322706682636137e-07, + "logits/chosen": -0.35000866651535034, + "logits/rejected": -0.3415108621120453, + "logps/chosen": -156.4290771484375, + "logps/ref_chosen": -61.216468811035156, + "logps/ref_rejected": -95.89378356933594, + "logps/rejected": -281.98126220703125, + "loss": 1.053, + "margin_dpo/margin_mean": 90.87483215332031, + "margin_dpo/margin_std": 118.72608947753906, + "step": 177 + }, + { + "KL/chosen_KL_mean": -104.90020751953125, + "KL/mean": -162.3438720703125, + "KL/rejected_KL_mean": -219.78750610351562, + "KL/std": 130.68798828125, + "epoch": 0.26138032305433184, + "fcm_dpo/beta": 0.004986546002328396, + "fcm_dpo/delta": -0.1844998002052307, + "fcm_dpo/margin": 114.88732147216797, + "fcm_dpo/q_t": 0.37574303150177, + "grad_norm": 27.06403350830078, + "learning_rate": 4.6255428393240354e-07, + "logits/chosen": -0.25030016899108887, + "logits/rejected": -0.2403268814086914, + "logps/chosen": -163.16500854492188, + "logps/ref_chosen": -58.26478958129883, + "logps/ref_rejected": -105.3653335571289, + "logps/rejected": -325.15283203125, + "loss": 1.0007, + "margin_dpo/margin_mean": 114.88731384277344, + "margin_dpo/margin_std": 142.7379150390625, + "step": 178 + }, + { + "KL/chosen_KL_mean": -108.67526245117188, + "KL/mean": -149.40911865234375, + "KL/rejected_KL_mean": -190.14297485351562, + "KL/std": 112.27867126464844, + "epoch": 0.26284875183553597, + "fcm_dpo/beta": 0.004935364704579115, + "fcm_dpo/delta": -0.0025902092456817627, + "fcm_dpo/margin": 81.46771240234375, + "fcm_dpo/q_t": 0.4115809500217438, + "grad_norm": 36.69063949584961, + "learning_rate": 4.6187590003538724e-07, + "logits/chosen": -0.3055553138256073, + "logits/rejected": -0.315255343914032, + "logps/chosen": -169.73358154296875, + "logps/ref_chosen": -61.05832290649414, + "logps/ref_rejected": -90.52782440185547, + "logps/rejected": -280.6708068847656, + "loss": 1.1354, + "margin_dpo/margin_mean": 81.46771240234375, + "margin_dpo/margin_std": 140.07054138183594, + "step": 179 + }, + { + "KL/chosen_KL_mean": -93.43641662597656, + "KL/mean": -145.72601318359375, + "KL/rejected_KL_mean": -198.01556396484375, + "KL/std": 100.83357238769531, + "epoch": 0.2643171806167401, + "fcm_dpo/beta": 0.0048674289137125015, + "fcm_dpo/delta": -0.11503924429416656, + "fcm_dpo/margin": 104.57914733886719, + "fcm_dpo/q_t": 0.38437995314598083, + "grad_norm": 20.31671142578125, + "learning_rate": 4.611919330113591e-07, + "logits/chosen": -0.28088757395744324, + "logits/rejected": -0.27398407459259033, + "logps/chosen": -147.77914428710938, + "logps/ref_chosen": -54.34272003173828, + "logps/ref_rejected": -98.21183776855469, + "logps/rejected": -296.2274169921875, + "loss": 1.0254, + "margin_dpo/margin_mean": 104.57914733886719, + "margin_dpo/margin_std": 125.9128646850586, + "step": 180 + }, + { + "KL/chosen_KL_mean": -81.22648620605469, + "KL/mean": -113.69107818603516, + "KL/rejected_KL_mean": -146.1556854248047, + "KL/std": 92.112060546875, + "epoch": 0.2657856093979442, + "fcm_dpo/beta": 0.00491193775087595, + "fcm_dpo/delta": 0.0835873931646347, + "fcm_dpo/margin": 64.92919158935547, + "fcm_dpo/q_t": 0.42752861976623535, + "grad_norm": 17.186668395996094, + "learning_rate": 4.605024008834863e-07, + "logits/chosen": -0.3439704179763794, + "logits/rejected": -0.32105350494384766, + "logps/chosen": -136.22694396972656, + "logps/ref_chosen": -55.000457763671875, + "logps/ref_rejected": -61.656166076660156, + "logps/rejected": -207.81185913085938, + "loss": 1.1687, + "margin_dpo/margin_mean": 64.92919921875, + "margin_dpo/margin_std": 116.01119995117188, + "step": 181 + }, + { + "KL/chosen_KL_mean": -77.05168151855469, + "KL/mean": -133.05014038085938, + "KL/rejected_KL_mean": -189.04859924316406, + "KL/std": 110.59321594238281, + "epoch": 0.26725403817914833, + "fcm_dpo/beta": 0.0048008207231760025, + "fcm_dpo/delta": -0.14640963077545166, + "fcm_dpo/margin": 111.99693298339844, + "fcm_dpo/q_t": 0.3778771162033081, + "grad_norm": 19.202186584472656, + "learning_rate": 4.598073218215817e-07, + "logits/chosen": -0.30555886030197144, + "logits/rejected": -0.3154027462005615, + "logps/chosen": -118.15953063964844, + "logps/ref_chosen": -41.107852935791016, + "logps/ref_rejected": -89.5215835571289, + "logps/rejected": -278.5701904296875, + "loss": 1.0138, + "margin_dpo/margin_mean": 111.99693298339844, + "margin_dpo/margin_std": 133.52532958984375, + "step": 182 + }, + { + "KL/chosen_KL_mean": -114.24200439453125, + "KL/mean": -144.60440063476562, + "KL/rejected_KL_mean": -174.966796875, + "KL/std": 92.102294921875, + "epoch": 0.2687224669603524, + "fcm_dpo/beta": 0.00474231131374836, + "fcm_dpo/delta": -0.043582916259765625, + "fcm_dpo/margin": 60.724796295166016, + "fcm_dpo/q_t": 0.4329318106174469, + "grad_norm": 21.197261810302734, + "learning_rate": 4.5910671414162484e-07, + "logits/chosen": -0.32367801666259766, + "logits/rejected": -0.31501567363739014, + "logps/chosen": -171.76657104492188, + "logps/ref_chosen": -57.52456283569336, + "logps/ref_rejected": -75.97572326660156, + "logps/rejected": -250.94252014160156, + "loss": 1.1824, + "margin_dpo/margin_mean": 60.72479248046875, + "margin_dpo/margin_std": 101.34217834472656, + "step": 183 + }, + { + "KL/chosen_KL_mean": -94.91139221191406, + "KL/mean": -126.96908569335938, + "KL/rejected_KL_mean": -159.0267791748047, + "KL/std": 86.96229553222656, + "epoch": 0.2701908957415565, + "fcm_dpo/beta": 0.004741538781672716, + "fcm_dpo/delta": -0.00162951136007905, + "fcm_dpo/margin": 64.11538696289062, + "fcm_dpo/q_t": 0.430799275636673, + "grad_norm": 20.073440551757812, + "learning_rate": 4.5840059630527985e-07, + "logits/chosen": -0.35681042075157166, + "logits/rejected": -0.34759992361068726, + "logps/chosen": -153.45635986328125, + "logps/ref_chosen": -58.544952392578125, + "logps/ref_rejected": -76.63406372070312, + "logps/rejected": -235.6608428955078, + "loss": 1.1707, + "margin_dpo/margin_mean": 64.11538696289062, + "margin_dpo/margin_std": 109.54376220703125, + "step": 184 + }, + { + "KL/chosen_KL_mean": -102.1708755493164, + "KL/mean": -127.47833251953125, + "KL/rejected_KL_mean": -152.78579711914062, + "KL/std": 99.92794799804688, + "epoch": 0.27165932452276065, + "fcm_dpo/beta": 0.004851914010941982, + "fcm_dpo/delta": 0.15838554501533508, + "fcm_dpo/margin": 50.61490249633789, + "fcm_dpo/q_t": 0.44705960154533386, + "grad_norm": 19.03368377685547, + "learning_rate": 4.5768898691940836e-07, + "logits/chosen": -0.33275556564331055, + "logits/rejected": -0.3096786439418793, + "logps/chosen": -164.19671630859375, + "logps/ref_chosen": -62.025848388671875, + "logps/ref_rejected": -73.7625961303711, + "logps/rejected": -226.5483856201172, + "loss": 1.2336, + "margin_dpo/margin_mean": 50.61490249633789, + "margin_dpo/margin_std": 120.33627319335938, + "step": 185 + }, + { + "KL/chosen_KL_mean": -93.70341491699219, + "KL/mean": -141.43142700195312, + "KL/rejected_KL_mean": -189.159423828125, + "KL/std": 100.74044799804688, + "epoch": 0.27312775330396477, + "fcm_dpo/beta": 0.00484071671962738, + "fcm_dpo/delta": -0.06518108397722244, + "fcm_dpo/margin": 95.45602416992188, + "fcm_dpo/q_t": 0.39502984285354614, + "grad_norm": 30.832712173461914, + "learning_rate": 4.5697190473557947e-07, + "logits/chosen": -0.35813000798225403, + "logits/rejected": -0.3332071304321289, + "logps/chosen": -163.056884765625, + "logps/ref_chosen": -69.35346984863281, + "logps/ref_rejected": -88.07244873046875, + "logps/rejected": -277.23187255859375, + "loss": 1.0476, + "margin_dpo/margin_mean": 95.45602416992188, + "margin_dpo/margin_std": 115.35481262207031, + "step": 186 + }, + { + "KL/chosen_KL_mean": -88.89076232910156, + "KL/mean": -128.86146545410156, + "KL/rejected_KL_mean": -168.83216857910156, + "KL/std": 96.70646667480469, + "epoch": 0.2745961820851689, + "fcm_dpo/beta": 0.004859459586441517, + "fcm_dpo/delta": 0.011555861681699753, + "fcm_dpo/margin": 79.94140625, + "fcm_dpo/q_t": 0.4108119606971741, + "grad_norm": 24.247724533081055, + "learning_rate": 4.5624936864957555e-07, + "logits/chosen": -0.32936474680900574, + "logits/rejected": -0.3230019807815552, + "logps/chosen": -141.647216796875, + "logps/ref_chosen": -52.7564582824707, + "logps/ref_rejected": -81.96910095214844, + "logps/rejected": -250.80126953125, + "loss": 1.0976, + "margin_dpo/margin_mean": 79.94140625, + "margin_dpo/margin_std": 105.25481414794922, + "step": 187 + }, + { + "KL/chosen_KL_mean": -83.75914001464844, + "KL/mean": -132.92103576660156, + "KL/rejected_KL_mean": -182.08291625976562, + "KL/std": 108.79667663574219, + "epoch": 0.27606461086637296, + "fcm_dpo/beta": 0.004792365245521069, + "fcm_dpo/delta": -0.07467129826545715, + "fcm_dpo/margin": 98.32379150390625, + "fcm_dpo/q_t": 0.393817663192749, + "grad_norm": 34.55025863647461, + "learning_rate": 4.5552139770089454e-07, + "logits/chosen": -0.33150649070739746, + "logits/rejected": -0.338370680809021, + "logps/chosen": -133.17462158203125, + "logps/ref_chosen": -49.415489196777344, + "logps/ref_rejected": -89.54043579101562, + "logps/rejected": -271.62335205078125, + "loss": 1.0429, + "margin_dpo/margin_mean": 98.32378387451172, + "margin_dpo/margin_std": 119.06608581542969, + "step": 188 + }, + { + "KL/chosen_KL_mean": -99.01392364501953, + "KL/mean": -138.84449768066406, + "KL/rejected_KL_mean": -178.67507934570312, + "KL/std": 109.14806365966797, + "epoch": 0.2775330396475771, + "fcm_dpo/beta": 0.00478787487372756, + "fcm_dpo/delta": 0.019290301948785782, + "fcm_dpo/margin": 79.6611328125, + "fcm_dpo/q_t": 0.41624516248703003, + "grad_norm": 29.447795867919922, + "learning_rate": 4.5478801107224794e-07, + "logits/chosen": -0.334136962890625, + "logits/rejected": -0.31781691312789917, + "logps/chosen": -151.4128875732422, + "logps/ref_chosen": -52.39896011352539, + "logps/ref_rejected": -72.16735076904297, + "logps/rejected": -250.84242248535156, + "loss": 1.1305, + "margin_dpo/margin_mean": 79.6611328125, + "margin_dpo/margin_std": 133.5395050048828, + "step": 189 + }, + { + "KL/chosen_KL_mean": -105.0693359375, + "KL/mean": -152.8391876220703, + "KL/rejected_KL_mean": -200.60903930664062, + "KL/std": 116.60220336914062, + "epoch": 0.2790014684287812, + "fcm_dpo/beta": 0.004786365665495396, + "fcm_dpo/delta": -0.061055850237607956, + "fcm_dpo/margin": 95.53968811035156, + "fcm_dpo/q_t": 0.39865192770957947, + "grad_norm": 18.84038543701172, + "learning_rate": 4.5404922808905543e-07, + "logits/chosen": -0.34232112765312195, + "logits/rejected": -0.3296660780906677, + "logps/chosen": -169.75241088867188, + "logps/ref_chosen": -64.68305969238281, + "logps/ref_rejected": -102.55052185058594, + "logps/rejected": -303.1595458984375, + "loss": 1.0824, + "margin_dpo/margin_mean": 95.53968811035156, + "margin_dpo/margin_std": 135.1875, + "step": 190 + }, + { + "KL/chosen_KL_mean": -98.90615844726562, + "KL/mean": -167.89767456054688, + "KL/rejected_KL_mean": -236.88919067382812, + "KL/std": 137.1860809326172, + "epoch": 0.28046989720998533, + "fcm_dpo/beta": 0.0045428648591041565, + "fcm_dpo/delta": -0.2434038668870926, + "fcm_dpo/margin": 137.9830322265625, + "fcm_dpo/q_t": 0.36233189702033997, + "grad_norm": 21.52570152282715, + "learning_rate": 4.5330506821893565e-07, + "logits/chosen": -0.34583958983421326, + "logits/rejected": -0.3241385817527771, + "logps/chosen": -167.56503295898438, + "logps/ref_chosen": -68.65887451171875, + "logps/ref_rejected": -110.1396713256836, + "logps/rejected": -347.02886962890625, + "loss": 0.9544, + "margin_dpo/margin_mean": 137.9830322265625, + "margin_dpo/margin_std": 151.53329467773438, + "step": 191 + }, + { + "KL/chosen_KL_mean": -127.5339126586914, + "KL/mean": -173.3841552734375, + "KL/rejected_KL_mean": -219.23440551757812, + "KL/std": 119.96187591552734, + "epoch": 0.28193832599118945, + "fcm_dpo/beta": 0.004507323727011681, + "fcm_dpo/delta": -0.014000019058585167, + "fcm_dpo/margin": 91.70048522949219, + "fcm_dpo/q_t": 0.4093227982521057, + "grad_norm": 25.540145874023438, + "learning_rate": 4.5255555107119336e-07, + "logits/chosen": -0.2959958016872406, + "logits/rejected": -0.296117901802063, + "logps/chosen": -197.26083374023438, + "logps/ref_chosen": -69.72691345214844, + "logps/ref_rejected": -103.32135009765625, + "logps/rejected": -322.55572509765625, + "loss": 1.115, + "margin_dpo/margin_mean": 91.70048522949219, + "margin_dpo/margin_std": 148.0252685546875, + "step": 192 + }, + { + "KL/chosen_KL_mean": -127.05119323730469, + "KL/mean": -153.37745666503906, + "KL/rejected_KL_mean": -179.70370483398438, + "KL/std": 109.23312377929688, + "epoch": 0.2834067547723935, + "fcm_dpo/beta": 0.004510689992457628, + "fcm_dpo/delta": 0.040593214333057404, + "fcm_dpo/margin": 52.65251159667969, + "fcm_dpo/q_t": 0.44376885890960693, + "grad_norm": 29.541507720947266, + "learning_rate": 4.5180069639630236e-07, + "logits/chosen": -0.2886780798435211, + "logits/rejected": -0.27803605794906616, + "logps/chosen": -187.24169921875, + "logps/ref_chosen": -60.19049835205078, + "logps/ref_rejected": -76.40755462646484, + "logps/rejected": -256.11126708984375, + "loss": 1.2572, + "margin_dpo/margin_mean": 52.65251159667969, + "margin_dpo/margin_std": 139.58816528320312, + "step": 193 + }, + { + "KL/chosen_KL_mean": -78.30635833740234, + "KL/mean": -121.43690490722656, + "KL/rejected_KL_mean": -164.56744384765625, + "KL/std": 90.03581237792969, + "epoch": 0.28487518355359764, + "fcm_dpo/beta": 0.004522847011685371, + "fcm_dpo/delta": 0.01011504977941513, + "fcm_dpo/margin": 86.26107788085938, + "fcm_dpo/q_t": 0.40855488181114197, + "grad_norm": 18.051904678344727, + "learning_rate": 4.510405240853854e-07, + "logits/chosen": -0.25177642703056335, + "logits/rejected": -0.2375318706035614, + "logps/chosen": -116.146728515625, + "logps/ref_chosen": -37.84037399291992, + "logps/ref_rejected": -60.684783935546875, + "logps/rejected": -225.25222778320312, + "loss": 1.0818, + "margin_dpo/margin_mean": 86.26107788085938, + "margin_dpo/margin_std": 98.7254638671875, + "step": 194 + }, + { + "KL/chosen_KL_mean": -125.2491226196289, + "KL/mean": -171.83770751953125, + "KL/rejected_KL_mean": -218.42633056640625, + "KL/std": 114.07196807861328, + "epoch": 0.28634361233480177, + "fcm_dpo/beta": 0.004522291943430901, + "fcm_dpo/delta": -0.022319436073303223, + "fcm_dpo/margin": 93.17718505859375, + "fcm_dpo/q_t": 0.4035479426383972, + "grad_norm": 21.63848876953125, + "learning_rate": 4.5027505416968985e-07, + "logits/chosen": -0.2544304132461548, + "logits/rejected": -0.27329152822494507, + "logps/chosen": -180.14068603515625, + "logps/ref_chosen": -54.891571044921875, + "logps/ref_rejected": -96.77095794677734, + "logps/rejected": -315.197265625, + "loss": 1.0745, + "margin_dpo/margin_mean": 93.17718505859375, + "margin_dpo/margin_std": 118.36261749267578, + "step": 195 + }, + { + "KL/chosen_KL_mean": -96.55429077148438, + "KL/mean": -149.50570678710938, + "KL/rejected_KL_mean": -202.45712280273438, + "KL/std": 115.13066101074219, + "epoch": 0.2878120411160059, + "fcm_dpo/beta": 0.0044434089213609695, + "fcm_dpo/delta": -0.07507769018411636, + "fcm_dpo/margin": 105.90283966064453, + "fcm_dpo/q_t": 0.3950856924057007, + "grad_norm": 17.6580753326416, + "learning_rate": 4.495043068200599e-07, + "logits/chosen": -0.29589658975601196, + "logits/rejected": -0.2812860608100891, + "logps/chosen": -149.79953002929688, + "logps/ref_chosen": -53.245243072509766, + "logps/ref_rejected": -76.05294799804688, + "logps/rejected": -278.51007080078125, + "loss": 1.0612, + "margin_dpo/margin_mean": 105.90283966064453, + "margin_dpo/margin_std": 138.68316650390625, + "step": 196 + }, + { + "KL/chosen_KL_mean": -98.7095947265625, + "KL/mean": -139.4966278076172, + "KL/rejected_KL_mean": -180.28367614746094, + "KL/std": 99.37328338623047, + "epoch": 0.28928046989721, + "fcm_dpo/beta": 0.004487765487283468, + "fcm_dpo/delta": 0.03489822521805763, + "fcm_dpo/margin": 81.57408142089844, + "fcm_dpo/q_t": 0.41627001762390137, + "grad_norm": 18.291038513183594, + "learning_rate": 4.4872830234640493e-07, + "logits/chosen": -0.28296738862991333, + "logits/rejected": -0.27726900577545166, + "logps/chosen": -159.1299285888672, + "logps/ref_chosen": -60.42033386230469, + "logps/ref_rejected": -77.20890808105469, + "logps/rejected": -257.4925842285156, + "loss": 1.113, + "margin_dpo/margin_mean": 81.5740737915039, + "margin_dpo/margin_std": 112.14630889892578, + "step": 197 + }, + { + "KL/chosen_KL_mean": -111.65481567382812, + "KL/mean": -163.402099609375, + "KL/rejected_KL_mean": -215.14935302734375, + "KL/std": 123.84089660644531, + "epoch": 0.2907488986784141, + "fcm_dpo/beta": 0.00444161519408226, + "fcm_dpo/delta": -0.06254196166992188, + "fcm_dpo/margin": 103.49454498291016, + "fcm_dpo/q_t": 0.39844024181365967, + "grad_norm": 21.28237533569336, + "learning_rate": 4.479470611971645e-07, + "logits/chosen": -0.29576927423477173, + "logits/rejected": -0.2956548035144806, + "logps/chosen": -166.69100952148438, + "logps/ref_chosen": -55.03618621826172, + "logps/ref_rejected": -97.24325561523438, + "logps/rejected": -312.3926086425781, + "loss": 1.0634, + "margin_dpo/margin_mean": 103.49453735351562, + "margin_dpo/margin_std": 140.37669372558594, + "step": 198 + }, + { + "KL/chosen_KL_mean": -105.4992904663086, + "KL/mean": -157.50863647460938, + "KL/rejected_KL_mean": -209.5179901123047, + "KL/std": 112.66731262207031, + "epoch": 0.2922173274596182, + "fcm_dpo/beta": 0.004363642539829016, + "fcm_dpo/delta": -0.057444989681243896, + "fcm_dpo/margin": 104.01869201660156, + "fcm_dpo/q_t": 0.39733150601387024, + "grad_norm": 25.73158836364746, + "learning_rate": 4.471606039587695e-07, + "logits/chosen": -0.3182041049003601, + "logits/rejected": -0.3008995056152344, + "logps/chosen": -162.328125, + "logps/ref_chosen": -56.828826904296875, + "logps/ref_rejected": -84.64820861816406, + "logps/rejected": -294.16619873046875, + "loss": 1.0671, + "margin_dpo/margin_mean": 104.01869201660156, + "margin_dpo/margin_std": 136.8724365234375, + "step": 199 + }, + { + "KL/chosen_KL_mean": -104.5189208984375, + "KL/mean": -155.94215393066406, + "KL/rejected_KL_mean": -207.36537170410156, + "KL/std": 121.28309631347656, + "epoch": 0.2936857562408223, + "fcm_dpo/beta": 0.004329666495323181, + "fcm_dpo/delta": -0.047804687172174454, + "fcm_dpo/margin": 102.84647369384766, + "fcm_dpo/q_t": 0.4015011191368103, + "grad_norm": 21.32215690612793, + "learning_rate": 4.4636895135509966e-07, + "logits/chosen": -0.2771759629249573, + "logits/rejected": -0.25995227694511414, + "logps/chosen": -157.58596801757812, + "logps/ref_chosen": -53.06706237792969, + "logps/ref_rejected": -80.60843658447266, + "logps/rejected": -287.97381591796875, + "loss": 1.091, + "margin_dpo/margin_mean": 102.84646606445312, + "margin_dpo/margin_std": 154.68792724609375, + "step": 200 + }, + { + "KL/chosen_KL_mean": -107.73637390136719, + "KL/mean": -158.36456298828125, + "KL/rejected_KL_mean": -208.99273681640625, + "KL/std": 125.75869750976562, + "epoch": 0.29515418502202645, + "fcm_dpo/beta": 0.004319292958825827, + "fcm_dpo/delta": -0.03909054771065712, + "fcm_dpo/margin": 101.25636291503906, + "fcm_dpo/q_t": 0.4014075696468353, + "grad_norm": 19.30495262145996, + "learning_rate": 4.455721242469372e-07, + "logits/chosen": -0.3616677224636078, + "logits/rejected": -0.3575963079929352, + "logps/chosen": -183.1385955810547, + "logps/ref_chosen": -75.4022216796875, + "logps/ref_rejected": -114.80821990966797, + "logps/rejected": -323.80096435546875, + "loss": 1.0822, + "margin_dpo/margin_mean": 101.25636291503906, + "margin_dpo/margin_std": 141.83740234375, + "step": 201 + }, + { + "KL/chosen_KL_mean": -111.09174346923828, + "KL/mean": -147.0572509765625, + "KL/rejected_KL_mean": -183.02273559570312, + "KL/std": 109.2310791015625, + "epoch": 0.2966226138032305, + "fcm_dpo/beta": 0.004360673949122429, + "fcm_dpo/delta": 0.08905763924121857, + "fcm_dpo/margin": 71.93099975585938, + "fcm_dpo/q_t": 0.4304364323616028, + "grad_norm": 21.131303787231445, + "learning_rate": 4.4477014363141755e-07, + "logits/chosen": -0.3155418336391449, + "logits/rejected": -0.32926225662231445, + "logps/chosen": -161.19305419921875, + "logps/ref_chosen": -50.101318359375, + "logps/ref_rejected": -86.98503112792969, + "logps/rejected": -270.0077819824219, + "loss": 1.1852, + "margin_dpo/margin_mean": 71.93099975585938, + "margin_dpo/margin_std": 141.54080200195312, + "step": 202 + }, + { + "KL/chosen_KL_mean": -109.16009521484375, + "KL/mean": -153.531005859375, + "KL/rejected_KL_mean": -197.90194702148438, + "KL/std": 110.72138977050781, + "epoch": 0.29809104258443464, + "fcm_dpo/beta": 0.004380302503705025, + "fcm_dpo/delta": 0.011728717014193535, + "fcm_dpo/margin": 88.74185180664062, + "fcm_dpo/q_t": 0.41108816862106323, + "grad_norm": 20.558147430419922, + "learning_rate": 4.439630306414758e-07, + "logits/chosen": -0.3222927153110504, + "logits/rejected": -0.3095286190509796, + "logps/chosen": -169.769775390625, + "logps/ref_chosen": -60.60969543457031, + "logps/ref_rejected": -85.89596557617188, + "logps/rejected": -283.79791259765625, + "loss": 1.1006, + "margin_dpo/margin_mean": 88.74185180664062, + "margin_dpo/margin_std": 121.73361206054688, + "step": 203 + }, + { + "KL/chosen_KL_mean": -121.08845520019531, + "KL/mean": -162.0384521484375, + "KL/rejected_KL_mean": -202.98841857910156, + "KL/std": 120.96675109863281, + "epoch": 0.29955947136563876, + "fcm_dpo/beta": 0.0044115157797932625, + "fcm_dpo/delta": 0.04016388952732086, + "fcm_dpo/margin": 81.89998626708984, + "fcm_dpo/q_t": 0.4205179810523987, + "grad_norm": 21.446792602539062, + "learning_rate": 4.431508065452897e-07, + "logits/chosen": -0.42557811737060547, + "logits/rejected": -0.3860868215560913, + "logps/chosen": -201.25341796875, + "logps/ref_chosen": -80.16496276855469, + "logps/ref_rejected": -87.69590759277344, + "logps/rejected": -290.684326171875, + "loss": 1.1481, + "margin_dpo/margin_mean": 81.89998626708984, + "margin_dpo/margin_std": 141.7510528564453, + "step": 204 + }, + { + "KL/chosen_KL_mean": -117.55274200439453, + "KL/mean": -171.8702392578125, + "KL/rejected_KL_mean": -226.18775939941406, + "KL/std": 123.94536590576172, + "epoch": 0.3010279001468429, + "fcm_dpo/beta": 0.0043370481580495834, + "fcm_dpo/delta": -0.07669728994369507, + "fcm_dpo/margin": 108.635009765625, + "fcm_dpo/q_t": 0.39190131425857544, + "grad_norm": 22.160913467407227, + "learning_rate": 4.4233349274571974e-07, + "logits/chosen": -0.3477054834365845, + "logits/rejected": -0.3181983232498169, + "logps/chosen": -176.93746948242188, + "logps/ref_chosen": -59.384735107421875, + "logps/ref_rejected": -85.12505340576172, + "logps/rejected": -311.31280517578125, + "loss": 1.0557, + "margin_dpo/margin_mean": 108.635009765625, + "margin_dpo/margin_std": 135.24069213867188, + "step": 205 + }, + { + "KL/chosen_KL_mean": -107.3229751586914, + "KL/mean": -165.88604736328125, + "KL/rejected_KL_mean": -224.44912719726562, + "KL/std": 115.81060791015625, + "epoch": 0.302496328928047, + "fcm_dpo/beta": 0.004273426253348589, + "fcm_dpo/delta": -0.10639244318008423, + "fcm_dpo/margin": 117.12614440917969, + "fcm_dpo/q_t": 0.3835982382297516, + "grad_norm": 24.27658462524414, + "learning_rate": 4.415111107797445e-07, + "logits/chosen": -0.25635069608688354, + "logits/rejected": -0.2581319212913513, + "logps/chosen": -154.2874755859375, + "logps/ref_chosen": -46.964500427246094, + "logps/ref_rejected": -98.9534912109375, + "logps/rejected": -323.4026184082031, + "loss": 1.0153, + "margin_dpo/margin_mean": 117.12614440917969, + "margin_dpo/margin_std": 126.34098052978516, + "step": 206 + }, + { + "KL/chosen_KL_mean": -99.67330932617188, + "KL/mean": -166.9073028564453, + "KL/rejected_KL_mean": -234.1413116455078, + "KL/std": 132.98760986328125, + "epoch": 0.3039647577092511, + "fcm_dpo/beta": 0.0041627888567745686, + "fcm_dpo/delta": -0.1690816581249237, + "fcm_dpo/margin": 134.46800231933594, + "fcm_dpo/q_t": 0.37616121768951416, + "grad_norm": 23.397769927978516, + "learning_rate": 4.4068368231789365e-07, + "logits/chosen": -0.3611038029193878, + "logits/rejected": -0.3351825773715973, + "logps/chosen": -155.7295684814453, + "logps/ref_chosen": -56.05625915527344, + "logps/ref_rejected": -84.44779968261719, + "logps/rejected": -318.589111328125, + "loss": 0.993, + "margin_dpo/margin_mean": 134.46800231933594, + "margin_dpo/margin_std": 155.12615966796875, + "step": 207 + }, + { + "KL/chosen_KL_mean": -153.80804443359375, + "KL/mean": -207.006591796875, + "KL/rejected_KL_mean": -260.2052001953125, + "KL/std": 128.0139617919922, + "epoch": 0.3054331864904552, + "fcm_dpo/beta": 0.004091139882802963, + "fcm_dpo/delta": -0.037054985761642456, + "fcm_dpo/margin": 106.39715576171875, + "fcm_dpo/q_t": 0.40143436193466187, + "grad_norm": 26.311290740966797, + "learning_rate": 4.398512291636768e-07, + "logits/chosen": -0.37917637825012207, + "logits/rejected": -0.3616452217102051, + "logps/chosen": -220.87564086914062, + "logps/ref_chosen": -67.06761169433594, + "logps/ref_rejected": -94.28689575195312, + "logps/rejected": -354.4920654296875, + "loss": 1.0912, + "margin_dpo/margin_mean": 106.39714813232422, + "margin_dpo/margin_std": 155.67181396484375, + "step": 208 + }, + { + "KL/chosen_KL_mean": -130.76773071289062, + "KL/mean": -177.64361572265625, + "KL/rejected_KL_mean": -224.51953125, + "KL/std": 116.13549041748047, + "epoch": 0.3069016152716593, + "fcm_dpo/beta": 0.004103041719645262, + "fcm_dpo/delta": 0.01593739353120327, + "fcm_dpo/margin": 93.7518081665039, + "fcm_dpo/q_t": 0.4129374623298645, + "grad_norm": 29.005294799804688, + "learning_rate": 4.3901377325300857e-07, + "logits/chosen": -0.28290677070617676, + "logits/rejected": -0.27193692326545715, + "logps/chosen": -186.9494171142578, + "logps/ref_chosen": -56.18169403076172, + "logps/ref_rejected": -80.94152069091797, + "logps/rejected": -305.4610595703125, + "loss": 1.1266, + "margin_dpo/margin_mean": 93.75180053710938, + "margin_dpo/margin_std": 147.88401794433594, + "step": 209 + }, + { + "KL/chosen_KL_mean": -119.01356506347656, + "KL/mean": -173.27197265625, + "KL/rejected_KL_mean": -227.5303955078125, + "KL/std": 119.99595642089844, + "epoch": 0.30837004405286345, + "fcm_dpo/beta": 0.0040941243059933186, + "fcm_dpo/delta": -0.04651525244116783, + "fcm_dpo/margin": 108.5168228149414, + "fcm_dpo/q_t": 0.400329053401947, + "grad_norm": 26.729955673217773, + "learning_rate": 4.381713366536311e-07, + "logits/chosen": -0.2923848628997803, + "logits/rejected": -0.2843049168586731, + "logps/chosen": -165.3853759765625, + "logps/ref_chosen": -46.371822357177734, + "logps/ref_rejected": -76.68162536621094, + "logps/rejected": -304.2120056152344, + "loss": 1.0743, + "margin_dpo/margin_mean": 108.51681518554688, + "margin_dpo/margin_std": 146.48370361328125, + "step": 210 + }, + { + "KL/chosen_KL_mean": -171.04937744140625, + "KL/mean": -217.49700927734375, + "KL/rejected_KL_mean": -263.94464111328125, + "KL/std": 137.9586944580078, + "epoch": 0.30983847283406757, + "fcm_dpo/beta": 0.004082635045051575, + "fcm_dpo/delta": 0.021537447348237038, + "fcm_dpo/margin": 92.89524841308594, + "fcm_dpo/q_t": 0.41833657026290894, + "grad_norm": 36.12271499633789, + "learning_rate": 4.373239415645323e-07, + "logits/chosen": -0.3229391574859619, + "logits/rejected": -0.2848234474658966, + "logps/chosen": -249.98175048828125, + "logps/ref_chosen": -78.93235778808594, + "logps/ref_rejected": -86.82098388671875, + "logps/rejected": -350.765625, + "loss": 1.1436, + "margin_dpo/margin_mean": 92.89524841308594, + "margin_dpo/margin_std": 161.31678771972656, + "step": 211 + }, + { + "KL/chosen_KL_mean": -140.74990844726562, + "KL/mean": -206.79635620117188, + "KL/rejected_KL_mean": -272.84283447265625, + "KL/std": 147.50723266601562, + "epoch": 0.31130690161527164, + "fcm_dpo/beta": 0.003975285217165947, + "fcm_dpo/delta": -0.13405390083789825, + "fcm_dpo/margin": 132.0928955078125, + "fcm_dpo/q_t": 0.3824244737625122, + "grad_norm": 24.483768463134766, + "learning_rate": 4.3647161031536086e-07, + "logits/chosen": -0.2860155701637268, + "logits/rejected": -0.27637436985969543, + "logps/chosen": -198.94692993164062, + "logps/ref_chosen": -58.19701385498047, + "logps/ref_rejected": -103.05785369873047, + "logps/rejected": -375.9006652832031, + "loss": 1.0326, + "margin_dpo/margin_mean": 132.0928955078125, + "margin_dpo/margin_std": 163.81600952148438, + "step": 212 + }, + { + "KL/chosen_KL_mean": -130.84754943847656, + "KL/mean": -193.90252685546875, + "KL/rejected_KL_mean": -256.9574890136719, + "KL/std": 128.05874633789062, + "epoch": 0.31277533039647576, + "fcm_dpo/beta": 0.0039049675688147545, + "fcm_dpo/delta": -0.09785507619380951, + "fcm_dpo/margin": 126.10995483398438, + "fcm_dpo/q_t": 0.38795384764671326, + "grad_norm": 35.1922607421875, + "learning_rate": 4.3561436536583774e-07, + "logits/chosen": -0.3229708671569824, + "logits/rejected": -0.29631006717681885, + "logps/chosen": -198.36026000976562, + "logps/ref_chosen": -67.51271057128906, + "logps/ref_rejected": -93.91471862792969, + "logps/rejected": -350.8721923828125, + "loss": 1.0354, + "margin_dpo/margin_mean": 126.10995483398438, + "margin_dpo/margin_std": 152.4547119140625, + "step": 213 + }, + { + "KL/chosen_KL_mean": -112.09881591796875, + "KL/mean": -168.9818115234375, + "KL/rejected_KL_mean": -225.86480712890625, + "KL/std": 126.0855712890625, + "epoch": 0.3142437591776799, + "fcm_dpo/beta": 0.003874241840094328, + "fcm_dpo/delta": -0.04264100641012192, + "fcm_dpo/margin": 113.76599884033203, + "fcm_dpo/q_t": 0.4002940356731415, + "grad_norm": 24.354455947875977, + "learning_rate": 4.3475222930516473e-07, + "logits/chosen": -0.23961499333381653, + "logits/rejected": -0.24341589212417603, + "logps/chosen": -153.70370483398438, + "logps/ref_chosen": -41.604888916015625, + "logps/ref_rejected": -77.51741027832031, + "logps/rejected": -303.3822326660156, + "loss": 1.0675, + "margin_dpo/margin_mean": 113.76599884033203, + "margin_dpo/margin_std": 146.8253173828125, + "step": 214 + }, + { + "KL/chosen_KL_mean": -131.69699096679688, + "KL/mean": -190.83827209472656, + "KL/rejected_KL_mean": -249.97955322265625, + "KL/std": 130.4374237060547, + "epoch": 0.315712187958884, + "fcm_dpo/beta": 0.0038247781340032816, + "fcm_dpo/delta": -0.05516364052891731, + "fcm_dpo/margin": 118.28255462646484, + "fcm_dpo/q_t": 0.3958727717399597, + "grad_norm": 24.70524787902832, + "learning_rate": 4.3388522485142885e-07, + "logits/chosen": -0.2865249514579773, + "logits/rejected": -0.2768559455871582, + "logps/chosen": -184.97625732421875, + "logps/ref_chosen": -53.279266357421875, + "logps/ref_rejected": -89.96464538574219, + "logps/rejected": -339.9442138671875, + "loss": 1.0476, + "margin_dpo/margin_mean": 118.28255462646484, + "margin_dpo/margin_std": 136.10702514648438, + "step": 215 + }, + { + "KL/chosen_KL_mean": -133.49032592773438, + "KL/mean": -192.19558715820312, + "KL/rejected_KL_mean": -250.90084838867188, + "KL/std": 134.2762451171875, + "epoch": 0.31718061674008813, + "fcm_dpo/beta": 0.0038109051529318094, + "fcm_dpo/delta": -0.04979248717427254, + "fcm_dpo/margin": 117.41053009033203, + "fcm_dpo/q_t": 0.3997488021850586, + "grad_norm": 23.230796813964844, + "learning_rate": 4.330133748510036e-07, + "logits/chosen": -0.3021494150161743, + "logits/rejected": -0.28650131821632385, + "logps/chosen": -182.37811279296875, + "logps/ref_chosen": -48.887794494628906, + "logps/ref_rejected": -77.19892883300781, + "logps/rejected": -328.09979248046875, + "loss": 1.0813, + "margin_dpo/margin_mean": 117.4105224609375, + "margin_dpo/margin_std": 166.27999877929688, + "step": 216 + }, + { + "KL/chosen_KL_mean": -135.45867919921875, + "KL/mean": -203.8918914794922, + "KL/rejected_KL_mean": -272.3250732421875, + "KL/std": 136.49188232421875, + "epoch": 0.3186490455212922, + "fcm_dpo/beta": 0.003715306520462036, + "fcm_dpo/delta": -0.11458480358123779, + "fcm_dpo/margin": 136.8664093017578, + "fcm_dpo/q_t": 0.3847663104534149, + "grad_norm": 21.052268981933594, + "learning_rate": 4.3213670227794757e-07, + "logits/chosen": -0.270561158657074, + "logits/rejected": -0.26448899507522583, + "logps/chosen": -185.30398559570312, + "logps/ref_chosen": -49.845306396484375, + "logps/ref_rejected": -100.07832336425781, + "logps/rejected": -372.4034118652344, + "loss": 1.0148, + "margin_dpo/margin_mean": 136.8664093017578, + "margin_dpo/margin_std": 154.27218627929688, + "step": 217 + }, + { + "KL/chosen_KL_mean": -143.15106201171875, + "KL/mean": -196.36883544921875, + "KL/rejected_KL_mean": -249.58657836914062, + "KL/std": 135.85345458984375, + "epoch": 0.3201174743024963, + "fcm_dpo/beta": 0.0036980193108320236, + "fcm_dpo/delta": 0.0066223908215761185, + "fcm_dpo/margin": 106.43551635742188, + "fcm_dpo/q_t": 0.41142043471336365, + "grad_norm": 20.975133895874023, + "learning_rate": 4.3125523023339815e-07, + "logits/chosen": -0.2958596646785736, + "logits/rejected": -0.28984978795051575, + "logps/chosen": -201.72775268554688, + "logps/ref_chosen": -58.576683044433594, + "logps/ref_rejected": -87.84639739990234, + "logps/rejected": -337.4329833984375, + "loss": 1.1123, + "margin_dpo/margin_mean": 106.43551635742188, + "margin_dpo/margin_std": 159.1250457763672, + "step": 218 + }, + { + "KL/chosen_KL_mean": -152.82916259765625, + "KL/mean": -199.12118530273438, + "KL/rejected_KL_mean": -245.41322326660156, + "KL/std": 141.51058959960938, + "epoch": 0.32158590308370044, + "fcm_dpo/beta": 0.0037533333525061607, + "fcm_dpo/delta": 0.05362574756145477, + "fcm_dpo/margin": 92.58407592773438, + "fcm_dpo/q_t": 0.4223693311214447, + "grad_norm": 29.867891311645508, + "learning_rate": 4.303689819449636e-07, + "logits/chosen": -0.31183797121047974, + "logits/rejected": -0.30414023995399475, + "logps/chosen": -213.9130096435547, + "logps/ref_chosen": -61.083858489990234, + "logps/ref_rejected": -85.83042907714844, + "logps/rejected": -331.24365234375, + "loss": 1.1677, + "margin_dpo/margin_mean": 92.58406066894531, + "margin_dpo/margin_std": 172.917236328125, + "step": 219 + }, + { + "KL/chosen_KL_mean": -172.810791015625, + "KL/mean": -211.48831176757812, + "KL/rejected_KL_mean": -250.16583251953125, + "KL/std": 122.16136169433594, + "epoch": 0.32305433186490456, + "fcm_dpo/beta": 0.0037990869022905827, + "fcm_dpo/delta": 0.10947298258543015, + "fcm_dpo/margin": 77.35502624511719, + "fcm_dpo/q_t": 0.43133461475372314, + "grad_norm": 25.090055465698242, + "learning_rate": 4.2947798076611047e-07, + "logits/chosen": -0.2918081283569336, + "logits/rejected": -0.26791825890541077, + "logps/chosen": -242.8420867919922, + "logps/ref_chosen": -70.03128051757812, + "logps/ref_rejected": -87.68551635742188, + "logps/rejected": -337.8513488769531, + "loss": 1.1694, + "margin_dpo/margin_mean": 77.35502624511719, + "margin_dpo/margin_std": 126.34854888916016, + "step": 220 + }, + { + "KL/chosen_KL_mean": -147.82846069335938, + "KL/mean": -233.2677764892578, + "KL/rejected_KL_mean": -318.70709228515625, + "KL/std": 156.06951904296875, + "epoch": 0.3245227606461087, + "fcm_dpo/beta": 0.003700793255120516, + "fcm_dpo/delta": -0.24753707647323608, + "fcm_dpo/margin": 170.87864685058594, + "fcm_dpo/q_t": 0.35590487718582153, + "grad_norm": 26.41898536682129, + "learning_rate": 4.285822501755485e-07, + "logits/chosen": -0.2831432819366455, + "logits/rejected": -0.2893243730068207, + "logps/chosen": -199.983154296875, + "logps/ref_chosen": -52.15470886230469, + "logps/ref_rejected": -106.46768188476562, + "logps/rejected": -425.1748046875, + "loss": 0.9327, + "margin_dpo/margin_mean": 170.878662109375, + "margin_dpo/margin_std": 160.57461547851562, + "step": 221 + }, + { + "KL/chosen_KL_mean": -155.30770874023438, + "KL/mean": -219.45260620117188, + "KL/rejected_KL_mean": -283.5975341796875, + "KL/std": 144.82785034179688, + "epoch": 0.32599118942731276, + "fcm_dpo/beta": 0.003622027114033699, + "fcm_dpo/delta": -0.06790776550769806, + "fcm_dpo/margin": 128.28977966308594, + "fcm_dpo/q_t": 0.39438772201538086, + "grad_norm": 20.189659118652344, + "learning_rate": 4.276818137766118e-07, + "logits/chosen": -0.31003278493881226, + "logits/rejected": -0.31088048219680786, + "logps/chosen": -216.27880859375, + "logps/ref_chosen": -60.971099853515625, + "logps/ref_rejected": -100.00115203857422, + "logps/rejected": -383.5986633300781, + "loss": 1.0542, + "margin_dpo/margin_mean": 128.28977966308594, + "margin_dpo/margin_std": 161.27833557128906, + "step": 222 + }, + { + "KL/chosen_KL_mean": -164.61181640625, + "KL/mean": -218.14041137695312, + "KL/rejected_KL_mean": -271.66900634765625, + "KL/std": 140.00912475585938, + "epoch": 0.3274596182085169, + "fcm_dpo/beta": 0.003598616225644946, + "fcm_dpo/delta": 0.01532889436930418, + "fcm_dpo/margin": 107.05719757080078, + "fcm_dpo/q_t": 0.4134790301322937, + "grad_norm": 24.665771484375, + "learning_rate": 4.2677669529663686e-07, + "logits/chosen": -0.252638041973114, + "logits/rejected": -0.24833783507347107, + "logps/chosen": -217.2523956298828, + "logps/ref_chosen": -52.64057540893555, + "logps/ref_rejected": -82.82502746582031, + "logps/rejected": -354.4940185546875, + "loss": 1.1327, + "margin_dpo/margin_mean": 107.05718994140625, + "margin_dpo/margin_std": 178.87998962402344, + "step": 223 + }, + { + "KL/chosen_KL_mean": -146.0878143310547, + "KL/mean": -208.273681640625, + "KL/rejected_KL_mean": -270.4595642089844, + "KL/std": 157.24331665039062, + "epoch": 0.328928046989721, + "fcm_dpo/beta": 0.003553580492734909, + "fcm_dpo/delta": -0.04542648792266846, + "fcm_dpo/margin": 124.37174987792969, + "fcm_dpo/q_t": 0.4029054641723633, + "grad_norm": 26.398706436157227, + "learning_rate": 4.2586691858633747e-07, + "logits/chosen": -0.27615243196487427, + "logits/rejected": -0.25664016604423523, + "logps/chosen": -194.6832275390625, + "logps/ref_chosen": -48.59541320800781, + "logps/ref_rejected": -77.11648559570312, + "logps/rejected": -347.5760498046875, + "loss": 1.089, + "margin_dpo/margin_mean": 124.37174987792969, + "margin_dpo/margin_std": 181.30401611328125, + "step": 224 + }, + { + "KL/chosen_KL_mean": -169.44223022460938, + "KL/mean": -242.9072265625, + "KL/rejected_KL_mean": -316.37225341796875, + "KL/std": 153.35971069335938, + "epoch": 0.3303964757709251, + "fcm_dpo/beta": 0.003504401072859764, + "fcm_dpo/delta": -0.12130744010210037, + "fcm_dpo/margin": 146.93002319335938, + "fcm_dpo/q_t": 0.38479962944984436, + "grad_norm": 20.279191970825195, + "learning_rate": 4.249525076191759e-07, + "logits/chosen": -0.2664515972137451, + "logits/rejected": -0.2539185881614685, + "logps/chosen": -227.44268798828125, + "logps/ref_chosen": -58.000465393066406, + "logps/ref_rejected": -99.90291595458984, + "logps/rejected": -416.275146484375, + "loss": 1.0314, + "margin_dpo/margin_mean": 146.93002319335938, + "margin_dpo/margin_std": 186.25558471679688, + "step": 225 + }, + { + "KL/chosen_KL_mean": -140.94992065429688, + "KL/mean": -200.4373321533203, + "KL/rejected_KL_mean": -259.92474365234375, + "KL/std": 148.1170654296875, + "epoch": 0.33186490455212925, + "fcm_dpo/beta": 0.0034582829102873802, + "fcm_dpo/delta": -0.01282452791929245, + "fcm_dpo/margin": 118.97482299804688, + "fcm_dpo/q_t": 0.4084652364253998, + "grad_norm": 25.379802703857422, + "learning_rate": 4.2403348649073167e-07, + "logits/chosen": -0.35269731283187866, + "logits/rejected": -0.31248384714126587, + "logps/chosen": -199.84872436523438, + "logps/ref_chosen": -58.898799896240234, + "logps/ref_rejected": -78.68775939941406, + "logps/rejected": -338.61248779296875, + "loss": 1.0981, + "margin_dpo/margin_mean": 118.9748306274414, + "margin_dpo/margin_std": 169.6277313232422, + "step": 226 + }, + { + "KL/chosen_KL_mean": -163.4235382080078, + "KL/mean": -235.5128173828125, + "KL/rejected_KL_mean": -307.60211181640625, + "KL/std": 166.22787475585938, + "epoch": 0.3333333333333333, + "fcm_dpo/beta": 0.0034146863035857677, + "fcm_dpo/delta": -0.09776041656732559, + "fcm_dpo/margin": 144.17855834960938, + "fcm_dpo/q_t": 0.38793981075286865, + "grad_norm": 25.049428939819336, + "learning_rate": 4.2310987941806615e-07, + "logits/chosen": -0.37827032804489136, + "logits/rejected": -0.3669503331184387, + "logps/chosen": -222.4957275390625, + "logps/ref_chosen": -59.072181701660156, + "logps/ref_rejected": -99.41236877441406, + "logps/rejected": -407.0144958496094, + "loss": 1.0323, + "margin_dpo/margin_mean": 144.17855834960938, + "margin_dpo/margin_std": 172.8520050048828, + "step": 227 + }, + { + "KL/chosen_KL_mean": -163.7899169921875, + "KL/mean": -215.34075927734375, + "KL/rejected_KL_mean": -266.8916015625, + "KL/std": 135.03140258789062, + "epoch": 0.33480176211453744, + "fcm_dpo/beta": 0.0034392657689750195, + "fcm_dpo/delta": 0.046966154128313065, + "fcm_dpo/margin": 103.10169982910156, + "fcm_dpo/q_t": 0.4187896251678467, + "grad_norm": 24.645570755004883, + "learning_rate": 4.2218171073908463e-07, + "logits/chosen": -0.35644814372062683, + "logits/rejected": -0.3388446569442749, + "logps/chosen": -229.6811981201172, + "logps/ref_chosen": -65.89128875732422, + "logps/ref_rejected": -91.04875183105469, + "logps/rejected": -357.94036865234375, + "loss": 1.139, + "margin_dpo/margin_mean": 103.1017074584961, + "margin_dpo/margin_std": 165.435546875, + "step": 228 + }, + { + "KL/chosen_KL_mean": -158.61346435546875, + "KL/mean": -213.9613037109375, + "KL/rejected_KL_mean": -269.30914306640625, + "KL/std": 150.13076782226562, + "epoch": 0.33627019089574156, + "fcm_dpo/beta": 0.003454534336924553, + "fcm_dpo/delta": 0.018272558227181435, + "fcm_dpo/margin": 110.69569396972656, + "fcm_dpo/q_t": 0.41169029474258423, + "grad_norm": 30.438304901123047, + "learning_rate": 4.212490049118951e-07, + "logits/chosen": -0.44706737995147705, + "logits/rejected": -0.41668009757995605, + "logps/chosen": -229.31982421875, + "logps/ref_chosen": -70.70637512207031, + "logps/ref_rejected": -84.52741241455078, + "logps/rejected": -353.8365478515625, + "loss": 1.1093, + "margin_dpo/margin_mean": 110.69569396972656, + "margin_dpo/margin_std": 158.08216857910156, + "step": 229 + }, + { + "KL/chosen_KL_mean": -118.41344451904297, + "KL/mean": -198.5885772705078, + "KL/rejected_KL_mean": -278.76373291015625, + "KL/std": 142.54483032226562, + "epoch": 0.3377386196769457, + "fcm_dpo/beta": 0.0033752424642443657, + "fcm_dpo/delta": -0.1495116651058197, + "fcm_dpo/margin": 160.35025024414062, + "fcm_dpo/q_t": 0.37446969747543335, + "grad_norm": 34.35503005981445, + "learning_rate": 4.203117865141635e-07, + "logits/chosen": -0.33029061555862427, + "logits/rejected": -0.3350130319595337, + "logps/chosen": -157.69544982910156, + "logps/ref_chosen": -39.282005310058594, + "logps/ref_rejected": -85.62191009521484, + "logps/rejected": -364.3856201171875, + "loss": 0.9808, + "margin_dpo/margin_mean": 160.35025024414062, + "margin_dpo/margin_std": 157.2830810546875, + "step": 230 + }, + { + "KL/chosen_KL_mean": -144.67678833007812, + "KL/mean": -201.7078094482422, + "KL/rejected_KL_mean": -258.7388610839844, + "KL/std": 129.312255859375, + "epoch": 0.3392070484581498, + "fcm_dpo/beta": 0.0033622784540057182, + "fcm_dpo/delta": 0.017147505655884743, + "fcm_dpo/margin": 114.06205749511719, + "fcm_dpo/q_t": 0.41282835602760315, + "grad_norm": 26.74052619934082, + "learning_rate": 4.1937008024246625e-07, + "logits/chosen": -0.4000471532344818, + "logits/rejected": -0.3726590871810913, + "logps/chosen": -207.95321655273438, + "logps/ref_chosen": -63.27644348144531, + "logps/ref_rejected": -74.1239013671875, + "logps/rejected": -332.86273193359375, + "loss": 1.0974, + "margin_dpo/margin_mean": 114.06205749511719, + "margin_dpo/margin_std": 149.9913330078125, + "step": 231 + }, + { + "KL/chosen_KL_mean": -183.89407348632812, + "KL/mean": -230.66317749023438, + "KL/rejected_KL_mean": -277.4322814941406, + "KL/std": 155.87942504882812, + "epoch": 0.3406754772393539, + "fcm_dpo/beta": 0.0034015290439128876, + "fcm_dpo/delta": 0.0846027284860611, + "fcm_dpo/margin": 93.53819274902344, + "fcm_dpo/q_t": 0.4293164014816284, + "grad_norm": 25.34986686706543, + "learning_rate": 4.1842391091163933e-07, + "logits/chosen": -0.3858756422996521, + "logits/rejected": -0.3642328381538391, + "logps/chosen": -254.642822265625, + "logps/ref_chosen": -70.74876403808594, + "logps/ref_rejected": -83.97706604003906, + "logps/rejected": -361.40936279296875, + "loss": 1.1607, + "margin_dpo/margin_mean": 93.53819274902344, + "margin_dpo/margin_std": 159.80599975585938, + "step": 232 + }, + { + "KL/chosen_KL_mean": -168.03411865234375, + "KL/mean": -241.22540283203125, + "KL/rejected_KL_mean": -314.41668701171875, + "KL/std": 164.896728515625, + "epoch": 0.342143906020558, + "fcm_dpo/beta": 0.0033752245362848043, + "fcm_dpo/delta": -0.09889530390501022, + "fcm_dpo/margin": 146.382568359375, + "fcm_dpo/q_t": 0.3929908871650696, + "grad_norm": 26.03354835510254, + "learning_rate": 4.174733034541245e-07, + "logits/chosen": -0.38083887100219727, + "logits/rejected": -0.384868860244751, + "logps/chosen": -222.91705322265625, + "logps/ref_chosen": -54.8829345703125, + "logps/ref_rejected": -107.4800796508789, + "logps/rejected": -421.89678955078125, + "loss": 1.0648, + "margin_dpo/margin_mean": 146.382568359375, + "margin_dpo/margin_std": 210.76010131835938, + "step": 233 + }, + { + "KL/chosen_KL_mean": -162.310791015625, + "KL/mean": -236.50253295898438, + "KL/rejected_KL_mean": -310.69427490234375, + "KL/std": 145.2280731201172, + "epoch": 0.3436123348017621, + "fcm_dpo/beta": 0.0032870229333639145, + "fcm_dpo/delta": -0.09375564754009247, + "fcm_dpo/margin": 148.3834686279297, + "fcm_dpo/q_t": 0.38880789279937744, + "grad_norm": 51.05461502075195, + "learning_rate": 4.165182829193126e-07, + "logits/chosen": -0.33859947323799133, + "logits/rejected": -0.36448922753334045, + "logps/chosen": -206.4053192138672, + "logps/ref_chosen": -44.094520568847656, + "logps/ref_rejected": -100.00663757324219, + "logps/rejected": -410.7008972167969, + "loss": 1.0276, + "margin_dpo/margin_mean": 148.3834686279297, + "margin_dpo/margin_std": 165.46307373046875, + "step": 234 + }, + { + "KL/chosen_KL_mean": -193.5567626953125, + "KL/mean": -242.59243774414062, + "KL/rejected_KL_mean": -291.62811279296875, + "KL/std": 139.42709350585938, + "epoch": 0.34508076358296624, + "fcm_dpo/beta": 0.0033407763112336397, + "fcm_dpo/delta": 0.07434496283531189, + "fcm_dpo/margin": 98.07133483886719, + "fcm_dpo/q_t": 0.42465054988861084, + "grad_norm": 30.565860748291016, + "learning_rate": 4.1555887447288255e-07, + "logits/chosen": -0.4063182473182678, + "logits/rejected": -0.388034462928772, + "logps/chosen": -255.794677734375, + "logps/ref_chosen": -62.237911224365234, + "logps/ref_rejected": -90.39506530761719, + "logps/rejected": -382.0231628417969, + "loss": 1.1602, + "margin_dpo/margin_mean": 98.07133483886719, + "margin_dpo/margin_std": 167.26129150390625, + "step": 235 + }, + { + "KL/chosen_KL_mean": -141.887451171875, + "KL/mean": -219.4722900390625, + "KL/rejected_KL_mean": -297.0571594238281, + "KL/std": 149.41790771484375, + "epoch": 0.3465491923641703, + "fcm_dpo/beta": 0.003285345621407032, + "fcm_dpo/delta": -0.11568379402160645, + "fcm_dpo/margin": 155.1697235107422, + "fcm_dpo/q_t": 0.38063254952430725, + "grad_norm": 56.562007904052734, + "learning_rate": 4.1459510339613946e-07, + "logits/chosen": -0.3532152771949768, + "logits/rejected": -0.35226863622665405, + "logps/chosen": -191.2288055419922, + "logps/ref_chosen": -49.34136199951172, + "logps/ref_rejected": -103.51162719726562, + "logps/rejected": -400.56878662109375, + "loss": 0.9904, + "margin_dpo/margin_mean": 155.1697235107422, + "margin_dpo/margin_std": 141.39013671875, + "step": 236 + }, + { + "KL/chosen_KL_mean": -188.53512573242188, + "KL/mean": -248.512451171875, + "KL/rejected_KL_mean": -308.48980712890625, + "KL/std": 152.22921752929688, + "epoch": 0.34801762114537443, + "fcm_dpo/beta": 0.0032739704474806786, + "fcm_dpo/delta": 0.00752119068056345, + "fcm_dpo/margin": 119.95466613769531, + "fcm_dpo/q_t": 0.4108693599700928, + "grad_norm": 27.002674102783203, + "learning_rate": 4.136269950853473e-07, + "logits/chosen": -0.3559209108352661, + "logits/rejected": -0.3510690927505493, + "logps/chosen": -242.7032470703125, + "logps/ref_chosen": -54.168121337890625, + "logps/ref_rejected": -94.78036499023438, + "logps/rejected": -403.2701416015625, + "loss": 1.1075, + "margin_dpo/margin_mean": 119.95466613769531, + "margin_dpo/margin_std": 175.58291625976562, + "step": 237 + }, + { + "KL/chosen_KL_mean": -165.1414794921875, + "KL/mean": -224.89373779296875, + "KL/rejected_KL_mean": -284.64599609375, + "KL/std": 151.98275756835938, + "epoch": 0.34948604992657856, + "fcm_dpo/beta": 0.0032602387946099043, + "fcm_dpo/delta": 0.01038980484008789, + "fcm_dpo/margin": 119.50453186035156, + "fcm_dpo/q_t": 0.4138892889022827, + "grad_norm": 23.35243034362793, + "learning_rate": 4.126545750510605e-07, + "logits/chosen": -0.329600989818573, + "logits/rejected": -0.3452579975128174, + "logps/chosen": -219.11459350585938, + "logps/ref_chosen": -53.973121643066406, + "logps/ref_rejected": -89.41795349121094, + "logps/rejected": -374.06396484375, + "loss": 1.1065, + "margin_dpo/margin_mean": 119.50453186035156, + "margin_dpo/margin_std": 172.2950439453125, + "step": 238 + }, + { + "KL/chosen_KL_mean": -174.6372833251953, + "KL/mean": -240.18380737304688, + "KL/rejected_KL_mean": -305.7303466796875, + "KL/std": 140.24794006347656, + "epoch": 0.3509544787077827, + "fcm_dpo/beta": 0.003244359279051423, + "fcm_dpo/delta": -0.02737080305814743, + "fcm_dpo/margin": 131.09304809570312, + "fcm_dpo/q_t": 0.4014323949813843, + "grad_norm": 35.462642669677734, + "learning_rate": 4.116778689174514e-07, + "logits/chosen": -0.35082727670669556, + "logits/rejected": -0.33832210302352905, + "logps/chosen": -232.735107421875, + "logps/ref_chosen": -58.09782409667969, + "logps/ref_rejected": -93.59294128417969, + "logps/rejected": -399.3232727050781, + "loss": 1.0714, + "margin_dpo/margin_mean": 131.09304809570312, + "margin_dpo/margin_std": 158.469970703125, + "step": 239 + }, + { + "KL/chosen_KL_mean": -180.39125061035156, + "KL/mean": -235.38194274902344, + "KL/rejected_KL_mean": -290.3726501464844, + "KL/std": 150.54681396484375, + "epoch": 0.3524229074889868, + "fcm_dpo/beta": 0.003269023261964321, + "fcm_dpo/delta": 0.041884519159793854, + "fcm_dpo/margin": 109.98141479492188, + "fcm_dpo/q_t": 0.4180099368095398, + "grad_norm": 37.75619125366211, + "learning_rate": 4.106969024216348e-07, + "logits/chosen": -0.4051019549369812, + "logits/rejected": -0.38454490900039673, + "logps/chosen": -241.0057373046875, + "logps/ref_chosen": -60.6144905090332, + "logps/ref_rejected": -74.1185302734375, + "logps/rejected": -364.4911804199219, + "loss": 1.1409, + "margin_dpo/margin_mean": 109.98140716552734, + "margin_dpo/margin_std": 178.82452392578125, + "step": 240 + }, + { + "KL/chosen_KL_mean": -149.62850952148438, + "KL/mean": -235.679443359375, + "KL/rejected_KL_mean": -321.7303771972656, + "KL/std": 166.00146484375, + "epoch": 0.35389133627019087, + "fcm_dpo/beta": 0.0032152351923286915, + "fcm_dpo/delta": -0.1620943695306778, + "fcm_dpo/margin": 172.10183715820312, + "fcm_dpo/q_t": 0.3768247365951538, + "grad_norm": 28.70929527282715, + "learning_rate": 4.097117014129903e-07, + "logits/chosen": -0.4123689532279968, + "logits/rejected": -0.38682758808135986, + "logps/chosen": -215.71958923339844, + "logps/ref_chosen": -66.091064453125, + "logps/ref_rejected": -88.06088256835938, + "logps/rejected": -409.791259765625, + "loss": 0.9959, + "margin_dpo/margin_mean": 172.10183715820312, + "margin_dpo/margin_std": 193.75653076171875, + "step": 241 + }, + { + "KL/chosen_KL_mean": -172.19378662109375, + "KL/mean": -234.080078125, + "KL/rejected_KL_mean": -295.96636962890625, + "KL/std": 142.62332153320312, + "epoch": 0.355359765051395, + "fcm_dpo/beta": 0.003198289545252919, + "fcm_dpo/delta": 0.0038902349770069122, + "fcm_dpo/margin": 123.77262115478516, + "fcm_dpo/q_t": 0.41040879487991333, + "grad_norm": 33.44797134399414, + "learning_rate": 4.087222918524807e-07, + "logits/chosen": -0.3743210732936859, + "logits/rejected": -0.3515356183052063, + "logps/chosen": -240.05770874023438, + "logps/ref_chosen": -67.86392974853516, + "logps/ref_rejected": -83.36033630371094, + "logps/rejected": -379.32672119140625, + "loss": 1.097, + "margin_dpo/margin_mean": 123.77262115478516, + "margin_dpo/margin_std": 168.39976501464844, + "step": 242 + }, + { + "KL/chosen_KL_mean": -173.37445068359375, + "KL/mean": -249.24774169921875, + "KL/rejected_KL_mean": -325.12103271484375, + "KL/std": 158.65621948242188, + "epoch": 0.3568281938325991, + "fcm_dpo/beta": 0.0031418318394571543, + "fcm_dpo/delta": -0.08077876269817352, + "fcm_dpo/margin": 151.74655151367188, + "fcm_dpo/q_t": 0.3906528353691101, + "grad_norm": 23.109296798706055, + "learning_rate": 4.07728699811968e-07, + "logits/chosen": -0.3913189172744751, + "logits/rejected": -0.36040928959846497, + "logps/chosen": -236.45870971679688, + "logps/ref_chosen": -63.0842399597168, + "logps/ref_rejected": -76.33563232421875, + "logps/rejected": -401.4566650390625, + "loss": 1.0334, + "margin_dpo/margin_mean": 151.74655151367188, + "margin_dpo/margin_std": 174.57302856445312, + "step": 243 + }, + { + "KL/chosen_KL_mean": -153.2425079345703, + "KL/mean": -229.29331970214844, + "KL/rejected_KL_mean": -305.3441467285156, + "KL/std": 151.1732635498047, + "epoch": 0.35829662261380324, + "fcm_dpo/beta": 0.0030940580181777477, + "fcm_dpo/delta": -0.07428047060966492, + "fcm_dpo/margin": 152.1016387939453, + "fcm_dpo/q_t": 0.3908138573169708, + "grad_norm": 40.61009216308594, + "learning_rate": 4.067309514735267e-07, + "logits/chosen": -0.4778062701225281, + "logits/rejected": -0.4712453782558441, + "logps/chosen": -214.38320922851562, + "logps/ref_chosen": -61.140689849853516, + "logps/ref_rejected": -94.89193725585938, + "logps/rejected": -400.236083984375, + "loss": 1.0244, + "margin_dpo/margin_mean": 152.1016387939453, + "margin_dpo/margin_std": 155.42276000976562, + "step": 244 + }, + { + "KL/chosen_KL_mean": -179.92636108398438, + "KL/mean": -241.31871032714844, + "KL/rejected_KL_mean": -302.7110290527344, + "KL/std": 152.4658660888672, + "epoch": 0.35976505139500736, + "fcm_dpo/beta": 0.0030736280605196953, + "fcm_dpo/delta": 0.022404037415981293, + "fcm_dpo/margin": 122.78466796875, + "fcm_dpo/q_t": 0.41335082054138184, + "grad_norm": 25.50909423828125, + "learning_rate": 4.057290731287531e-07, + "logits/chosen": -0.4174082279205322, + "logits/rejected": -0.3904969394207001, + "logps/chosen": -247.1886444091797, + "logps/ref_chosen": -67.26228332519531, + "logps/ref_rejected": -87.64010620117188, + "logps/rejected": -390.35113525390625, + "loss": 1.1128, + "margin_dpo/margin_mean": 122.78466796875, + "margin_dpo/margin_std": 168.45639038085938, + "step": 245 + }, + { + "KL/chosen_KL_mean": -178.84425354003906, + "KL/mean": -241.6790313720703, + "KL/rejected_KL_mean": -304.5137939453125, + "KL/std": 169.49676513671875, + "epoch": 0.36123348017621143, + "fcm_dpo/beta": 0.003099266439676285, + "fcm_dpo/delta": 0.010860616341233253, + "fcm_dpo/margin": 125.66952514648438, + "fcm_dpo/q_t": 0.4126628637313843, + "grad_norm": 26.56377601623535, + "learning_rate": 4.047230911780736e-07, + "logits/chosen": -0.4388137459754944, + "logits/rejected": -0.40211886167526245, + "logps/chosen": -245.54122924804688, + "logps/ref_chosen": -66.69696807861328, + "logps/ref_rejected": -84.34634399414062, + "logps/rejected": -388.8601379394531, + "loss": 1.1069, + "margin_dpo/margin_mean": 125.66952514648438, + "margin_dpo/margin_std": 182.71670532226562, + "step": 246 + }, + { + "KL/chosen_KL_mean": -211.3436279296875, + "KL/mean": -300.1796875, + "KL/rejected_KL_mean": -389.0157470703125, + "KL/std": 181.46612548828125, + "epoch": 0.36270190895741555, + "fcm_dpo/beta": 0.003036319278180599, + "fcm_dpo/delta": -0.14767590165138245, + "fcm_dpo/margin": 177.672119140625, + "fcm_dpo/q_t": 0.3771999478340149, + "grad_norm": 27.967557907104492, + "learning_rate": 4.0371303213004814e-07, + "logits/chosen": -0.3680022656917572, + "logits/rejected": -0.36837178468704224, + "logps/chosen": -267.948974609375, + "logps/ref_chosen": -56.6053466796875, + "logps/ref_rejected": -106.29326629638672, + "logps/rejected": -495.30902099609375, + "loss": 0.9995, + "margin_dpo/margin_mean": 177.672119140625, + "margin_dpo/margin_std": 196.26101684570312, + "step": 247 + }, + { + "KL/chosen_KL_mean": -183.7578125, + "KL/mean": -260.38360595703125, + "KL/rejected_KL_mean": -337.0093994140625, + "KL/std": 146.3836669921875, + "epoch": 0.3641703377386197, + "fcm_dpo/beta": 0.002979197073727846, + "fcm_dpo/delta": -0.05982068181037903, + "fcm_dpo/margin": 153.2515411376953, + "fcm_dpo/q_t": 0.39190369844436646, + "grad_norm": 25.113601684570312, + "learning_rate": 4.0269892260067197e-07, + "logits/chosen": -0.33221954107284546, + "logits/rejected": -0.3516564965248108, + "logps/chosen": -227.80104064941406, + "logps/ref_chosen": -44.043216705322266, + "logps/ref_rejected": -91.85687255859375, + "logps/rejected": -428.86627197265625, + "loss": 1.0241, + "margin_dpo/margin_mean": 153.25155639648438, + "margin_dpo/margin_std": 141.49179077148438, + "step": 248 + }, + { + "KL/chosen_KL_mean": -234.96240234375, + "KL/mean": -277.1217041015625, + "KL/rejected_KL_mean": -319.281005859375, + "KL/std": 158.48863220214844, + "epoch": 0.3656387665198238, + "fcm_dpo/beta": 0.003046369180083275, + "fcm_dpo/delta": 0.1469813883304596, + "fcm_dpo/margin": 84.31859588623047, + "fcm_dpo/q_t": 0.44190624356269836, + "grad_norm": 34.834327697753906, + "learning_rate": 4.0168078931267426e-07, + "logits/chosen": -0.3620932698249817, + "logits/rejected": -0.33795762062072754, + "logps/chosen": -297.4047546386719, + "logps/ref_chosen": -62.442352294921875, + "logps/ref_rejected": -80.46806335449219, + "logps/rejected": -399.7490539550781, + "loss": 1.2226, + "margin_dpo/margin_mean": 84.31858825683594, + "margin_dpo/margin_std": 186.0101318359375, + "step": 249 + }, + { + "KL/chosen_KL_mean": -206.82498168945312, + "KL/mean": -285.0578918457031, + "KL/rejected_KL_mean": -363.29083251953125, + "KL/std": 159.93698120117188, + "epoch": 0.3671071953010279, + "fcm_dpo/beta": 0.003046911209821701, + "fcm_dpo/delta": -0.08060043305158615, + "fcm_dpo/margin": 156.46585083007812, + "fcm_dpo/q_t": 0.38924139738082886, + "grad_norm": 35.6130485534668, + "learning_rate": 4.006586590948141e-07, + "logits/chosen": -0.33317434787750244, + "logits/rejected": -0.2766192555427551, + "logps/chosen": -272.461669921875, + "logps/ref_chosen": -65.63668823242188, + "logps/ref_rejected": -73.87184143066406, + "logps/rejected": -437.16265869140625, + "loss": 1.0237, + "margin_dpo/margin_mean": 156.4658660888672, + "margin_dpo/margin_std": 162.3227996826172, + "step": 250 + }, + { + "KL/chosen_KL_mean": -218.11549377441406, + "KL/mean": -271.47332763671875, + "KL/rejected_KL_mean": -324.8311767578125, + "KL/std": 169.133056640625, + "epoch": 0.368575624082232, + "fcm_dpo/beta": 0.0030482178553938866, + "fcm_dpo/delta": 0.07727696746587753, + "fcm_dpo/margin": 106.71568298339844, + "fcm_dpo/q_t": 0.42633721232414246, + "grad_norm": 28.103612899780273, + "learning_rate": 3.9963255888117325e-07, + "logits/chosen": -0.32768577337265015, + "logits/rejected": -0.30002111196517944, + "logps/chosen": -275.2982177734375, + "logps/ref_chosen": -57.182716369628906, + "logps/ref_rejected": -77.66343688964844, + "logps/rejected": -402.4945983886719, + "loss": 1.1649, + "margin_dpo/margin_mean": 106.71568298339844, + "margin_dpo/margin_std": 182.81895446777344, + "step": 251 + }, + { + "KL/chosen_KL_mean": -215.59848022460938, + "KL/mean": -287.9356689453125, + "KL/rejected_KL_mean": -360.2728271484375, + "KL/std": 146.37741088867188, + "epoch": 0.3700440528634361, + "fcm_dpo/beta": 0.0030416897498071194, + "fcm_dpo/delta": -0.04213904216885567, + "fcm_dpo/margin": 144.67434692382812, + "fcm_dpo/q_t": 0.394910991191864, + "grad_norm": 24.940649032592773, + "learning_rate": 3.9860251571044666e-07, + "logits/chosen": -0.4142671227455139, + "logits/rejected": -0.37881606817245483, + "logps/chosen": -287.28411865234375, + "logps/ref_chosen": -71.68563842773438, + "logps/ref_rejected": -84.75799560546875, + "logps/rejected": -445.03082275390625, + "loss": 1.0441, + "margin_dpo/margin_mean": 144.67434692382812, + "margin_dpo/margin_std": 149.41883850097656, + "step": 252 + }, + { + "KL/chosen_KL_mean": -186.49574279785156, + "KL/mean": -257.6673583984375, + "KL/rejected_KL_mean": -328.8389892578125, + "KL/std": 157.70240783691406, + "epoch": 0.37151248164464024, + "fcm_dpo/beta": 0.003045113291591406, + "fcm_dpo/delta": -0.03548625111579895, + "fcm_dpo/margin": 142.34326171875, + "fcm_dpo/q_t": 0.39946746826171875, + "grad_norm": 19.748661041259766, + "learning_rate": 3.9756855672522986e-07, + "logits/chosen": -0.39257919788360596, + "logits/rejected": -0.3857148289680481, + "logps/chosen": -255.62966918945312, + "logps/ref_chosen": -69.1339340209961, + "logps/ref_rejected": -98.70252990722656, + "logps/rejected": -427.54150390625, + "loss": 1.0705, + "margin_dpo/margin_mean": 142.34324645996094, + "margin_dpo/margin_std": 176.71458435058594, + "step": 253 + }, + { + "KL/chosen_KL_mean": -179.48486328125, + "KL/mean": -239.58071899414062, + "KL/rejected_KL_mean": -299.67657470703125, + "KL/std": 164.30686950683594, + "epoch": 0.37298091042584436, + "fcm_dpo/beta": 0.0030348035506904125, + "fcm_dpo/delta": 0.03659197315573692, + "fcm_dpo/margin": 120.19171142578125, + "fcm_dpo/q_t": 0.4199643135070801, + "grad_norm": 25.14466094970703, + "learning_rate": 3.965307091713037e-07, + "logits/chosen": -0.3829053044319153, + "logits/rejected": -0.36821985244750977, + "logps/chosen": -233.63986206054688, + "logps/ref_chosen": -54.154998779296875, + "logps/ref_rejected": -90.30764770507812, + "logps/rejected": -389.9842224121094, + "loss": 1.1421, + "margin_dpo/margin_mean": 120.19171142578125, + "margin_dpo/margin_std": 206.28839111328125, + "step": 254 + }, + { + "KL/chosen_KL_mean": -183.00448608398438, + "KL/mean": -247.2868194580078, + "KL/rejected_KL_mean": -311.56915283203125, + "KL/std": 143.20118713378906, + "epoch": 0.3744493392070485, + "fcm_dpo/beta": 0.0030361046083271503, + "fcm_dpo/delta": 0.009780865162611008, + "fcm_dpo/margin": 128.564697265625, + "fcm_dpo/q_t": 0.4100106954574585, + "grad_norm": 20.29219627380371, + "learning_rate": 3.954890003969163e-07, + "logits/chosen": -0.34874504804611206, + "logits/rejected": -0.338106632232666, + "logps/chosen": -240.14614868164062, + "logps/ref_chosen": -57.14167022705078, + "logps/ref_rejected": -90.2085952758789, + "logps/rejected": -401.77777099609375, + "loss": 1.1067, + "margin_dpo/margin_mean": 128.564697265625, + "margin_dpo/margin_std": 180.92298889160156, + "step": 255 + }, + { + "KL/chosen_KL_mean": -158.49624633789062, + "KL/mean": -227.50888061523438, + "KL/rejected_KL_mean": -296.521484375, + "KL/std": 154.35052490234375, + "epoch": 0.37591776798825255, + "fcm_dpo/beta": 0.003040488576516509, + "fcm_dpo/delta": -0.02052391692996025, + "fcm_dpo/margin": 138.02523803710938, + "fcm_dpo/q_t": 0.40354132652282715, + "grad_norm": 26.907196044921875, + "learning_rate": 3.944434578520628e-07, + "logits/chosen": -0.31947365403175354, + "logits/rejected": -0.3274417519569397, + "logps/chosen": -213.6597442626953, + "logps/ref_chosen": -55.163490295410156, + "logps/ref_rejected": -92.56291961669922, + "logps/rejected": -389.08441162109375, + "loss": 1.0781, + "margin_dpo/margin_mean": 138.02523803710938, + "margin_dpo/margin_std": 178.26329040527344, + "step": 256 + }, + { + "KL/chosen_KL_mean": -155.09429931640625, + "KL/mean": -228.4635009765625, + "KL/rejected_KL_mean": -301.83270263671875, + "KL/std": 161.17501831054688, + "epoch": 0.37738619676945667, + "fcm_dpo/beta": 0.0029973145574331284, + "fcm_dpo/delta": -0.042888298630714417, + "fcm_dpo/margin": 146.73841857910156, + "fcm_dpo/q_t": 0.4001276195049286, + "grad_norm": 20.602216720581055, + "learning_rate": 3.933941090877615e-07, + "logits/chosen": -0.35195407271385193, + "logits/rejected": -0.338517963886261, + "logps/chosen": -204.51800537109375, + "logps/ref_chosen": -49.42369842529297, + "logps/ref_rejected": -79.53791809082031, + "logps/rejected": -381.37060546875, + "loss": 1.0685, + "margin_dpo/margin_mean": 146.73841857910156, + "margin_dpo/margin_std": 182.62957763671875, + "step": 257 + }, + { + "KL/chosen_KL_mean": -204.89328002929688, + "KL/mean": -276.2060546875, + "KL/rejected_KL_mean": -347.518798828125, + "KL/std": 168.318115234375, + "epoch": 0.3788546255506608, + "fcm_dpo/beta": 0.0030030158814042807, + "fcm_dpo/delta": -0.029582539573311806, + "fcm_dpo/margin": 142.62550354003906, + "fcm_dpo/q_t": 0.40110859274864197, + "grad_norm": 29.499923706054688, + "learning_rate": 3.923409817553284e-07, + "logits/chosen": -0.3328630030155182, + "logits/rejected": -0.33087849617004395, + "logps/chosen": -264.27740478515625, + "logps/ref_chosen": -59.384124755859375, + "logps/ref_rejected": -95.99010467529297, + "logps/rejected": -443.5089111328125, + "loss": 1.0867, + "margin_dpo/margin_mean": 142.62550354003906, + "margin_dpo/margin_std": 199.8113250732422, + "step": 258 + }, + { + "KL/chosen_KL_mean": -198.52450561523438, + "KL/mean": -258.52996826171875, + "KL/rejected_KL_mean": -318.53546142578125, + "KL/std": 160.81214904785156, + "epoch": 0.3803230543318649, + "fcm_dpo/beta": 0.0030021152924746275, + "fcm_dpo/delta": 0.04111909121274948, + "fcm_dpo/margin": 120.01093292236328, + "fcm_dpo/q_t": 0.41781848669052124, + "grad_norm": 23.914457321166992, + "learning_rate": 3.9128410360564793e-07, + "logits/chosen": -0.4086461663246155, + "logits/rejected": -0.4056541323661804, + "logps/chosen": -251.35284423828125, + "logps/ref_chosen": -52.828346252441406, + "logps/ref_rejected": -89.191650390625, + "logps/rejected": -407.72711181640625, + "loss": 1.1286, + "margin_dpo/margin_mean": 120.01094055175781, + "margin_dpo/margin_std": 181.0947723388672, + "step": 259 + }, + { + "KL/chosen_KL_mean": -203.28500366210938, + "KL/mean": -284.781982421875, + "KL/rejected_KL_mean": -366.2789306640625, + "KL/std": 167.090087890625, + "epoch": 0.38179148311306904, + "fcm_dpo/beta": 0.0029884944669902325, + "fcm_dpo/delta": -0.09151086211204529, + "fcm_dpo/margin": 162.99392700195312, + "fcm_dpo/q_t": 0.3894280791282654, + "grad_norm": 31.707292556762695, + "learning_rate": 3.9022350248844246e-07, + "logits/chosen": -0.3748651146888733, + "logits/rejected": -0.391143798828125, + "logps/chosen": -250.7026824951172, + "logps/ref_chosen": -47.41767501831055, + "logps/ref_rejected": -95.08978271484375, + "logps/rejected": -461.36871337890625, + "loss": 1.0208, + "margin_dpo/margin_mean": 162.99392700195312, + "margin_dpo/margin_std": 178.20040893554688, + "step": 260 + }, + { + "KL/chosen_KL_mean": -212.65603637695312, + "KL/mean": -288.040771484375, + "KL/rejected_KL_mean": -363.4255065917969, + "KL/std": 181.0100555419922, + "epoch": 0.3832599118942731, + "fcm_dpo/beta": 0.002936106640845537, + "fcm_dpo/delta": -0.04489829018712044, + "fcm_dpo/margin": 150.76947021484375, + "fcm_dpo/q_t": 0.39958545565605164, + "grad_norm": 21.78121566772461, + "learning_rate": 3.891592063515376e-07, + "logits/chosen": -0.3127421438694, + "logits/rejected": -0.31155508756637573, + "logps/chosen": -265.68743896484375, + "logps/ref_chosen": -53.03137969970703, + "logps/ref_rejected": -88.51494598388672, + "logps/rejected": -451.9404602050781, + "loss": 1.0713, + "margin_dpo/margin_mean": 150.76947021484375, + "margin_dpo/margin_std": 200.406005859375, + "step": 261 + }, + { + "KL/chosen_KL_mean": -252.27764892578125, + "KL/mean": -314.0080871582031, + "KL/rejected_KL_mean": -375.738525390625, + "KL/std": 165.99118041992188, + "epoch": 0.38472834067547723, + "fcm_dpo/beta": 0.0029631485231220722, + "fcm_dpo/delta": 0.0350569412112236, + "fcm_dpo/margin": 123.46089172363281, + "fcm_dpo/q_t": 0.4153136610984802, + "grad_norm": 28.57038688659668, + "learning_rate": 3.880912432401264e-07, + "logits/chosen": -0.3218010663986206, + "logits/rejected": -0.2904987037181854, + "logps/chosen": -311.89776611328125, + "logps/ref_chosen": -59.620140075683594, + "logps/ref_rejected": -86.41853332519531, + "logps/rejected": -462.15704345703125, + "loss": 1.1117, + "margin_dpo/margin_mean": 123.46089172363281, + "margin_dpo/margin_std": 167.72061157226562, + "step": 262 + }, + { + "KL/chosen_KL_mean": -227.56837463378906, + "KL/mean": -315.9892883300781, + "KL/rejected_KL_mean": -404.4101867675781, + "KL/std": 192.87933349609375, + "epoch": 0.38619676945668135, + "fcm_dpo/beta": 0.0028930227272212505, + "fcm_dpo/delta": -0.11854880303144455, + "fcm_dpo/margin": 176.84181213378906, + "fcm_dpo/q_t": 0.3831733465194702, + "grad_norm": 26.555845260620117, + "learning_rate": 3.870196412960302e-07, + "logits/chosen": -0.3501706123352051, + "logits/rejected": -0.3262799084186554, + "logps/chosen": -286.98931884765625, + "logps/ref_chosen": -59.42094421386719, + "logps/ref_rejected": -96.85720825195312, + "logps/rejected": -501.26739501953125, + "loss": 1.0194, + "margin_dpo/margin_mean": 176.84181213378906, + "margin_dpo/margin_std": 203.54107666015625, + "step": 263 + }, + { + "KL/chosen_KL_mean": -234.73135375976562, + "KL/mean": -310.48260498046875, + "KL/rejected_KL_mean": -386.2339172363281, + "KL/std": 176.64036560058594, + "epoch": 0.3876651982378855, + "fcm_dpo/beta": 0.0028530117124319077, + "fcm_dpo/delta": -0.034962985664606094, + "fcm_dpo/margin": 151.5025634765625, + "fcm_dpo/q_t": 0.40188103914260864, + "grad_norm": 29.80936622619629, + "learning_rate": 3.8594442875695665e-07, + "logits/chosen": -0.38784724473953247, + "logits/rejected": -0.377646803855896, + "logps/chosen": -297.45343017578125, + "logps/ref_chosen": -62.722084045410156, + "logps/ref_rejected": -93.85620880126953, + "logps/rejected": -480.09014892578125, + "loss": 1.0785, + "margin_dpo/margin_mean": 151.50254821777344, + "margin_dpo/margin_std": 198.95477294921875, + "step": 264 + }, + { + "KL/chosen_KL_mean": -255.1561279296875, + "KL/mean": -327.5084533691406, + "KL/rejected_KL_mean": -399.8607482910156, + "KL/std": 199.5748291015625, + "epoch": 0.3891336270190896, + "fcm_dpo/beta": 0.0028611307498067617, + "fcm_dpo/delta": -0.014629107899963856, + "fcm_dpo/margin": 144.70462036132812, + "fcm_dpo/q_t": 0.4080343246459961, + "grad_norm": 29.3031005859375, + "learning_rate": 3.848656339557562e-07, + "logits/chosen": -0.3426782488822937, + "logits/rejected": -0.32741084694862366, + "logps/chosen": -317.12762451171875, + "logps/ref_chosen": -61.971466064453125, + "logps/ref_rejected": -88.02059936523438, + "logps/rejected": -487.88134765625, + "loss": 1.1186, + "margin_dpo/margin_mean": 144.70462036132812, + "margin_dpo/margin_std": 237.73236083984375, + "step": 265 + }, + { + "KL/chosen_KL_mean": -252.57809448242188, + "KL/mean": -311.6585693359375, + "KL/rejected_KL_mean": -370.73907470703125, + "KL/std": 166.17433166503906, + "epoch": 0.39060205580029367, + "fcm_dpo/beta": 0.002886436879634857, + "fcm_dpo/delta": 0.061003364622592926, + "fcm_dpo/margin": 118.16098022460938, + "fcm_dpo/q_t": 0.42259740829467773, + "grad_norm": 43.39963912963867, + "learning_rate": 3.8378328531967507e-07, + "logits/chosen": -0.37320268154144287, + "logits/rejected": -0.3327832818031311, + "logps/chosen": -319.6777648925781, + "logps/ref_chosen": -67.09967041015625, + "logps/ref_rejected": -67.97122192382812, + "logps/rejected": -438.7103271484375, + "loss": 1.146, + "margin_dpo/margin_mean": 118.16098022460938, + "margin_dpo/margin_std": 192.56309509277344, + "step": 266 + }, + { + "KL/chosen_KL_mean": -226.99365234375, + "KL/mean": -299.7353515625, + "KL/rejected_KL_mean": -372.47698974609375, + "KL/std": 180.788818359375, + "epoch": 0.3920704845814978, + "fcm_dpo/beta": 0.002876041457056999, + "fcm_dpo/delta": -0.01955413445830345, + "fcm_dpo/margin": 145.48333740234375, + "fcm_dpo/q_t": 0.40417009592056274, + "grad_norm": 31.87999153137207, + "learning_rate": 3.8269741136960646e-07, + "logits/chosen": -0.39031726121902466, + "logits/rejected": -0.3596029281616211, + "logps/chosen": -295.96441650390625, + "logps/ref_chosen": -68.97075653076172, + "logps/ref_rejected": -90.16844940185547, + "logps/rejected": -462.64544677734375, + "loss": 1.0906, + "margin_dpo/margin_mean": 145.48333740234375, + "margin_dpo/margin_std": 203.5807342529297, + "step": 267 + }, + { + "KL/chosen_KL_mean": -232.36375427246094, + "KL/mean": -301.0833740234375, + "KL/rejected_KL_mean": -369.802978515625, + "KL/std": 167.608154296875, + "epoch": 0.3935389133627019, + "fcm_dpo/beta": 0.0028773611411452293, + "fcm_dpo/delta": 0.004575518891215324, + "fcm_dpo/margin": 137.43919372558594, + "fcm_dpo/q_t": 0.4103718400001526, + "grad_norm": 29.13970184326172, + "learning_rate": 3.8160804071933894e-07, + "logits/chosen": -0.3367459774017334, + "logits/rejected": -0.34343862533569336, + "logps/chosen": -288.2640686035156, + "logps/ref_chosen": -55.90031051635742, + "logps/ref_rejected": -101.64763641357422, + "logps/rejected": -471.4505920410156, + "loss": 1.1069, + "margin_dpo/margin_mean": 137.439208984375, + "margin_dpo/margin_std": 202.26385498046875, + "step": 268 + }, + { + "KL/chosen_KL_mean": -247.22647094726562, + "KL/mean": -333.1874084472656, + "KL/rejected_KL_mean": -419.1483154296875, + "KL/std": 174.40249633789062, + "epoch": 0.39500734214390604, + "fcm_dpo/beta": 0.002847407478839159, + "fcm_dpo/delta": -0.09410010278224945, + "fcm_dpo/margin": 171.92185974121094, + "fcm_dpo/q_t": 0.38866060972213745, + "grad_norm": 24.988513946533203, + "learning_rate": 3.8051520207480204e-07, + "logits/chosen": -0.3828558027744293, + "logits/rejected": -0.363941490650177, + "logps/chosen": -317.26605224609375, + "logps/ref_chosen": -70.03955841064453, + "logps/ref_rejected": -107.34937286376953, + "logps/rejected": -526.4976806640625, + "loss": 1.0466, + "margin_dpo/margin_mean": 171.92185974121094, + "margin_dpo/margin_std": 220.83987426757812, + "step": 269 + }, + { + "KL/chosen_KL_mean": -214.0692138671875, + "KL/mean": -278.02642822265625, + "KL/rejected_KL_mean": -341.98370361328125, + "KL/std": 153.86318969726562, + "epoch": 0.3964757709251101, + "fcm_dpo/beta": 0.002855871804058552, + "fcm_dpo/delta": 0.035725079476833344, + "fcm_dpo/margin": 127.91445922851562, + "fcm_dpo/q_t": 0.41616952419281006, + "grad_norm": 27.773122787475586, + "learning_rate": 3.794189242333106e-07, + "logits/chosen": -0.44005024433135986, + "logits/rejected": -0.43610844016075134, + "logps/chosen": -283.6026916503906, + "logps/ref_chosen": -69.53347778320312, + "logps/ref_rejected": -109.92864990234375, + "logps/rejected": -451.9123229980469, + "loss": 1.1275, + "margin_dpo/margin_mean": 127.91445922851562, + "margin_dpo/margin_std": 195.2829132080078, + "step": 270 + }, + { + "KL/chosen_KL_mean": -198.44947814941406, + "KL/mean": -275.9836120605469, + "KL/rejected_KL_mean": -353.5177001953125, + "KL/std": 153.60189819335938, + "epoch": 0.39794419970631423, + "fcm_dpo/beta": 0.002827045973390341, + "fcm_dpo/delta": -0.04044891148805618, + "fcm_dpo/margin": 155.06826782226562, + "fcm_dpo/q_t": 0.3988415598869324, + "grad_norm": 24.570371627807617, + "learning_rate": 3.7831923608280514e-07, + "logits/chosen": -0.36762213706970215, + "logits/rejected": -0.35115593671798706, + "logps/chosen": -255.21405029296875, + "logps/ref_chosen": -56.76456832885742, + "logps/ref_rejected": -92.51383972167969, + "logps/rejected": -446.03155517578125, + "loss": 1.0517, + "margin_dpo/margin_mean": 155.06826782226562, + "margin_dpo/margin_std": 173.97885131835938, + "step": 271 + }, + { + "KL/chosen_KL_mean": -188.15049743652344, + "KL/mean": -285.01849365234375, + "KL/rejected_KL_mean": -381.8865051269531, + "KL/std": 175.69546508789062, + "epoch": 0.39941262848751835, + "fcm_dpo/beta": 0.002780818846076727, + "fcm_dpo/delta": -0.1463950276374817, + "fcm_dpo/margin": 193.7360076904297, + "fcm_dpo/q_t": 0.3746742010116577, + "grad_norm": 35.40150451660156, + "learning_rate": 3.772161666010912e-07, + "logits/chosen": -0.3044808804988861, + "logits/rejected": -0.3166738450527191, + "logps/chosen": -237.64764404296875, + "logps/ref_chosen": -49.497154235839844, + "logps/ref_rejected": -105.54279327392578, + "logps/rejected": -487.4293212890625, + "loss": 0.9804, + "margin_dpo/margin_mean": 193.7360076904297, + "margin_dpo/margin_std": 182.89303588867188, + "step": 272 + }, + { + "KL/chosen_KL_mean": -219.2149658203125, + "KL/mean": -313.57196044921875, + "KL/rejected_KL_mean": -407.92889404296875, + "KL/std": 178.0384063720703, + "epoch": 0.4008810572687225, + "fcm_dpo/beta": 0.002692791633307934, + "fcm_dpo/delta": -0.11455152183771133, + "fcm_dpo/margin": 188.71389770507812, + "fcm_dpo/q_t": 0.3825136423110962, + "grad_norm": 26.675121307373047, + "learning_rate": 3.761097448550755e-07, + "logits/chosen": -0.32241398096084595, + "logits/rejected": -0.30522340536117554, + "logps/chosen": -282.19036865234375, + "logps/ref_chosen": -62.97539520263672, + "logps/ref_rejected": -92.49858093261719, + "logps/rejected": -500.427490234375, + "loss": 1.0069, + "margin_dpo/margin_mean": 188.71389770507812, + "margin_dpo/margin_std": 197.89047241210938, + "step": 273 + }, + { + "KL/chosen_KL_mean": -264.813232421875, + "KL/mean": -337.2680358886719, + "KL/rejected_KL_mean": -409.72283935546875, + "KL/std": 165.03564453125, + "epoch": 0.4023494860499266, + "fcm_dpo/beta": 0.0026972047053277493, + "fcm_dpo/delta": 0.00932791456580162, + "fcm_dpo/margin": 144.9096221923828, + "fcm_dpo/q_t": 0.4096784293651581, + "grad_norm": 25.53626823425293, + "learning_rate": 3.75e-07, + "logits/chosen": -0.30063068866729736, + "logits/rejected": -0.2833949625492096, + "logps/chosen": -320.48095703125, + "logps/ref_chosen": -55.66770935058594, + "logps/ref_rejected": -77.33308410644531, + "logps/rejected": -487.055908203125, + "loss": 1.0939, + "margin_dpo/margin_mean": 144.9096221923828, + "margin_dpo/margin_std": 187.6047821044922, + "step": 274 + }, + { + "KL/chosen_KL_mean": -206.4573974609375, + "KL/mean": -283.26287841796875, + "KL/rejected_KL_mean": -360.0683898925781, + "KL/std": 167.62567138671875, + "epoch": 0.40381791483113066, + "fcm_dpo/beta": 0.0026927865110337734, + "fcm_dpo/delta": -0.0143581572920084, + "fcm_dpo/margin": 153.6110076904297, + "fcm_dpo/q_t": 0.4042346179485321, + "grad_norm": 24.385211944580078, + "learning_rate": 3.738869612786737e-07, + "logits/chosen": -0.32019296288490295, + "logits/rejected": -0.32504212856292725, + "logps/chosen": -255.0521240234375, + "logps/ref_chosen": -48.594703674316406, + "logps/ref_rejected": -93.30369567871094, + "logps/rejected": -453.3720703125, + "loss": 1.0768, + "margin_dpo/margin_mean": 153.61099243164062, + "margin_dpo/margin_std": 189.52232360839844, + "step": 275 + }, + { + "KL/chosen_KL_mean": -225.26080322265625, + "KL/mean": -302.2386779785156, + "KL/rejected_KL_mean": -379.216552734375, + "KL/std": 172.80694580078125, + "epoch": 0.4052863436123348, + "fcm_dpo/beta": 0.002671858761459589, + "fcm_dpo/delta": -0.011981412768363953, + "fcm_dpo/margin": 153.95571899414062, + "fcm_dpo/q_t": 0.40615737438201904, + "grad_norm": 26.473548889160156, + "learning_rate": 3.7277065802070204e-07, + "logits/chosen": -0.3291136622428894, + "logits/rejected": -0.30698275566101074, + "logps/chosen": -281.83819580078125, + "logps/ref_chosen": -56.57740783691406, + "logps/ref_rejected": -70.36566925048828, + "logps/rejected": -449.58221435546875, + "loss": 1.0868, + "margin_dpo/margin_mean": 153.9557342529297, + "margin_dpo/margin_std": 205.688232421875, + "step": 276 + }, + { + "KL/chosen_KL_mean": -248.16168212890625, + "KL/mean": -327.30120849609375, + "KL/rejected_KL_mean": -406.44073486328125, + "KL/std": 174.60836791992188, + "epoch": 0.4067547723935389, + "fcm_dpo/beta": 0.002672237576916814, + "fcm_dpo/delta": -0.02400265261530876, + "fcm_dpo/margin": 158.27906799316406, + "fcm_dpo/q_t": 0.4026916027069092, + "grad_norm": 30.444353103637695, + "learning_rate": 3.71651119641714e-07, + "logits/chosen": -0.32184985280036926, + "logits/rejected": -0.30650681257247925, + "logps/chosen": -304.4332275390625, + "logps/ref_chosen": -56.27156066894531, + "logps/ref_rejected": -92.88127136230469, + "logps/rejected": -499.322021484375, + "loss": 1.0794, + "margin_dpo/margin_mean": 158.27908325195312, + "margin_dpo/margin_std": 206.87417602539062, + "step": 277 + }, + { + "KL/chosen_KL_mean": -222.39517211914062, + "KL/mean": -314.3505859375, + "KL/rejected_KL_mean": -406.30596923828125, + "KL/std": 186.62579345703125, + "epoch": 0.40822320117474303, + "fcm_dpo/beta": 0.0026234271936118603, + "fcm_dpo/delta": -0.08697425574064255, + "fcm_dpo/margin": 183.9108123779297, + "fcm_dpo/q_t": 0.389728844165802, + "grad_norm": 26.94320297241211, + "learning_rate": 3.705283756425872e-07, + "logits/chosen": -0.34066635370254517, + "logits/rejected": -0.3471217155456543, + "logps/chosen": -275.33709716796875, + "logps/ref_chosen": -52.94194030761719, + "logps/ref_rejected": -91.25357818603516, + "logps/rejected": -497.5595397949219, + "loss": 1.0289, + "margin_dpo/margin_mean": 183.9108123779297, + "margin_dpo/margin_std": 208.0062255859375, + "step": 278 + }, + { + "KL/chosen_KL_mean": -254.62709045410156, + "KL/mean": -344.94903564453125, + "KL/rejected_KL_mean": -435.2709655761719, + "KL/std": 193.30276489257812, + "epoch": 0.40969162995594716, + "fcm_dpo/beta": 0.0025754275266081095, + "fcm_dpo/delta": -0.06957367807626724, + "fcm_dpo/margin": 180.64385986328125, + "fcm_dpo/q_t": 0.39524978399276733, + "grad_norm": 29.17497444152832, + "learning_rate": 3.6940245560867e-07, + "logits/chosen": -0.26812469959259033, + "logits/rejected": -0.26797914505004883, + "logps/chosen": -303.2684326171875, + "logps/ref_chosen": -48.641319274902344, + "logps/ref_rejected": -87.8514404296875, + "logps/rejected": -523.1224365234375, + "loss": 1.061, + "margin_dpo/margin_mean": 180.6438751220703, + "margin_dpo/margin_std": 237.0701141357422, + "step": 279 + }, + { + "KL/chosen_KL_mean": -249.67874145507812, + "KL/mean": -342.8053894042969, + "KL/rejected_KL_mean": -435.93206787109375, + "KL/std": 178.5480499267578, + "epoch": 0.4111600587371512, + "fcm_dpo/beta": 0.002550060860812664, + "fcm_dpo/delta": -0.07875210046768188, + "fcm_dpo/margin": 186.25331115722656, + "fcm_dpo/q_t": 0.3887876272201538, + "grad_norm": 33.25874710083008, + "learning_rate": 3.6827338920900253e-07, + "logits/chosen": -0.29288673400878906, + "logits/rejected": -0.29508256912231445, + "logps/chosen": -308.4758605957031, + "logps/ref_chosen": -58.797122955322266, + "logps/ref_rejected": -98.61885070800781, + "logps/rejected": -534.5509033203125, + "loss": 1.0245, + "margin_dpo/margin_mean": 186.25331115722656, + "margin_dpo/margin_std": 194.61471557617188, + "step": 280 + }, + { + "KL/chosen_KL_mean": -224.7278289794922, + "KL/mean": -309.36773681640625, + "KL/rejected_KL_mean": -394.0076904296875, + "KL/std": 175.15994262695312, + "epoch": 0.41262848751835535, + "fcm_dpo/beta": 0.0025381785817444324, + "fcm_dpo/delta": -0.031121131032705307, + "fcm_dpo/margin": 169.27987670898438, + "fcm_dpo/q_t": 0.3983193635940552, + "grad_norm": 21.142444610595703, + "learning_rate": 3.6714120619553435e-07, + "logits/chosen": -0.33014634251594543, + "logits/rejected": -0.30275779962539673, + "logps/chosen": -280.2163391113281, + "logps/ref_chosen": -55.488521575927734, + "logps/ref_rejected": -80.88258361816406, + "logps/rejected": -474.8902587890625, + "loss": 1.0583, + "margin_dpo/margin_mean": 169.27987670898438, + "margin_dpo/margin_std": 189.16592407226562, + "step": 281 + }, + { + "KL/chosen_KL_mean": -250.84788513183594, + "KL/mean": -315.7371826171875, + "KL/rejected_KL_mean": -380.62646484375, + "KL/std": 179.92788696289062, + "epoch": 0.41409691629955947, + "fcm_dpo/beta": 0.002561165951192379, + "fcm_dpo/delta": 0.06942006200551987, + "fcm_dpo/margin": 129.77859497070312, + "fcm_dpo/q_t": 0.42569971084594727, + "grad_norm": 26.15049934387207, + "learning_rate": 3.660059364023408e-07, + "logits/chosen": -0.41560858488082886, + "logits/rejected": -0.3941164016723633, + "logps/chosen": -323.91802978515625, + "logps/ref_chosen": -73.07014465332031, + "logps/ref_rejected": -95.35098266601562, + "logps/rejected": -475.9774475097656, + "loss": 1.1459, + "margin_dpo/margin_mean": 129.77859497070312, + "margin_dpo/margin_std": 209.7483673095703, + "step": 282 + }, + { + "KL/chosen_KL_mean": -260.31744384765625, + "KL/mean": -355.35028076171875, + "KL/rejected_KL_mean": -450.3831787109375, + "KL/std": 210.8460693359375, + "epoch": 0.4155653450807636, + "fcm_dpo/beta": 0.002537979045882821, + "fcm_dpo/delta": -0.08652851730585098, + "fcm_dpo/margin": 190.0657196044922, + "fcm_dpo/q_t": 0.3873726427555084, + "grad_norm": 35.29081726074219, + "learning_rate": 3.6486760974483685e-07, + "logits/chosen": -0.3637465834617615, + "logits/rejected": -0.3678331673145294, + "logps/chosen": -322.21588134765625, + "logps/ref_chosen": -61.89844512939453, + "logps/ref_rejected": -96.98655700683594, + "logps/rejected": -547.3697509765625, + "loss": 1.0228, + "margin_dpo/margin_mean": 190.0657196044922, + "margin_dpo/margin_std": 199.068115234375, + "step": 283 + }, + { + "KL/chosen_KL_mean": -247.5406494140625, + "KL/mean": -339.8221435546875, + "KL/rejected_KL_mean": -432.1036071777344, + "KL/std": 199.65481567382812, + "epoch": 0.4170337738619677, + "fcm_dpo/beta": 0.002489683451130986, + "fcm_dpo/delta": -0.06252136826515198, + "fcm_dpo/margin": 184.56295776367188, + "fcm_dpo/q_t": 0.39381855726242065, + "grad_norm": 27.652767181396484, + "learning_rate": 3.6372625621898863e-07, + "logits/chosen": -0.3889576494693756, + "logits/rejected": -0.37424755096435547, + "logps/chosen": -305.9761962890625, + "logps/ref_chosen": -58.4355354309082, + "logps/ref_rejected": -93.46926879882812, + "logps/rejected": -525.5728759765625, + "loss": 1.0346, + "margin_dpo/margin_mean": 184.56295776367188, + "margin_dpo/margin_std": 199.2593994140625, + "step": 284 + }, + { + "KL/chosen_KL_mean": -287.4296875, + "KL/mean": -370.3851318359375, + "KL/rejected_KL_mean": -453.340576171875, + "KL/std": 177.46621704101562, + "epoch": 0.4185022026431718, + "fcm_dpo/beta": 0.0024931158404797316, + "fcm_dpo/delta": -0.014689784497022629, + "fcm_dpo/margin": 165.91085815429688, + "fcm_dpo/q_t": 0.4023195803165436, + "grad_norm": 25.144207000732422, + "learning_rate": 3.625819059005228e-07, + "logits/chosen": -0.35408467054367065, + "logits/rejected": -0.3383770287036896, + "logps/chosen": -353.661865234375, + "logps/ref_chosen": -66.23219299316406, + "logps/ref_rejected": -99.1268310546875, + "logps/rejected": -552.4674072265625, + "loss": 1.0729, + "margin_dpo/margin_mean": 165.91085815429688, + "margin_dpo/margin_std": 193.71629333496094, + "step": 285 + }, + { + "KL/chosen_KL_mean": -297.5478210449219, + "KL/mean": -387.598388671875, + "KL/rejected_KL_mean": -477.64892578125, + "KL/std": 204.80935668945312, + "epoch": 0.4199706314243759, + "fcm_dpo/beta": 0.00246500875800848, + "fcm_dpo/delta": -0.04596859961748123, + "fcm_dpo/margin": 180.1011199951172, + "fcm_dpo/q_t": 0.39727091789245605, + "grad_norm": 25.498445510864258, + "learning_rate": 3.614345889441346e-07, + "logits/chosen": -0.37891730666160583, + "logits/rejected": -0.36352336406707764, + "logps/chosen": -370.49884033203125, + "logps/ref_chosen": -72.95100402832031, + "logps/ref_rejected": -88.58845520019531, + "logps/rejected": -566.2373657226562, + "loss": 1.0588, + "margin_dpo/margin_mean": 180.10110473632812, + "margin_dpo/margin_std": 218.45489501953125, + "step": 286 + }, + { + "KL/chosen_KL_mean": -279.0062255859375, + "KL/mean": -351.84539794921875, + "KL/rejected_KL_mean": -424.6845703125, + "KL/std": 178.66867065429688, + "epoch": 0.42143906020558003, + "fcm_dpo/beta": 0.0024740160442888737, + "fcm_dpo/delta": 0.04098087176680565, + "fcm_dpo/margin": 145.67831420898438, + "fcm_dpo/q_t": 0.41632279753685, + "grad_norm": 28.178863525390625, + "learning_rate": 3.6028433558269275e-07, + "logits/chosen": -0.37850940227508545, + "logits/rejected": -0.35931217670440674, + "logps/chosen": -340.5473937988281, + "logps/ref_chosen": -61.54115295410156, + "logps/ref_rejected": -77.69607543945312, + "logps/rejected": -502.380615234375, + "loss": 1.1114, + "margin_dpo/margin_mean": 145.67831420898438, + "margin_dpo/margin_std": 192.67111206054688, + "step": 287 + }, + { + "KL/chosen_KL_mean": -276.2578430175781, + "KL/mean": -370.36175537109375, + "KL/rejected_KL_mean": -464.46563720703125, + "KL/std": 186.77584838867188, + "epoch": 0.42290748898678415, + "fcm_dpo/beta": 0.0024337535724043846, + "fcm_dpo/delta": -0.0626014918088913, + "fcm_dpo/margin": 188.20782470703125, + "fcm_dpo/q_t": 0.3927251994609833, + "grad_norm": 25.88406753540039, + "learning_rate": 3.5913117612644327e-07, + "logits/chosen": -0.34253329038619995, + "logits/rejected": -0.3305118680000305, + "logps/chosen": -332.9190673828125, + "logps/ref_chosen": -56.661224365234375, + "logps/ref_rejected": -87.33570098876953, + "logps/rejected": -551.8013916015625, + "loss": 1.0367, + "margin_dpo/margin_mean": 188.20785522460938, + "margin_dpo/margin_std": 193.08023071289062, + "step": 288 + }, + { + "KL/chosen_KL_mean": -269.5328369140625, + "KL/mean": -373.37274169921875, + "KL/rejected_KL_mean": -477.2126770019531, + "KL/std": 201.24783325195312, + "epoch": 0.4243759177679883, + "fcm_dpo/beta": 0.0024142626207321882, + "fcm_dpo/delta": -0.10661280155181885, + "fcm_dpo/margin": 207.67982482910156, + "fcm_dpo/q_t": 0.38501453399658203, + "grad_norm": 38.1318473815918, + "learning_rate": 3.5797514096221024e-07, + "logits/chosen": -0.2764891982078552, + "logits/rejected": -0.27845776081085205, + "logps/chosen": -314.76324462890625, + "logps/ref_chosen": -45.23039245605469, + "logps/ref_rejected": -87.64266967773438, + "logps/rejected": -564.8553466796875, + "loss": 1.0157, + "margin_dpo/margin_mean": 207.6798095703125, + "margin_dpo/margin_std": 225.15972900390625, + "step": 289 + }, + { + "KL/chosen_KL_mean": -279.3312072753906, + "KL/mean": -385.49371337890625, + "KL/rejected_KL_mean": -491.65625, + "KL/std": 216.3263397216797, + "epoch": 0.42584434654919234, + "fcm_dpo/beta": 0.0023515745997428894, + "fcm_dpo/delta": -0.10482804477214813, + "fcm_dpo/margin": 212.32504272460938, + "fcm_dpo/q_t": 0.38738417625427246, + "grad_norm": 25.646024703979492, + "learning_rate": 3.568162605525952e-07, + "logits/chosen": -0.2992396950721741, + "logits/rejected": -0.318649560213089, + "logps/chosen": -334.8027038574219, + "logps/ref_chosen": -55.47149658203125, + "logps/ref_rejected": -116.70857238769531, + "logps/rejected": -608.3648071289062, + "loss": 1.0308, + "margin_dpo/margin_mean": 212.3250274658203, + "margin_dpo/margin_std": 258.3596496582031, + "step": 290 + }, + { + "KL/chosen_KL_mean": -233.43585205078125, + "KL/mean": -327.27154541015625, + "KL/rejected_KL_mean": -421.1072082519531, + "KL/std": 182.66287231445312, + "epoch": 0.42731277533039647, + "fcm_dpo/beta": 0.002339608035981655, + "fcm_dpo/delta": -0.04106954485177994, + "fcm_dpo/margin": 187.67138671875, + "fcm_dpo/q_t": 0.3976435661315918, + "grad_norm": 23.715065002441406, + "learning_rate": 3.5565456543517485e-07, + "logits/chosen": -0.3613763451576233, + "logits/rejected": -0.3522465229034424, + "logps/chosen": -296.69622802734375, + "logps/ref_chosen": -63.26036834716797, + "logps/ref_rejected": -89.29708862304688, + "logps/rejected": -510.404296875, + "loss": 1.0598, + "margin_dpo/margin_mean": 187.67138671875, + "margin_dpo/margin_std": 221.575927734375, + "step": 291 + }, + { + "KL/chosen_KL_mean": -245.43325805664062, + "KL/mean": -345.85260009765625, + "KL/rejected_KL_mean": -446.27191162109375, + "KL/std": 213.15567016601562, + "epoch": 0.4287812041116006, + "fcm_dpo/beta": 0.0023100622929632664, + "fcm_dpo/delta": -0.06708824634552002, + "fcm_dpo/margin": 200.83868408203125, + "fcm_dpo/q_t": 0.39310041069984436, + "grad_norm": 20.776294708251953, + "learning_rate": 3.5449008622169583e-07, + "logits/chosen": -0.3369968831539154, + "logits/rejected": -0.32491156458854675, + "logps/chosen": -299.351806640625, + "logps/ref_chosen": -53.91852951049805, + "logps/ref_rejected": -89.96138000488281, + "logps/rejected": -536.2332763671875, + "loss": 1.0516, + "margin_dpo/margin_mean": 200.83868408203125, + "margin_dpo/margin_std": 247.34228515625, + "step": 292 + }, + { + "KL/chosen_KL_mean": -245.34991455078125, + "KL/mean": -322.8323669433594, + "KL/rejected_KL_mean": -400.3148193359375, + "KL/std": 209.0297088623047, + "epoch": 0.4302496328928047, + "fcm_dpo/beta": 0.0023124441504478455, + "fcm_dpo/delta": 0.043067529797554016, + "fcm_dpo/margin": 154.9648895263672, + "fcm_dpo/q_t": 0.4190768599510193, + "grad_norm": 49.972896575927734, + "learning_rate": 3.5332285359726846e-07, + "logits/chosen": -0.3593342900276184, + "logits/rejected": -0.35283225774765015, + "logps/chosen": -305.7259521484375, + "logps/ref_chosen": -60.376033782958984, + "logps/ref_rejected": -77.85244750976562, + "logps/rejected": -478.167236328125, + "loss": 1.1325, + "margin_dpo/margin_mean": 154.9648895263672, + "margin_dpo/margin_std": 242.87945556640625, + "step": 293 + }, + { + "KL/chosen_KL_mean": -227.1817626953125, + "KL/mean": -313.35595703125, + "KL/rejected_KL_mean": -399.5301513671875, + "KL/std": 188.7653045654297, + "epoch": 0.43171806167400884, + "fcm_dpo/beta": 0.0023088366724550724, + "fcm_dpo/delta": 0.002053305506706238, + "fcm_dpo/margin": 172.348388671875, + "fcm_dpo/q_t": 0.4094581604003906, + "grad_norm": 26.523639678955078, + "learning_rate": 3.5215289831955786e-07, + "logits/chosen": -0.34593725204467773, + "logits/rejected": -0.3513278663158417, + "logps/chosen": -275.269287109375, + "logps/ref_chosen": -48.0875358581543, + "logps/ref_rejected": -81.89698791503906, + "logps/rejected": -481.4271240234375, + "loss": 1.0954, + "margin_dpo/margin_mean": 172.348388671875, + "margin_dpo/margin_std": 234.5027618408203, + "step": 294 + }, + { + "KL/chosen_KL_mean": -278.93609619140625, + "KL/mean": -374.4381103515625, + "KL/rejected_KL_mean": -469.940185546875, + "KL/std": 214.35740661621094, + "epoch": 0.4331864904552129, + "fcm_dpo/beta": 0.0023033185862004757, + "fcm_dpo/delta": -0.041762471199035645, + "fcm_dpo/margin": 191.0041046142578, + "fcm_dpo/q_t": 0.39998045563697815, + "grad_norm": 33.26468276977539, + "learning_rate": 3.509802512179737e-07, + "logits/chosen": -0.3557334244251251, + "logits/rejected": -0.3590019941329956, + "logps/chosen": -328.86077880859375, + "logps/ref_chosen": -49.92467498779297, + "logps/ref_rejected": -87.45632934570312, + "logps/rejected": -557.396484375, + "loss": 1.0729, + "margin_dpo/margin_mean": 191.0041046142578, + "margin_dpo/margin_std": 251.3482208251953, + "step": 295 + }, + { + "KL/chosen_KL_mean": -355.9974060058594, + "KL/mean": -421.620849609375, + "KL/rejected_KL_mean": -487.2442932128906, + "KL/std": 209.83924865722656, + "epoch": 0.434654919236417, + "fcm_dpo/beta": 0.0022890730760991573, + "fcm_dpo/delta": -0.013507579453289509, + "fcm_dpo/margin": 131.2469024658203, + "fcm_dpo/q_t": 0.4310154318809509, + "grad_norm": 39.7788200378418, + "learning_rate": 3.498049431928577e-07, + "logits/chosen": -0.380574107170105, + "logits/rejected": -0.36485421657562256, + "logps/chosen": -421.4886474609375, + "logps/ref_chosen": -65.49124145507812, + "logps/ref_rejected": -93.08908081054688, + "logps/rejected": -580.3333740234375, + "loss": 1.2009, + "margin_dpo/margin_mean": 131.2469024658203, + "margin_dpo/margin_std": 270.313232421875, + "step": 296 + }, + { + "KL/chosen_KL_mean": -318.48907470703125, + "KL/mean": -398.3763122558594, + "KL/rejected_KL_mean": -478.2635192871094, + "KL/std": 206.62014770507812, + "epoch": 0.43612334801762115, + "fcm_dpo/beta": 0.0022979602217674255, + "fcm_dpo/delta": 0.03410791605710983, + "fcm_dpo/margin": 159.7744598388672, + "fcm_dpo/q_t": 0.41503405570983887, + "grad_norm": 35.391021728515625, + "learning_rate": 3.486270052146694e-07, + "logits/chosen": -0.37360668182373047, + "logits/rejected": -0.3786901831626892, + "logps/chosen": -374.9660339355469, + "logps/ref_chosen": -56.476951599121094, + "logps/ref_rejected": -95.1385498046875, + "logps/rejected": -573.402099609375, + "loss": 1.1089, + "margin_dpo/margin_mean": 159.77444458007812, + "margin_dpo/margin_std": 212.7035369873047, + "step": 297 + }, + { + "KL/chosen_KL_mean": -348.817626953125, + "KL/mean": -454.68817138671875, + "KL/rejected_KL_mean": -560.5587158203125, + "KL/std": 270.39697265625, + "epoch": 0.43759177679882527, + "fcm_dpo/beta": 0.0022850334644317627, + "fcm_dpo/delta": -0.08811478316783905, + "fcm_dpo/margin": 211.7411346435547, + "fcm_dpo/q_t": 0.3950398564338684, + "grad_norm": 28.887287139892578, + "learning_rate": 3.474464683231698e-07, + "logits/chosen": -0.3834819495677948, + "logits/rejected": -0.3983224630355835, + "logps/chosen": -416.14276123046875, + "logps/ref_chosen": -67.32516479492188, + "logps/ref_rejected": -116.66217041015625, + "logps/rejected": -677.2208862304688, + "loss": 1.0737, + "margin_dpo/margin_mean": 211.7411346435547, + "margin_dpo/margin_std": 316.9564514160156, + "step": 298 + }, + { + "KL/chosen_KL_mean": -286.4410095214844, + "KL/mean": -374.9492492675781, + "KL/rejected_KL_mean": -463.45751953125, + "KL/std": 203.78659057617188, + "epoch": 0.4390602055800294, + "fcm_dpo/beta": 0.0022771679796278477, + "fcm_dpo/delta": -0.0039763785898685455, + "fcm_dpo/margin": 177.01644897460938, + "fcm_dpo/q_t": 0.4095514416694641, + "grad_norm": 41.97990417480469, + "learning_rate": 3.462633636266041e-07, + "logits/chosen": -0.36679205298423767, + "logits/rejected": -0.37225013971328735, + "logps/chosen": -335.4031066894531, + "logps/ref_chosen": -48.96209716796875, + "logps/ref_rejected": -84.32823944091797, + "logps/rejected": -547.7857666015625, + "loss": 1.1071, + "margin_dpo/margin_mean": 177.01646423339844, + "margin_dpo/margin_std": 260.2517395019531, + "step": 299 + }, + { + "KL/chosen_KL_mean": -354.22882080078125, + "KL/mean": -464.64483642578125, + "KL/rejected_KL_mean": -575.0608520507812, + "KL/std": 244.3948974609375, + "epoch": 0.44052863436123346, + "fcm_dpo/beta": 0.002236669883131981, + "fcm_dpo/delta": -0.09869952499866486, + "fcm_dpo/margin": 220.83203125, + "fcm_dpo/q_t": 0.38846272230148315, + "grad_norm": 29.504758834838867, + "learning_rate": 3.4507772230088147e-07, + "logits/chosen": -0.35466793179512024, + "logits/rejected": -0.35978570580482483, + "logps/chosen": -413.30255126953125, + "logps/ref_chosen": -59.07371139526367, + "logps/ref_rejected": -95.9664535522461, + "logps/rejected": -671.02734375, + "loss": 1.0565, + "margin_dpo/margin_mean": 220.83203125, + "margin_dpo/margin_std": 298.01202392578125, + "step": 300 + }, + { + "KL/chosen_KL_mean": -297.02484130859375, + "KL/mean": -402.21649169921875, + "KL/rejected_KL_mean": -507.4081115722656, + "KL/std": 221.8513641357422, + "epoch": 0.4419970631424376, + "fcm_dpo/beta": 0.0021925170440226793, + "fcm_dpo/delta": -0.06446747481822968, + "fcm_dpo/margin": 210.38323974609375, + "fcm_dpo/q_t": 0.39620107412338257, + "grad_norm": 24.30299949645996, + "learning_rate": 3.4388957558875316e-07, + "logits/chosen": -0.319614976644516, + "logits/rejected": -0.31558164954185486, + "logps/chosen": -354.27423095703125, + "logps/ref_chosen": -57.249366760253906, + "logps/ref_rejected": -92.35354614257812, + "logps/rejected": -599.7616577148438, + "loss": 1.0522, + "margin_dpo/margin_mean": 210.38323974609375, + "margin_dpo/margin_std": 263.6629333496094, + "step": 301 + }, + { + "KL/chosen_KL_mean": -258.3230895996094, + "KL/mean": -345.97906494140625, + "KL/rejected_KL_mean": -433.63507080078125, + "KL/std": 195.16726684570312, + "epoch": 0.4434654919236417, + "fcm_dpo/beta": 0.0021865563467144966, + "fcm_dpo/delta": 0.017055466771125793, + "fcm_dpo/margin": 175.31198120117188, + "fcm_dpo/q_t": 0.41217830777168274, + "grad_norm": 22.057884216308594, + "learning_rate": 3.426989547989902e-07, + "logits/chosen": -0.3739083409309387, + "logits/rejected": -0.3783670663833618, + "logps/chosen": -309.5210876464844, + "logps/ref_chosen": -51.197994232177734, + "logps/ref_rejected": -97.22636413574219, + "logps/rejected": -530.8614501953125, + "loss": 1.1013, + "margin_dpo/margin_mean": 175.31198120117188, + "margin_dpo/margin_std": 233.5333251953125, + "step": 302 + }, + { + "KL/chosen_KL_mean": -268.55145263671875, + "KL/mean": -352.54443359375, + "KL/rejected_KL_mean": -436.537353515625, + "KL/std": 213.00778198242188, + "epoch": 0.44493392070484583, + "fcm_dpo/beta": 0.002201956696808338, + "fcm_dpo/delta": 0.031198769807815552, + "fcm_dpo/margin": 167.98590087890625, + "fcm_dpo/q_t": 0.4157490134239197, + "grad_norm": 21.407352447509766, + "learning_rate": 3.4150589130555773e-07, + "logits/chosen": -0.3980519771575928, + "logits/rejected": -0.38533222675323486, + "logps/chosen": -335.2654113769531, + "logps/ref_chosen": -66.71394348144531, + "logps/ref_rejected": -86.94542694091797, + "logps/rejected": -523.4827880859375, + "loss": 1.1241, + "margin_dpo/margin_mean": 167.98590087890625, + "margin_dpo/margin_std": 252.49813842773438, + "step": 303 + }, + { + "KL/chosen_KL_mean": -235.60621643066406, + "KL/mean": -327.85736083984375, + "KL/rejected_KL_mean": -420.10845947265625, + "KL/std": 188.72259521484375, + "epoch": 0.44640234948604995, + "fcm_dpo/beta": 0.002216983586549759, + "fcm_dpo/delta": -0.009714346379041672, + "fcm_dpo/margin": 184.502197265625, + "fcm_dpo/q_t": 0.40225833654403687, + "grad_norm": 33.168792724609375, + "learning_rate": 3.403104165467883e-07, + "logits/chosen": -0.42254841327667236, + "logits/rejected": -0.41290074586868286, + "logps/chosen": -307.55694580078125, + "logps/ref_chosen": -71.95069885253906, + "logps/ref_rejected": -90.47203063964844, + "logps/rejected": -510.58050537109375, + "loss": 1.0529, + "margin_dpo/margin_mean": 184.50221252441406, + "margin_dpo/margin_std": 165.41900634765625, + "step": 304 + }, + { + "KL/chosen_KL_mean": -246.27264404296875, + "KL/mean": -327.6571350097656, + "KL/rejected_KL_mean": -409.0416259765625, + "KL/std": 209.47964477539062, + "epoch": 0.447870778267254, + "fcm_dpo/beta": 0.002209282945841551, + "fcm_dpo/delta": 0.04157250002026558, + "fcm_dpo/margin": 162.76901245117188, + "fcm_dpo/q_t": 0.4180421531200409, + "grad_norm": 26.381446838378906, + "learning_rate": 3.391125620245535e-07, + "logits/chosen": -0.4123404622077942, + "logits/rejected": -0.39488470554351807, + "logps/chosen": -313.06787109375, + "logps/ref_chosen": -66.79523468017578, + "logps/ref_rejected": -92.75459289550781, + "logps/rejected": -501.7962341308594, + "loss": 1.1211, + "margin_dpo/margin_mean": 162.76901245117188, + "margin_dpo/margin_std": 229.144775390625, + "step": 305 + }, + { + "KL/chosen_KL_mean": -237.97418212890625, + "KL/mean": -316.761474609375, + "KL/rejected_KL_mean": -395.54876708984375, + "KL/std": 183.5909881591797, + "epoch": 0.44933920704845814, + "fcm_dpo/beta": 0.0022436161525547504, + "fcm_dpo/delta": 0.0480995737016201, + "fcm_dpo/margin": 157.5745849609375, + "fcm_dpo/q_t": 0.417450875043869, + "grad_norm": 32.627220153808594, + "learning_rate": 3.3791235930343417e-07, + "logits/chosen": -0.41048943996429443, + "logits/rejected": -0.3866746425628662, + "logps/chosen": -307.6580810546875, + "logps/ref_chosen": -69.68389892578125, + "logps/ref_rejected": -85.15919494628906, + "logps/rejected": -480.70794677734375, + "loss": 1.1087, + "margin_dpo/margin_mean": 157.5745849609375, + "margin_dpo/margin_std": 195.82992553710938, + "step": 306 + }, + { + "KL/chosen_KL_mean": -217.2437286376953, + "KL/mean": -297.4852294921875, + "KL/rejected_KL_mean": -377.72674560546875, + "KL/std": 166.21441650390625, + "epoch": 0.45080763582966227, + "fcm_dpo/beta": 0.002255768049508333, + "fcm_dpo/delta": 0.03941379487514496, + "fcm_dpo/margin": 160.48297119140625, + "fcm_dpo/q_t": 0.415368914604187, + "grad_norm": 28.697053909301758, + "learning_rate": 3.367098400098881e-07, + "logits/chosen": -0.4190334677696228, + "logits/rejected": -0.400789737701416, + "logps/chosen": -287.4091491699219, + "logps/ref_chosen": -70.16542053222656, + "logps/ref_rejected": -86.97230529785156, + "logps/rejected": -464.69903564453125, + "loss": 1.1073, + "margin_dpo/margin_mean": 160.48297119140625, + "margin_dpo/margin_std": 204.71377563476562, + "step": 307 + }, + { + "KL/chosen_KL_mean": -227.00399780273438, + "KL/mean": -317.9705810546875, + "KL/rejected_KL_mean": -408.9371337890625, + "KL/std": 191.77737426757812, + "epoch": 0.4522760646108664, + "fcm_dpo/beta": 0.002262132242321968, + "fcm_dpo/delta": -0.012053810060024261, + "fcm_dpo/margin": 181.93313598632812, + "fcm_dpo/q_t": 0.40270644426345825, + "grad_norm": 32.00320053100586, + "learning_rate": 3.355050358314172e-07, + "logits/chosen": -0.40174388885498047, + "logits/rejected": -0.3911542594432831, + "logps/chosen": -282.2489929199219, + "logps/ref_chosen": -55.2449951171875, + "logps/ref_rejected": -79.37226104736328, + "logps/rejected": -488.3094177246094, + "loss": 1.0594, + "margin_dpo/margin_mean": 181.93313598632812, + "margin_dpo/margin_std": 186.53610229492188, + "step": 308 + }, + { + "KL/chosen_KL_mean": -235.02227783203125, + "KL/mean": -328.88671875, + "KL/rejected_KL_mean": -422.7511291503906, + "KL/std": 208.54153442382812, + "epoch": 0.45374449339207046, + "fcm_dpo/beta": 0.0022613410837948322, + "fcm_dpo/delta": -0.025947626680135727, + "fcm_dpo/margin": 187.72882080078125, + "fcm_dpo/q_t": 0.40044116973876953, + "grad_norm": 36.268280029296875, + "learning_rate": 3.3429797851573183e-07, + "logits/chosen": -0.3440445065498352, + "logits/rejected": -0.3352274000644684, + "logps/chosen": -283.98138427734375, + "logps/ref_chosen": -48.959083557128906, + "logps/ref_rejected": -82.34072875976562, + "logps/rejected": -505.09185791015625, + "loss": 1.067, + "margin_dpo/margin_mean": 187.72882080078125, + "margin_dpo/margin_std": 219.98861694335938, + "step": 309 + }, + { + "KL/chosen_KL_mean": -275.0888671875, + "KL/mean": -361.7545471191406, + "KL/rejected_KL_mean": -448.42022705078125, + "KL/std": 183.4505157470703, + "epoch": 0.4552129221732746, + "fcm_dpo/beta": 0.002258842345327139, + "fcm_dpo/delta": 0.008499890565872192, + "fcm_dpo/margin": 173.33135986328125, + "fcm_dpo/q_t": 0.40748023986816406, + "grad_norm": 27.633638381958008, + "learning_rate": 3.3308869986991487e-07, + "logits/chosen": -0.3811958432197571, + "logits/rejected": -0.36551085114479065, + "logps/chosen": -337.8306579589844, + "logps/ref_chosen": -62.74177932739258, + "logps/ref_rejected": -79.9302978515625, + "logps/rejected": -528.3505249023438, + "loss": 1.0746, + "margin_dpo/margin_mean": 173.33135986328125, + "margin_dpo/margin_std": 180.37374877929688, + "step": 310 + }, + { + "KL/chosen_KL_mean": -287.706787109375, + "KL/mean": -388.335205078125, + "KL/rejected_KL_mean": -488.963623046875, + "KL/std": 243.3779296875, + "epoch": 0.4566813509544787, + "fcm_dpo/beta": 0.0022331131622195244, + "fcm_dpo/delta": -0.05178193002939224, + "fcm_dpo/margin": 201.25680541992188, + "fcm_dpo/q_t": 0.3981863260269165, + "grad_norm": 27.40664291381836, + "learning_rate": 3.3187723175958346e-07, + "logits/chosen": -0.3802347779273987, + "logits/rejected": -0.35250845551490784, + "logps/chosen": -340.7347717285156, + "logps/ref_chosen": -53.02798080444336, + "logps/ref_rejected": -77.43820190429688, + "logps/rejected": -566.40185546875, + "loss": 1.0617, + "margin_dpo/margin_mean": 201.25680541992188, + "margin_dpo/margin_std": 256.835693359375, + "step": 311 + }, + { + "KL/chosen_KL_mean": -276.9559326171875, + "KL/mean": -367.42620849609375, + "KL/rejected_KL_mean": -457.896484375, + "KL/std": 211.4532470703125, + "epoch": 0.4581497797356828, + "fcm_dpo/beta": 0.002234598621726036, + "fcm_dpo/delta": -0.004756327718496323, + "fcm_dpo/margin": 180.94058227539062, + "fcm_dpo/q_t": 0.4089137613773346, + "grad_norm": 25.603744506835938, + "learning_rate": 3.306636061080487e-07, + "logits/chosen": -0.3543195128440857, + "logits/rejected": -0.34367918968200684, + "logps/chosen": -326.34814453125, + "logps/ref_chosen": -49.39221954345703, + "logps/ref_rejected": -75.79280853271484, + "logps/rejected": -533.6893310546875, + "loss": 1.0944, + "margin_dpo/margin_mean": 180.94058227539062, + "margin_dpo/margin_std": 252.2486114501953, + "step": 312 + }, + { + "KL/chosen_KL_mean": -258.70135498046875, + "KL/mean": -349.859619140625, + "KL/rejected_KL_mean": -441.01788330078125, + "KL/std": 218.23260498046875, + "epoch": 0.45961820851688695, + "fcm_dpo/beta": 0.0022380563896149397, + "fcm_dpo/delta": -0.009237736463546753, + "fcm_dpo/margin": 182.31651306152344, + "fcm_dpo/q_t": 0.4060678482055664, + "grad_norm": 25.151771545410156, + "learning_rate": 3.2944785489547537e-07, + "logits/chosen": -0.40875959396362305, + "logits/rejected": -0.40443694591522217, + "logps/chosen": -308.8540954589844, + "logps/ref_chosen": -50.152740478515625, + "logps/ref_rejected": -86.40620422363281, + "logps/rejected": -527.424072265625, + "loss": 1.0913, + "margin_dpo/margin_mean": 182.31651306152344, + "margin_dpo/margin_std": 241.20245361328125, + "step": 313 + }, + { + "KL/chosen_KL_mean": -273.3019714355469, + "KL/mean": -359.92864990234375, + "KL/rejected_KL_mean": -446.5552978515625, + "KL/std": 209.75840759277344, + "epoch": 0.461086637298091, + "fcm_dpo/beta": 0.002218043664470315, + "fcm_dpo/delta": 0.01605740562081337, + "fcm_dpo/margin": 173.25335693359375, + "fcm_dpo/q_t": 0.4140198826789856, + "grad_norm": 21.262128829956055, + "learning_rate": 3.2823001015803857e-07, + "logits/chosen": -0.4432973861694336, + "logits/rejected": -0.44494277238845825, + "logps/chosen": -330.53955078125, + "logps/ref_chosen": -57.237579345703125, + "logps/ref_rejected": -97.5965347290039, + "logps/rejected": -544.15185546875, + "loss": 1.1216, + "margin_dpo/margin_mean": 173.25335693359375, + "margin_dpo/margin_std": 268.62567138671875, + "step": 314 + }, + { + "KL/chosen_KL_mean": -246.94622802734375, + "KL/mean": -326.3898620605469, + "KL/rejected_KL_mean": -405.83349609375, + "KL/std": 184.21778869628906, + "epoch": 0.46255506607929514, + "fcm_dpo/beta": 0.0022414117120206356, + "fcm_dpo/delta": 0.04551296681165695, + "fcm_dpo/margin": 158.88729858398438, + "fcm_dpo/q_t": 0.41757166385650635, + "grad_norm": 21.051586151123047, + "learning_rate": 3.270101039870797e-07, + "logits/chosen": -0.3383270502090454, + "logits/rejected": -0.33996307849884033, + "logps/chosen": -296.01580810546875, + "logps/ref_chosen": -49.06958770751953, + "logps/ref_rejected": -85.68087768554688, + "logps/rejected": -491.5143737792969, + "loss": 1.1129, + "margin_dpo/margin_mean": 158.88729858398438, + "margin_dpo/margin_std": 208.52919006347656, + "step": 315 + }, + { + "KL/chosen_KL_mean": -247.3397216796875, + "KL/mean": -359.8223876953125, + "KL/rejected_KL_mean": -472.3050537109375, + "KL/std": 221.7631378173828, + "epoch": 0.46402349486049926, + "fcm_dpo/beta": 0.002204576972872019, + "fcm_dpo/delta": -0.10198242217302322, + "fcm_dpo/margin": 224.96533203125, + "fcm_dpo/q_t": 0.386934369802475, + "grad_norm": 26.301368713378906, + "learning_rate": 3.2578816852826086e-07, + "logits/chosen": -0.37936335802078247, + "logits/rejected": -0.38319075107574463, + "logps/chosen": -301.6004638671875, + "logps/ref_chosen": -54.26074981689453, + "logps/ref_rejected": -101.2814712524414, + "logps/rejected": -573.5865478515625, + "loss": 1.0184, + "margin_dpo/margin_mean": 224.96531677246094, + "margin_dpo/margin_std": 245.899169921875, + "step": 316 + }, + { + "KL/chosen_KL_mean": -254.56671142578125, + "KL/mean": -377.3856201171875, + "KL/rejected_KL_mean": -500.2044372558594, + "KL/std": 204.3238525390625, + "epoch": 0.4654919236417034, + "fcm_dpo/beta": 0.0021673087030649185, + "fcm_dpo/delta": -0.1395837366580963, + "fcm_dpo/margin": 245.63772583007812, + "fcm_dpo/q_t": 0.3760732412338257, + "grad_norm": 27.939563751220703, + "learning_rate": 3.2456423598071783e-07, + "logits/chosen": -0.36965489387512207, + "logits/rejected": -0.3580781817436218, + "logps/chosen": -310.66094970703125, + "logps/ref_chosen": -56.094207763671875, + "logps/ref_rejected": -100.69905090332031, + "logps/rejected": -600.9035034179688, + "loss": 0.9823, + "margin_dpo/margin_mean": 245.6377410888672, + "margin_dpo/margin_std": 226.56988525390625, + "step": 317 + }, + { + "KL/chosen_KL_mean": -279.697265625, + "KL/mean": -372.810546875, + "KL/rejected_KL_mean": -465.92388916015625, + "KL/std": 213.03033447265625, + "epoch": 0.4669603524229075, + "fcm_dpo/beta": 0.002148838248103857, + "fcm_dpo/delta": -0.0003134552389383316, + "fcm_dpo/margin": 186.2266387939453, + "fcm_dpo/q_t": 0.4076169729232788, + "grad_norm": 25.99791717529297, + "learning_rate": 3.233383385962115e-07, + "logits/chosen": -0.4427594542503357, + "logits/rejected": -0.4159233570098877, + "logps/chosen": -344.34295654296875, + "logps/ref_chosen": -64.64569854736328, + "logps/ref_rejected": -82.76425170898438, + "logps/rejected": -548.6881103515625, + "loss": 1.0842, + "margin_dpo/margin_mean": 186.2266387939453, + "margin_dpo/margin_std": 232.8768310546875, + "step": 318 + }, + { + "KL/chosen_KL_mean": -254.43267822265625, + "KL/mean": -364.23388671875, + "KL/rejected_KL_mean": -474.03509521484375, + "KL/std": 231.52847290039062, + "epoch": 0.4684287812041116, + "fcm_dpo/beta": 0.002122014295309782, + "fcm_dpo/delta": -0.06920456886291504, + "fcm_dpo/margin": 219.60238647460938, + "fcm_dpo/q_t": 0.3920608162879944, + "grad_norm": 22.634004592895508, + "learning_rate": 3.2211050867827805e-07, + "logits/chosen": -0.40253257751464844, + "logits/rejected": -0.41706568002700806, + "logps/chosen": -303.81646728515625, + "logps/ref_chosen": -49.383758544921875, + "logps/ref_rejected": -113.90650939941406, + "logps/rejected": -587.9415893554688, + "loss": 1.04, + "margin_dpo/margin_mean": 219.60238647460938, + "margin_dpo/margin_std": 253.1864013671875, + "step": 319 + }, + { + "KL/chosen_KL_mean": -270.4678955078125, + "KL/mean": -391.669677734375, + "KL/rejected_KL_mean": -512.8714599609375, + "KL/std": 244.3293914794922, + "epoch": 0.4698972099853157, + "fcm_dpo/beta": 0.002073149662464857, + "fcm_dpo/delta": -0.10854032635688782, + "fcm_dpo/margin": 242.403564453125, + "fcm_dpo/q_t": 0.38467395305633545, + "grad_norm": 26.852354049682617, + "learning_rate": 3.208807785813777e-07, + "logits/chosen": -0.37782442569732666, + "logits/rejected": -0.3842761516571045, + "logps/chosen": -329.9727783203125, + "logps/ref_chosen": -59.50489044189453, + "logps/ref_rejected": -97.66717529296875, + "logps/rejected": -610.5386352539062, + "loss": 1.0125, + "margin_dpo/margin_mean": 242.403564453125, + "margin_dpo/margin_std": 261.4935607910156, + "step": 320 + }, + { + "KL/chosen_KL_mean": -335.5428771972656, + "KL/mean": -437.40179443359375, + "KL/rejected_KL_mean": -539.2606201171875, + "KL/std": 246.63409423828125, + "epoch": 0.4713656387665198, + "fcm_dpo/beta": 0.002050905954092741, + "fcm_dpo/delta": -0.019337691366672516, + "fcm_dpo/margin": 203.71778869628906, + "fcm_dpo/q_t": 0.4041670560836792, + "grad_norm": 25.74936294555664, + "learning_rate": 3.1964918071004217e-07, + "logits/chosen": -0.3634049594402313, + "logits/rejected": -0.35541000962257385, + "logps/chosen": -397.091552734375, + "logps/ref_chosen": -61.548683166503906, + "logps/ref_rejected": -91.64103698730469, + "logps/rejected": -630.9017333984375, + "loss": 1.0823, + "margin_dpo/margin_mean": 203.71778869628906, + "margin_dpo/margin_std": 262.6986083984375, + "step": 321 + }, + { + "KL/chosen_KL_mean": -293.60589599609375, + "KL/mean": -406.685302734375, + "KL/rejected_KL_mean": -519.7646484375, + "KL/std": 222.54376220703125, + "epoch": 0.47283406754772395, + "fcm_dpo/beta": 0.0020338515751063824, + "fcm_dpo/delta": -0.0633477047085762, + "fcm_dpo/margin": 226.15872192382812, + "fcm_dpo/q_t": 0.39247822761535645, + "grad_norm": 21.486406326293945, + "learning_rate": 3.184157475180207e-07, + "logits/chosen": -0.3742543160915375, + "logits/rejected": -0.37500399351119995, + "logps/chosen": -350.89593505859375, + "logps/ref_chosen": -57.29003143310547, + "logps/ref_rejected": -95.74992370605469, + "logps/rejected": -615.5145263671875, + "loss": 1.0333, + "margin_dpo/margin_mean": 226.15872192382812, + "margin_dpo/margin_std": 235.21531677246094, + "step": 322 + }, + { + "KL/chosen_KL_mean": -314.616943359375, + "KL/mean": -410.58935546875, + "KL/rejected_KL_mean": -506.56170654296875, + "KL/std": 222.89964294433594, + "epoch": 0.47430249632892807, + "fcm_dpo/beta": 0.002034769393503666, + "fcm_dpo/delta": 0.009797626174986362, + "fcm_dpo/margin": 191.94479370117188, + "fcm_dpo/q_t": 0.4092080295085907, + "grad_norm": 40.52562713623047, + "learning_rate": 3.171805115074251e-07, + "logits/chosen": -0.39556217193603516, + "logits/rejected": -0.3936944603919983, + "logps/chosen": -365.85089111328125, + "logps/ref_chosen": -51.23395919799805, + "logps/ref_rejected": -75.06192016601562, + "logps/rejected": -581.6236572265625, + "loss": 1.094, + "margin_dpo/margin_mean": 191.94479370117188, + "margin_dpo/margin_std": 242.95655822753906, + "step": 323 + }, + { + "KL/chosen_KL_mean": -364.46820068359375, + "KL/mean": -454.6254577636719, + "KL/rejected_KL_mean": -544.78271484375, + "KL/std": 239.51873779296875, + "epoch": 0.47577092511013214, + "fcm_dpo/beta": 0.0020614464301615953, + "fcm_dpo/delta": 0.0281895250082016, + "fcm_dpo/margin": 180.3145294189453, + "fcm_dpo/q_t": 0.41647300124168396, + "grad_norm": 45.35006332397461, + "learning_rate": 3.1594350522787295e-07, + "logits/chosen": -0.3872758746147156, + "logits/rejected": -0.3733510971069336, + "logps/chosen": -429.6033630371094, + "logps/ref_chosen": -65.13516998291016, + "logps/ref_rejected": -86.47750854492188, + "logps/rejected": -631.26025390625, + "loss": 1.1314, + "margin_dpo/margin_mean": 180.3145294189453, + "margin_dpo/margin_std": 275.48095703125, + "step": 324 + }, + { + "KL/chosen_KL_mean": -282.35870361328125, + "KL/mean": -360.821533203125, + "KL/rejected_KL_mean": -439.28436279296875, + "KL/std": 208.74551391601562, + "epoch": 0.47723935389133626, + "fcm_dpo/beta": 0.0020753461867570877, + "fcm_dpo/delta": 0.076748326420784, + "fcm_dpo/margin": 156.92562866210938, + "fcm_dpo/q_t": 0.42378658056259155, + "grad_norm": 30.042072296142578, + "learning_rate": 3.147047612756302e-07, + "logits/chosen": -0.44832050800323486, + "logits/rejected": -0.43317437171936035, + "logps/chosen": -338.57427978515625, + "logps/ref_chosen": -56.215599060058594, + "logps/ref_rejected": -70.08592987060547, + "logps/rejected": -509.37030029296875, + "loss": 1.1354, + "margin_dpo/margin_mean": 156.92564392089844, + "margin_dpo/margin_std": 211.30374145507812, + "step": 325 + }, + { + "KL/chosen_KL_mean": -312.6626281738281, + "KL/mean": -387.2479553222656, + "KL/rejected_KL_mean": -461.83331298828125, + "KL/std": 196.3777618408203, + "epoch": 0.4787077826725404, + "fcm_dpo/beta": 0.002105048391968012, + "fcm_dpo/delta": 0.08887322247028351, + "fcm_dpo/margin": 149.17068481445312, + "fcm_dpo/q_t": 0.4256167709827423, + "grad_norm": 45.18388748168945, + "learning_rate": 3.134643122927519e-07, + "logits/chosen": -0.4697904884815216, + "logits/rejected": -0.45466452836990356, + "logps/chosen": -385.3875732421875, + "logps/ref_chosen": -72.72496032714844, + "logps/ref_rejected": -79.8467788696289, + "logps/rejected": -541.6800537109375, + "loss": 1.1383, + "margin_dpo/margin_mean": 149.17068481445312, + "margin_dpo/margin_std": 190.10052490234375, + "step": 326 + }, + { + "KL/chosen_KL_mean": -267.8811950683594, + "KL/mean": -382.8114013671875, + "KL/rejected_KL_mean": -497.74163818359375, + "KL/std": 212.30575561523438, + "epoch": 0.4801762114537445, + "fcm_dpo/beta": 0.0020871213637292385, + "fcm_dpo/delta": -0.08413384854793549, + "fcm_dpo/margin": 229.8604278564453, + "fcm_dpo/q_t": 0.38719016313552856, + "grad_norm": 40.068824768066406, + "learning_rate": 3.1222219096622264e-07, + "logits/chosen": -0.419431209564209, + "logits/rejected": -0.40632164478302, + "logps/chosen": -337.015625, + "logps/ref_chosen": -69.13441467285156, + "logps/ref_rejected": -111.93377685546875, + "logps/rejected": -609.6754150390625, + "loss": 1.0185, + "margin_dpo/margin_mean": 229.86044311523438, + "margin_dpo/margin_std": 231.85646057128906, + "step": 327 + }, + { + "KL/chosen_KL_mean": -283.38665771484375, + "KL/mean": -388.9765930175781, + "KL/rejected_KL_mean": -494.5665283203125, + "KL/std": 235.5018310546875, + "epoch": 0.48164464023494863, + "fcm_dpo/beta": 0.002073537092655897, + "fcm_dpo/delta": -0.03960520401597023, + "fcm_dpo/margin": 211.17982482910156, + "fcm_dpo/q_t": 0.4000805914402008, + "grad_norm": 26.695911407470703, + "learning_rate": 3.1097843002709427e-07, + "logits/chosen": -0.40584391355514526, + "logits/rejected": -0.4096938371658325, + "logps/chosen": -343.0738525390625, + "logps/ref_chosen": -59.68719482421875, + "logps/ref_rejected": -90.85499572753906, + "logps/rejected": -585.4215087890625, + "loss": 1.0631, + "margin_dpo/margin_mean": 211.17984008789062, + "margin_dpo/margin_std": 262.86199951171875, + "step": 328 + }, + { + "KL/chosen_KL_mean": -315.77923583984375, + "KL/mean": -425.44476318359375, + "KL/rejected_KL_mean": -535.1102294921875, + "KL/std": 248.7440185546875, + "epoch": 0.4831130690161527, + "fcm_dpo/beta": 0.0020416276529431343, + "fcm_dpo/delta": -0.05077539384365082, + "fcm_dpo/margin": 219.33106994628906, + "fcm_dpo/q_t": 0.3970402479171753, + "grad_norm": 27.266719818115234, + "learning_rate": 3.0973306224962437e-07, + "logits/chosen": -0.39109185338020325, + "logits/rejected": -0.37896549701690674, + "logps/chosen": -381.0254211425781, + "logps/ref_chosen": -65.2461929321289, + "logps/ref_rejected": -100.69770812988281, + "logps/rejected": -635.8079833984375, + "loss": 1.0669, + "margin_dpo/margin_mean": 219.33106994628906, + "margin_dpo/margin_std": 276.76806640625, + "step": 329 + }, + { + "KL/chosen_KL_mean": -271.9311828613281, + "KL/mean": -387.9265441894531, + "KL/rejected_KL_mean": -503.921875, + "KL/std": 245.21937561035156, + "epoch": 0.4845814977973568, + "fcm_dpo/beta": 0.002023911103606224, + "fcm_dpo/delta": -0.07296737283468246, + "fcm_dpo/margin": 231.99075317382812, + "fcm_dpo/q_t": 0.39241012930870056, + "grad_norm": 22.988527297973633, + "learning_rate": 3.084861204504122e-07, + "logits/chosen": -0.38984963297843933, + "logits/rejected": -0.3909507393836975, + "logps/chosen": -318.92950439453125, + "logps/ref_chosen": -46.998348236083984, + "logps/ref_rejected": -86.87684631347656, + "logps/rejected": -590.7987060546875, + "loss": 1.0401, + "margin_dpo/margin_mean": 231.99075317382812, + "margin_dpo/margin_std": 271.9549560546875, + "step": 330 + }, + { + "KL/chosen_KL_mean": -292.27081298828125, + "KL/mean": -406.2383117675781, + "KL/rejected_KL_mean": -520.205810546875, + "KL/std": 200.16488647460938, + "epoch": 0.48604992657856094, + "fcm_dpo/beta": 0.001998601946979761, + "fcm_dpo/delta": -0.058225952088832855, + "fcm_dpo/margin": 227.9350128173828, + "fcm_dpo/q_t": 0.391870379447937, + "grad_norm": 29.254379272460938, + "learning_rate": 3.072376374875335e-07, + "logits/chosen": -0.4170438051223755, + "logits/rejected": -0.41196513175964355, + "logps/chosen": -342.7950744628906, + "logps/ref_chosen": -50.52424621582031, + "logps/ref_rejected": -89.01544189453125, + "logps/rejected": -609.2212524414062, + "loss": 1.0166, + "margin_dpo/margin_mean": 227.9350128173828, + "margin_dpo/margin_std": 194.46043395996094, + "step": 331 + }, + { + "KL/chosen_KL_mean": -294.25115966796875, + "KL/mean": -377.06805419921875, + "KL/rejected_KL_mean": -459.8848876953125, + "KL/std": 212.61550903320312, + "epoch": 0.48751835535976507, + "fcm_dpo/beta": 0.002016157377511263, + "fcm_dpo/delta": 0.06822776794433594, + "fcm_dpo/margin": 165.63372802734375, + "fcm_dpo/q_t": 0.4236387014389038, + "grad_norm": 23.76082420349121, + "learning_rate": 3.059876462596758e-07, + "logits/chosen": -0.4166898727416992, + "logits/rejected": -0.39659583568573, + "logps/chosen": -343.43145751953125, + "logps/ref_chosen": -49.18028259277344, + "logps/ref_rejected": -76.48515319824219, + "logps/rejected": -536.3699951171875, + "loss": 1.1333, + "margin_dpo/margin_mean": 165.63372802734375, + "margin_dpo/margin_std": 237.92779541015625, + "step": 332 + }, + { + "KL/chosen_KL_mean": -314.040771484375, + "KL/mean": -421.1554870605469, + "KL/rejected_KL_mean": -528.270263671875, + "KL/std": 245.66607666015625, + "epoch": 0.4889867841409692, + "fcm_dpo/beta": 0.0019987255800515413, + "fcm_dpo/delta": -0.030487176030874252, + "fcm_dpo/margin": 214.22947692871094, + "fcm_dpo/q_t": 0.40265339612960815, + "grad_norm": 21.290910720825195, + "learning_rate": 3.0473617970527015e-07, + "logits/chosen": -0.414547860622406, + "logits/rejected": -0.40775951743125916, + "logps/chosen": -377.7965087890625, + "logps/ref_chosen": -63.75574493408203, + "logps/ref_rejected": -95.04411315917969, + "logps/rejected": -623.3143310546875, + "loss": 1.0872, + "margin_dpo/margin_mean": 214.22946166992188, + "margin_dpo/margin_std": 293.54949951171875, + "step": 333 + }, + { + "KL/chosen_KL_mean": -301.93621826171875, + "KL/mean": -397.41796875, + "KL/rejected_KL_mean": -492.8997802734375, + "KL/std": 265.2902526855469, + "epoch": 0.49045521292217326, + "fcm_dpo/beta": 0.0020114602521061897, + "fcm_dpo/delta": 0.016515735536813736, + "fcm_dpo/margin": 190.96356201171875, + "fcm_dpo/q_t": 0.4135018587112427, + "grad_norm": 25.199310302734375, + "learning_rate": 3.034832708016243e-07, + "logits/chosen": -0.4625867009162903, + "logits/rejected": -0.4604346752166748, + "logps/chosen": -368.91595458984375, + "logps/ref_chosen": -66.97975158691406, + "logps/ref_rejected": -95.31692504882812, + "logps/rejected": -588.2166748046875, + "loss": 1.1255, + "margin_dpo/margin_mean": 190.9635467529297, + "margin_dpo/margin_std": 301.6986389160156, + "step": 334 + }, + { + "KL/chosen_KL_mean": -332.12335205078125, + "KL/mean": -407.17266845703125, + "KL/rejected_KL_mean": -482.2219543457031, + "KL/std": 248.314453125, + "epoch": 0.4919236417033774, + "fcm_dpo/beta": 0.0020141825079917908, + "fcm_dpo/delta": -0.004568679258227348, + "fcm_dpo/margin": 150.0985870361328, + "fcm_dpo/q_t": 0.4313260614871979, + "grad_norm": 32.44525146484375, + "learning_rate": 3.022289525640531e-07, + "logits/chosen": -0.43714985251426697, + "logits/rejected": -0.4139357805252075, + "logps/chosen": -394.66583251953125, + "logps/ref_chosen": -62.54248046875, + "logps/ref_rejected": -87.61770629882812, + "logps/rejected": -569.8396606445312, + "loss": 1.1788, + "margin_dpo/margin_mean": 150.0985870361328, + "margin_dpo/margin_std": 268.7898254394531, + "step": 335 + }, + { + "KL/chosen_KL_mean": -334.3876647949219, + "KL/mean": -455.1687927246094, + "KL/rejected_KL_mean": -575.949951171875, + "KL/std": 287.1412353515625, + "epoch": 0.4933920704845815, + "fcm_dpo/beta": 0.0019955080933868885, + "fcm_dpo/delta": -0.08617211878299713, + "fcm_dpo/margin": 241.56228637695312, + "fcm_dpo/q_t": 0.39297786355018616, + "grad_norm": 29.619176864624023, + "learning_rate": 3.009732580450086e-07, + "logits/chosen": -0.399771511554718, + "logits/rejected": -0.3994015157222748, + "logps/chosen": -388.9188232421875, + "logps/ref_chosen": -54.53115463256836, + "logps/ref_rejected": -104.40424346923828, + "logps/rejected": -680.3541870117188, + "loss": 1.0672, + "margin_dpo/margin_mean": 241.56228637695312, + "margin_dpo/margin_std": 339.20538330078125, + "step": 336 + }, + { + "KL/chosen_KL_mean": -299.7122802734375, + "KL/mean": -419.55731201171875, + "KL/rejected_KL_mean": -539.40234375, + "KL/std": 230.1120147705078, + "epoch": 0.4948604992657856, + "fcm_dpo/beta": 0.0019532032310962677, + "fcm_dpo/delta": -0.07179627567529678, + "fcm_dpo/margin": 239.69003295898438, + "fcm_dpo/q_t": 0.3924025893211365, + "grad_norm": 31.691265106201172, + "learning_rate": 2.9971622033320914e-07, + "logits/chosen": -0.44581082463264465, + "logits/rejected": -0.4338313341140747, + "logps/chosen": -364.8409729003906, + "logps/ref_chosen": -65.12869262695312, + "logps/ref_rejected": -101.72701263427734, + "logps/rejected": -641.1293334960938, + "loss": 1.0365, + "margin_dpo/margin_mean": 239.69003295898438, + "margin_dpo/margin_std": 272.0997314453125, + "step": 337 + }, + { + "KL/chosen_KL_mean": -265.2712707519531, + "KL/mean": -381.5762634277344, + "KL/rejected_KL_mean": -497.88128662109375, + "KL/std": 221.42172241210938, + "epoch": 0.49632892804698975, + "fcm_dpo/beta": 0.0019332608208060265, + "fcm_dpo/delta": -0.052127446979284286, + "fcm_dpo/margin": 232.6099853515625, + "fcm_dpo/q_t": 0.3942795991897583, + "grad_norm": 33.03255081176758, + "learning_rate": 2.984578725527675e-07, + "logits/chosen": -0.44652998447418213, + "logits/rejected": -0.4394975006580353, + "logps/chosen": -323.6939697265625, + "logps/ref_chosen": -58.422706604003906, + "logps/ref_rejected": -89.06854248046875, + "logps/rejected": -586.9498291015625, + "loss": 1.0325, + "margin_dpo/margin_mean": 232.6099853515625, + "margin_dpo/margin_std": 229.81480407714844, + "step": 338 + }, + { + "KL/chosen_KL_mean": -288.043701171875, + "KL/mean": -402.6147155761719, + "KL/rejected_KL_mean": -517.1857299804688, + "KL/std": 232.51951599121094, + "epoch": 0.4977973568281938, + "fcm_dpo/beta": 0.0019332109950482845, + "fcm_dpo/delta": -0.045980703085660934, + "fcm_dpo/margin": 229.14202880859375, + "fcm_dpo/q_t": 0.3964860439300537, + "grad_norm": 26.379383087158203, + "learning_rate": 2.9719824786231796e-07, + "logits/chosen": -0.4995361864566803, + "logits/rejected": -0.486974835395813, + "logps/chosen": -348.03900146484375, + "logps/ref_chosen": -59.99531555175781, + "logps/ref_rejected": -103.9109115600586, + "logps/rejected": -621.0966186523438, + "loss": 1.0459, + "margin_dpo/margin_mean": 229.14202880859375, + "margin_dpo/margin_std": 232.25975036621094, + "step": 339 + }, + { + "KL/chosen_KL_mean": -305.80303955078125, + "KL/mean": -403.6094665527344, + "KL/rejected_KL_mean": -501.4158935546875, + "KL/std": 221.99676513671875, + "epoch": 0.49926578560939794, + "fcm_dpo/beta": 0.001913035404868424, + "fcm_dpo/delta": 0.026591314002871513, + "fcm_dpo/margin": 195.61285400390625, + "fcm_dpo/q_t": 0.4138457477092743, + "grad_norm": 24.018463134765625, + "learning_rate": 2.959373794541426e-07, + "logits/chosen": -0.41224145889282227, + "logits/rejected": -0.3901046812534332, + "logps/chosen": -358.63323974609375, + "logps/ref_chosen": -52.83022689819336, + "logps/ref_rejected": -73.10723114013672, + "logps/rejected": -574.5231323242188, + "loss": 1.1118, + "margin_dpo/margin_mean": 195.61285400390625, + "margin_dpo/margin_std": 273.03375244140625, + "step": 340 + }, + { + "KL/chosen_KL_mean": -299.90802001953125, + "KL/mean": -412.6207580566406, + "KL/rejected_KL_mean": -525.3335571289062, + "KL/std": 242.2279510498047, + "epoch": 0.5007342143906021, + "fcm_dpo/beta": 0.0019069017143920064, + "fcm_dpo/delta": -0.03159831091761589, + "fcm_dpo/margin": 225.42550659179688, + "fcm_dpo/q_t": 0.4006652235984802, + "grad_norm": 25.791650772094727, + "learning_rate": 2.946753005532965e-07, + "logits/chosen": -0.40798383951187134, + "logits/rejected": -0.4071720838546753, + "logps/chosen": -347.8078308105469, + "logps/ref_chosen": -47.899803161621094, + "logps/ref_rejected": -101.80987548828125, + "logps/rejected": -627.1434326171875, + "loss": 1.0601, + "margin_dpo/margin_mean": 225.42550659179688, + "margin_dpo/margin_std": 256.30413818359375, + "step": 341 + }, + { + "KL/chosen_KL_mean": -308.8389892578125, + "KL/mean": -408.7991638183594, + "KL/rejected_KL_mean": -508.7593078613281, + "KL/std": 230.74411010742188, + "epoch": 0.5022026431718062, + "fcm_dpo/beta": 0.001922906143590808, + "fcm_dpo/delta": 0.01581621915102005, + "fcm_dpo/margin": 199.9203338623047, + "fcm_dpo/q_t": 0.4113759696483612, + "grad_norm": 25.002784729003906, + "learning_rate": 2.934120444167326e-07, + "logits/chosen": -0.4692263603210449, + "logits/rejected": -0.44338518381118774, + "logps/chosen": -380.83563232421875, + "logps/ref_chosen": -71.99664306640625, + "logps/ref_rejected": -92.58959197998047, + "logps/rejected": -601.348876953125, + "loss": 1.1027, + "margin_dpo/margin_mean": 199.92034912109375, + "margin_dpo/margin_std": 271.4499206542969, + "step": 342 + }, + { + "KL/chosen_KL_mean": -296.18927001953125, + "KL/mean": -417.68475341796875, + "KL/rejected_KL_mean": -539.18017578125, + "KL/std": 236.11985778808594, + "epoch": 0.5036710719530103, + "fcm_dpo/beta": 0.001898743212223053, + "fcm_dpo/delta": -0.06434239447116852, + "fcm_dpo/margin": 242.99090576171875, + "fcm_dpo/q_t": 0.39045825600624084, + "grad_norm": 23.37291145324707, + "learning_rate": 2.9214764433242476e-07, + "logits/chosen": -0.4716584086418152, + "logits/rejected": -0.4760972261428833, + "logps/chosen": -350.59490966796875, + "logps/ref_chosen": -54.405616760253906, + "logps/ref_rejected": -111.04142761230469, + "logps/rejected": -650.2216186523438, + "loss": 1.0162, + "margin_dpo/margin_mean": 242.99090576171875, + "margin_dpo/margin_std": 209.3662109375, + "step": 343 + }, + { + "KL/chosen_KL_mean": -303.44384765625, + "KL/mean": -406.74298095703125, + "KL/rejected_KL_mean": -510.04217529296875, + "KL/std": 256.48602294921875, + "epoch": 0.5051395007342144, + "fcm_dpo/beta": 0.0019109161803498864, + "fcm_dpo/delta": 0.0036756470799446106, + "fcm_dpo/margin": 206.59829711914062, + "fcm_dpo/q_t": 0.41091352701187134, + "grad_norm": 31.523855209350586, + "learning_rate": 2.9088213361849126e-07, + "logits/chosen": -0.4483766555786133, + "logits/rejected": -0.45180875062942505, + "logps/chosen": -357.40850830078125, + "logps/ref_chosen": -53.96466827392578, + "logps/ref_rejected": -90.62336730957031, + "logps/rejected": -600.66552734375, + "loss": 1.0951, + "margin_dpo/margin_mean": 206.59829711914062, + "margin_dpo/margin_std": 264.9103698730469, + "step": 344 + }, + { + "KL/chosen_KL_mean": -361.2244567871094, + "KL/mean": -478.6163024902344, + "KL/rejected_KL_mean": -596.0081787109375, + "KL/std": 251.1361083984375, + "epoch": 0.5066079295154186, + "fcm_dpo/beta": 0.0018856715178117156, + "fcm_dpo/delta": -0.04471251741051674, + "fcm_dpo/margin": 234.78369140625, + "fcm_dpo/q_t": 0.3976198434829712, + "grad_norm": 22.779315948486328, + "learning_rate": 2.896155456223163e-07, + "logits/chosen": -0.4647873342037201, + "logits/rejected": -0.4615704417228699, + "logps/chosen": -422.91015625, + "logps/ref_chosen": -61.685699462890625, + "logps/ref_rejected": -99.49041748046875, + "logps/rejected": -695.4985961914062, + "loss": 1.0557, + "margin_dpo/margin_mean": 234.78372192382812, + "margin_dpo/margin_std": 277.4552307128906, + "step": 345 + }, + { + "KL/chosen_KL_mean": -358.7704162597656, + "KL/mean": -470.405517578125, + "KL/rejected_KL_mean": -582.0405883789062, + "KL/std": 246.00839233398438, + "epoch": 0.5080763582966226, + "fcm_dpo/beta": 0.0018714326433837414, + "fcm_dpo/delta": -0.018616080284118652, + "fcm_dpo/margin": 223.2701873779297, + "fcm_dpo/q_t": 0.4019482135772705, + "grad_norm": 22.303312301635742, + "learning_rate": 2.883479137196714e-07, + "logits/chosen": -0.42917925119400024, + "logits/rejected": -0.41765835881233215, + "logps/chosen": -414.02667236328125, + "logps/ref_chosen": -55.256263732910156, + "logps/ref_rejected": -77.41532135009766, + "logps/rejected": -659.4559326171875, + "loss": 1.0682, + "margin_dpo/margin_mean": 223.27017211914062, + "margin_dpo/margin_std": 262.06787109375, + "step": 346 + }, + { + "KL/chosen_KL_mean": -364.54046630859375, + "KL/mean": -475.5876770019531, + "KL/rejected_KL_mean": -586.6348876953125, + "KL/std": 260.4476318359375, + "epoch": 0.5095447870778267, + "fcm_dpo/beta": 0.001866616541519761, + "fcm_dpo/delta": -0.015192577615380287, + "fcm_dpo/margin": 222.09446716308594, + "fcm_dpo/q_t": 0.4048117995262146, + "grad_norm": 26.2041015625, + "learning_rate": 2.8707927131383614e-07, + "logits/chosen": -0.4826762080192566, + "logits/rejected": -0.47840413451194763, + "logps/chosen": -422.106689453125, + "logps/ref_chosen": -57.56623840332031, + "logps/ref_rejected": -92.35509490966797, + "logps/rejected": -678.989990234375, + "loss": 1.0814, + "margin_dpo/margin_mean": 222.094482421875, + "margin_dpo/margin_std": 288.15325927734375, + "step": 347 + }, + { + "KL/chosen_KL_mean": -328.40960693359375, + "KL/mean": -422.31512451171875, + "KL/rejected_KL_mean": -516.2205810546875, + "KL/std": 226.09664916992188, + "epoch": 0.5110132158590308, + "fcm_dpo/beta": 0.0018670517019927502, + "fcm_dpo/delta": 0.05069158226251602, + "fcm_dpo/margin": 187.81101989746094, + "fcm_dpo/q_t": 0.41940367221832275, + "grad_norm": 27.389596939086914, + "learning_rate": 2.858096518347179e-07, + "logits/chosen": -0.47834187746047974, + "logits/rejected": -0.47976285219192505, + "logps/chosen": -384.727294921875, + "logps/ref_chosen": -56.31770324707031, + "logps/ref_rejected": -89.13836669921875, + "logps/rejected": -605.3590087890625, + "loss": 1.1255, + "margin_dpo/margin_mean": 187.81101989746094, + "margin_dpo/margin_std": 261.6010437011719, + "step": 348 + }, + { + "KL/chosen_KL_mean": -309.04058837890625, + "KL/mean": -417.7922668457031, + "KL/rejected_KL_mean": -526.5439453125, + "KL/std": 246.7399139404297, + "epoch": 0.5124816446402349, + "fcm_dpo/beta": 0.0018880900461226702, + "fcm_dpo/delta": -0.011443812400102615, + "fcm_dpo/margin": 217.50335693359375, + "fcm_dpo/q_t": 0.4071810245513916, + "grad_norm": 21.567838668823242, + "learning_rate": 2.845390887379706e-07, + "logits/chosen": -0.48450082540512085, + "logits/rejected": -0.4876035153865814, + "logps/chosen": -367.06610107421875, + "logps/ref_chosen": -58.025516510009766, + "logps/ref_rejected": -97.50515747070312, + "logps/rejected": -624.0491333007812, + "loss": 1.0928, + "margin_dpo/margin_mean": 217.50335693359375, + "margin_dpo/margin_std": 300.6847839355469, + "step": 349 + }, + { + "KL/chosen_KL_mean": -325.901611328125, + "KL/mean": -431.23809814453125, + "KL/rejected_KL_mean": -536.5746459960938, + "KL/std": 234.06008911132812, + "epoch": 0.5139500734214391, + "fcm_dpo/beta": 0.0018717560451477766, + "fcm_dpo/delta": 0.00548534095287323, + "fcm_dpo/margin": 210.67298889160156, + "fcm_dpo/q_t": 0.40805143117904663, + "grad_norm": 34.51575469970703, + "learning_rate": 2.8326761550411346e-07, + "logits/chosen": -0.5076397657394409, + "logits/rejected": -0.510471761226654, + "logps/chosen": -390.2320861816406, + "logps/ref_chosen": -64.33049011230469, + "logps/ref_rejected": -89.87164306640625, + "logps/rejected": -626.4462890625, + "loss": 1.0998, + "margin_dpo/margin_mean": 210.67300415039062, + "margin_dpo/margin_std": 285.7344055175781, + "step": 350 + }, + { + "KL/chosen_KL_mean": -306.2958984375, + "KL/mean": -430.8505859375, + "KL/rejected_KL_mean": -555.4052734375, + "KL/std": 270.62652587890625, + "epoch": 0.5154185022026432, + "fcm_dpo/beta": 0.0018583099590614438, + "fcm_dpo/delta": -0.06623544543981552, + "fcm_dpo/margin": 249.109375, + "fcm_dpo/q_t": 0.39529091119766235, + "grad_norm": 33.712581634521484, + "learning_rate": 2.819952656376487e-07, + "logits/chosen": -0.5161840915679932, + "logits/rejected": -0.5157532095909119, + "logps/chosen": -366.968017578125, + "logps/ref_chosen": -60.6721305847168, + "logps/ref_rejected": -101.5654296875, + "logps/rejected": -656.970703125, + "loss": 1.0536, + "margin_dpo/margin_mean": 249.109375, + "margin_dpo/margin_std": 312.37847900390625, + "step": 351 + }, + { + "KL/chosen_KL_mean": -353.2392578125, + "KL/mean": -434.8507385253906, + "KL/rejected_KL_mean": -516.4622802734375, + "KL/std": 244.72021484375, + "epoch": 0.5168869309838473, + "fcm_dpo/beta": 0.0018763558473438025, + "fcm_dpo/delta": 0.09677629917860031, + "fcm_dpo/margin": 163.22299194335938, + "fcm_dpo/q_t": 0.429553359746933, + "grad_norm": 34.96100997924805, + "learning_rate": 2.8072207266617854e-07, + "logits/chosen": -0.470248281955719, + "logits/rejected": -0.4367384910583496, + "logps/chosen": -424.18267822265625, + "logps/ref_chosen": -70.9434585571289, + "logps/ref_rejected": -76.6419677734375, + "logps/rejected": -593.104248046875, + "loss": 1.1643, + "margin_dpo/margin_mean": 163.22300720214844, + "margin_dpo/margin_std": 265.39306640625, + "step": 352 + }, + { + "KL/chosen_KL_mean": -322.45953369140625, + "KL/mean": -425.41156005859375, + "KL/rejected_KL_mean": -528.363525390625, + "KL/std": 244.48275756835938, + "epoch": 0.5183553597650514, + "fcm_dpo/beta": 0.0018868569750338793, + "fcm_dpo/delta": 0.01154874637722969, + "fcm_dpo/margin": 205.90402221679688, + "fcm_dpo/q_t": 0.4107271432876587, + "grad_norm": 26.08613395690918, + "learning_rate": 2.794480701395219e-07, + "logits/chosen": -0.5393311977386475, + "logits/rejected": -0.5296196937561035, + "logps/chosen": -380.8548583984375, + "logps/ref_chosen": -58.39533996582031, + "logps/ref_rejected": -80.33553314208984, + "logps/rejected": -608.6990966796875, + "loss": 1.0991, + "margin_dpo/margin_mean": 205.90402221679688, + "margin_dpo/margin_std": 272.3714294433594, + "step": 353 + }, + { + "KL/chosen_KL_mean": -268.17083740234375, + "KL/mean": -381.08599853515625, + "KL/rejected_KL_mean": -494.00115966796875, + "KL/std": 217.59808349609375, + "epoch": 0.5198237885462555, + "fcm_dpo/beta": 0.0018875201931223273, + "fcm_dpo/delta": -0.02746821939945221, + "fcm_dpo/margin": 225.830322265625, + "fcm_dpo/q_t": 0.39921072125434875, + "grad_norm": 40.203250885009766, + "learning_rate": 2.781732916288303e-07, + "logits/chosen": -0.49349963665008545, + "logits/rejected": -0.4861293137073517, + "logps/chosen": -327.97381591796875, + "logps/ref_chosen": -59.80299377441406, + "logps/ref_rejected": -88.75750732421875, + "logps/rejected": -582.7586669921875, + "loss": 1.0442, + "margin_dpo/margin_mean": 225.83033752441406, + "margin_dpo/margin_std": 216.87001037597656, + "step": 354 + }, + { + "KL/chosen_KL_mean": -275.6778564453125, + "KL/mean": -384.2642822265625, + "KL/rejected_KL_mean": -492.8507080078125, + "KL/std": 227.14535522460938, + "epoch": 0.5212922173274597, + "fcm_dpo/beta": 0.0018801202531903982, + "fcm_dpo/delta": -0.00877285934984684, + "fcm_dpo/margin": 217.17283630371094, + "fcm_dpo/q_t": 0.40371406078338623, + "grad_norm": 44.118072509765625, + "learning_rate": 2.7689777072570284e-07, + "logits/chosen": -0.5707640647888184, + "logits/rejected": -0.5644550323486328, + "logps/chosen": -329.806396484375, + "logps/ref_chosen": -54.12849807739258, + "logps/ref_rejected": -82.40606689453125, + "logps/rejected": -575.2567749023438, + "loss": 1.0634, + "margin_dpo/margin_mean": 217.17282104492188, + "margin_dpo/margin_std": 228.13082885742188, + "step": 355 + }, + { + "KL/chosen_KL_mean": -340.9484558105469, + "KL/mean": -402.58343505859375, + "KL/rejected_KL_mean": -464.2183837890625, + "KL/std": 240.2225341796875, + "epoch": 0.5227606461086637, + "fcm_dpo/beta": 0.0018893997184932232, + "fcm_dpo/delta": 0.03272160887718201, + "fcm_dpo/margin": 123.2698974609375, + "fcm_dpo/q_t": 0.44711410999298096, + "grad_norm": 34.34591293334961, + "learning_rate": 2.7562154104130176e-07, + "logits/chosen": -0.47087016701698303, + "logits/rejected": -0.4466116726398468, + "logps/chosen": -405.62225341796875, + "logps/ref_chosen": -64.6738052368164, + "logps/ref_rejected": -75.89926147460938, + "logps/rejected": -540.11767578125, + "loss": 1.2434, + "margin_dpo/margin_mean": 123.2698974609375, + "margin_dpo/margin_std": 296.36102294921875, + "step": 356 + }, + { + "KL/chosen_KL_mean": -309.8677978515625, + "KL/mean": -412.3357849121094, + "KL/rejected_KL_mean": -514.8037109375, + "KL/std": 240.03021240234375, + "epoch": 0.5242290748898678, + "fcm_dpo/beta": 0.0018900984432548285, + "fcm_dpo/delta": 0.012749601155519485, + "fcm_dpo/margin": 204.93594360351562, + "fcm_dpo/q_t": 0.40922728180885315, + "grad_norm": 30.455440521240234, + "learning_rate": 2.7434463620546594e-07, + "logits/chosen": -0.5195204019546509, + "logits/rejected": -0.5092127323150635, + "logps/chosen": -362.5935974121094, + "logps/ref_chosen": -52.725799560546875, + "logps/ref_rejected": -86.84115600585938, + "logps/rejected": -601.6448974609375, + "loss": 1.0905, + "margin_dpo/margin_mean": 204.93594360351562, + "margin_dpo/margin_std": 249.49462890625, + "step": 357 + }, + { + "KL/chosen_KL_mean": -282.8341369628906, + "KL/mean": -379.4464111328125, + "KL/rejected_KL_mean": -476.05865478515625, + "KL/std": 233.26222229003906, + "epoch": 0.5256975036710719, + "fcm_dpo/beta": 0.0019095418974757195, + "fcm_dpo/delta": 0.032218970358371735, + "fcm_dpo/margin": 193.22451782226562, + "fcm_dpo/q_t": 0.4149158000946045, + "grad_norm": 24.4883975982666, + "learning_rate": 2.730670898658255e-07, + "logits/chosen": -0.5249518156051636, + "logits/rejected": -0.5085197687149048, + "logps/chosen": -346.0395812988281, + "logps/ref_chosen": -63.20543670654297, + "logps/ref_rejected": -88.373291015625, + "logps/rejected": -564.4319458007812, + "loss": 1.1049, + "margin_dpo/margin_mean": 193.22451782226562, + "margin_dpo/margin_std": 251.6090087890625, + "step": 358 + }, + { + "KL/chosen_KL_mean": -319.8917541503906, + "KL/mean": -433.31768798828125, + "KL/rejected_KL_mean": -546.74365234375, + "KL/std": 235.4507293701172, + "epoch": 0.527165932452276, + "fcm_dpo/beta": 0.0019002794288098812, + "fcm_dpo/delta": -0.03259321302175522, + "fcm_dpo/margin": 226.85186767578125, + "fcm_dpo/q_t": 0.4016547203063965, + "grad_norm": 27.346670150756836, + "learning_rate": 2.717889356869146e-07, + "logits/chosen": -0.4491726756095886, + "logits/rejected": -0.43380558490753174, + "logps/chosen": -376.261962890625, + "logps/ref_chosen": -56.370216369628906, + "logps/ref_rejected": -82.17375183105469, + "logps/rejected": -628.9173583984375, + "loss": 1.0712, + "margin_dpo/margin_mean": 226.85186767578125, + "margin_dpo/margin_std": 285.25946044921875, + "step": 359 + }, + { + "KL/chosen_KL_mean": -322.7139587402344, + "KL/mean": -408.2466735839844, + "KL/rejected_KL_mean": -493.77935791015625, + "KL/std": 200.92642211914062, + "epoch": 0.5286343612334802, + "fcm_dpo/beta": 0.0019250914920121431, + "fcm_dpo/delta": 0.07284688949584961, + "fcm_dpo/margin": 171.06541442871094, + "fcm_dpo/q_t": 0.4223863184452057, + "grad_norm": 37.464805603027344, + "learning_rate": 2.7051020734928443e-07, + "logits/chosen": -0.43620049953460693, + "logits/rejected": -0.4205591678619385, + "logps/chosen": -374.1743469238281, + "logps/ref_chosen": -51.460384368896484, + "logps/ref_rejected": -69.83892059326172, + "logps/rejected": -563.6182861328125, + "loss": 1.1224, + "margin_dpo/margin_mean": 171.06541442871094, + "margin_dpo/margin_std": 206.2929229736328, + "step": 360 + }, + { + "KL/chosen_KL_mean": -337.85443115234375, + "KL/mean": -423.9326477050781, + "KL/rejected_KL_mean": -510.0108337402344, + "KL/std": 233.32354736328125, + "epoch": 0.5301027900146843, + "fcm_dpo/beta": 0.0019522447837516665, + "fcm_dpo/delta": 0.06582384556531906, + "fcm_dpo/margin": 172.15640258789062, + "fcm_dpo/q_t": 0.4221458435058594, + "grad_norm": 30.787109375, + "learning_rate": 2.6923093854861593e-07, + "logits/chosen": -0.474801242351532, + "logits/rejected": -0.47210389375686646, + "logps/chosen": -391.72393798828125, + "logps/ref_chosen": -53.86951446533203, + "logps/ref_rejected": -90.7692642211914, + "logps/rejected": -600.7801513671875, + "loss": 1.1377, + "margin_dpo/margin_mean": 172.15640258789062, + "margin_dpo/margin_std": 255.71853637695312, + "step": 361 + }, + { + "KL/chosen_KL_mean": -293.02099609375, + "KL/mean": -428.1585693359375, + "KL/rejected_KL_mean": -563.296142578125, + "KL/std": 246.54940795898438, + "epoch": 0.5315712187958884, + "fcm_dpo/beta": 0.0019115547183901072, + "fcm_dpo/delta": -0.12366791069507599, + "fcm_dpo/margin": 270.275146484375, + "fcm_dpo/q_t": 0.3806150555610657, + "grad_norm": 25.072677612304688, + "learning_rate": 2.679511629948319e-07, + "logits/chosen": -0.461614191532135, + "logits/rejected": -0.471387654542923, + "logps/chosen": -351.6600341796875, + "logps/ref_chosen": -58.639060974121094, + "logps/ref_rejected": -105.58195495605469, + "logps/rejected": -668.8780517578125, + "loss": 0.9921, + "margin_dpo/margin_mean": 270.275146484375, + "margin_dpo/margin_std": 262.97430419921875, + "step": 362 + }, + { + "KL/chosen_KL_mean": -270.0954284667969, + "KL/mean": -409.5333251953125, + "KL/rejected_KL_mean": -548.97119140625, + "KL/std": 249.90145874023438, + "epoch": 0.5330396475770925, + "fcm_dpo/beta": 0.0018741564126685262, + "fcm_dpo/delta": -0.12923146784305573, + "fcm_dpo/margin": 278.8757629394531, + "fcm_dpo/q_t": 0.37928664684295654, + "grad_norm": 25.539962768554688, + "learning_rate": 2.6667091441120816e-07, + "logits/chosen": -0.45062029361724854, + "logits/rejected": -0.445356547832489, + "logps/chosen": -314.65380859375, + "logps/ref_chosen": -44.558380126953125, + "logps/ref_rejected": -74.69496154785156, + "logps/rejected": -623.6661376953125, + "loss": 0.9901, + "margin_dpo/margin_mean": 278.87579345703125, + "margin_dpo/margin_std": 272.97113037109375, + "step": 363 + }, + { + "KL/chosen_KL_mean": -301.60052490234375, + "KL/mean": -405.09051513671875, + "KL/rejected_KL_mean": -508.58050537109375, + "KL/std": 244.21636962890625, + "epoch": 0.5345080763582967, + "fcm_dpo/beta": 0.0018688710406422615, + "fcm_dpo/delta": 0.013297256082296371, + "fcm_dpo/margin": 206.97998046875, + "fcm_dpo/q_t": 0.4103432595729828, + "grad_norm": 24.92216682434082, + "learning_rate": 2.6539022653348575e-07, + "logits/chosen": -0.49542540311813354, + "logits/rejected": -0.5060637593269348, + "logps/chosen": -350.4951171875, + "logps/ref_chosen": -48.894622802734375, + "logps/ref_rejected": -91.395751953125, + "logps/rejected": -599.9762573242188, + "loss": 1.1024, + "margin_dpo/margin_mean": 206.97999572753906, + "margin_dpo/margin_std": 280.64617919921875, + "step": 364 + }, + { + "KL/chosen_KL_mean": -285.0455322265625, + "KL/mean": -395.47760009765625, + "KL/rejected_KL_mean": -505.90972900390625, + "KL/std": 253.05226135253906, + "epoch": 0.5359765051395007, + "fcm_dpo/beta": 0.0018601326737552881, + "fcm_dpo/delta": -0.011300592683255672, + "fcm_dpo/margin": 220.8641815185547, + "fcm_dpo/q_t": 0.4056779444217682, + "grad_norm": 25.717348098754883, + "learning_rate": 2.641091331089811e-07, + "logits/chosen": -0.44014662504196167, + "logits/rejected": -0.45165252685546875, + "logps/chosen": -336.53826904296875, + "logps/ref_chosen": -51.49274444580078, + "logps/ref_rejected": -92.70166778564453, + "logps/rejected": -598.6113891601562, + "loss": 1.0716, + "margin_dpo/margin_mean": 220.8641815185547, + "margin_dpo/margin_std": 261.02288818359375, + "step": 365 + }, + { + "KL/chosen_KL_mean": -266.95123291015625, + "KL/mean": -379.23828125, + "KL/rejected_KL_mean": -491.52532958984375, + "KL/std": 242.17172241210938, + "epoch": 0.5374449339207048, + "fcm_dpo/beta": 0.0018460990395396948, + "fcm_dpo/delta": -0.015634853392839432, + "fcm_dpo/margin": 224.57406616210938, + "fcm_dpo/q_t": 0.40484321117401123, + "grad_norm": 22.693359375, + "learning_rate": 2.6282766789569736e-07, + "logits/chosen": -0.4734732210636139, + "logits/rejected": -0.4902943968772888, + "logps/chosen": -311.67181396484375, + "logps/ref_chosen": -44.7205696105957, + "logps/ref_rejected": -83.31040954589844, + "logps/rejected": -574.835693359375, + "loss": 1.0809, + "margin_dpo/margin_mean": 224.57406616210938, + "margin_dpo/margin_std": 287.28973388671875, + "step": 366 + }, + { + "KL/chosen_KL_mean": -279.1624450683594, + "KL/mean": -372.0902404785156, + "KL/rejected_KL_mean": -465.01806640625, + "KL/std": 217.56224060058594, + "epoch": 0.5389133627019089, + "fcm_dpo/beta": 0.0018712931778281927, + "fcm_dpo/delta": 0.05387837439775467, + "fcm_dpo/margin": 185.85562133789062, + "fcm_dpo/q_t": 0.4180119037628174, + "grad_norm": 22.13631820678711, + "learning_rate": 2.615458646614349e-07, + "logits/chosen": -0.47421082854270935, + "logits/rejected": -0.45852982997894287, + "logps/chosen": -337.56787109375, + "logps/ref_chosen": -58.405418395996094, + "logps/ref_rejected": -76.75132751464844, + "logps/rejected": -541.7694091796875, + "loss": 1.1149, + "margin_dpo/margin_mean": 185.85562133789062, + "margin_dpo/margin_std": 234.53326416015625, + "step": 367 + }, + { + "KL/chosen_KL_mean": -260.6910400390625, + "KL/mean": -406.54931640625, + "KL/rejected_KL_mean": -552.4075317382812, + "KL/std": 245.8842010498047, + "epoch": 0.540381791483113, + "fcm_dpo/beta": 0.0018334980122745037, + "fcm_dpo/delta": -0.14245912432670593, + "fcm_dpo/margin": 291.716552734375, + "fcm_dpo/q_t": 0.37282127141952515, + "grad_norm": 36.742767333984375, + "learning_rate": 2.6026375718290083e-07, + "logits/chosen": -0.48603498935699463, + "logits/rejected": -0.4929526448249817, + "logps/chosen": -305.1435546875, + "logps/ref_chosen": -44.452518463134766, + "logps/ref_rejected": -98.55526733398438, + "logps/rejected": -650.9627685546875, + "loss": 0.9603, + "margin_dpo/margin_mean": 291.7165222167969, + "margin_dpo/margin_std": 222.715087890625, + "step": 368 + }, + { + "KL/chosen_KL_mean": -353.1409606933594, + "KL/mean": -428.58447265625, + "KL/rejected_KL_mean": -504.0279235839844, + "KL/std": 253.20993041992188, + "epoch": 0.5418502202643172, + "fcm_dpo/beta": 0.0018251673318445683, + "fcm_dpo/delta": 0.02955966256558895, + "fcm_dpo/margin": 150.8870086669922, + "fcm_dpo/q_t": 0.43544578552246094, + "grad_norm": 26.54815673828125, + "learning_rate": 2.589813792448196e-07, + "logits/chosen": -0.49702537059783936, + "logits/rejected": -0.47865352034568787, + "logps/chosen": -424.5224609375, + "logps/ref_chosen": -71.38150024414062, + "logps/ref_rejected": -91.29582214355469, + "logps/rejected": -595.32373046875, + "loss": 1.1922, + "margin_dpo/margin_mean": 150.88702392578125, + "margin_dpo/margin_std": 279.23529052734375, + "step": 369 + }, + { + "KL/chosen_KL_mean": -370.0912170410156, + "KL/mean": -444.4624938964844, + "KL/rejected_KL_mean": -518.833740234375, + "KL/std": 263.0101318359375, + "epoch": 0.5433186490455213, + "fcm_dpo/beta": 0.0018681371584534645, + "fcm_dpo/delta": 0.1253683865070343, + "fcm_dpo/margin": 148.7425537109375, + "fcm_dpo/q_t": 0.43673175573349, + "grad_norm": 31.06038475036621, + "learning_rate": 2.5769876463904263e-07, + "logits/chosen": -0.5078903436660767, + "logits/rejected": -0.5006515979766846, + "logps/chosen": -441.69873046875, + "logps/ref_chosen": -71.60749816894531, + "logps/ref_rejected": -97.25978088378906, + "logps/rejected": -616.093505859375, + "loss": 1.1989, + "margin_dpo/margin_mean": 148.74256896972656, + "margin_dpo/margin_std": 292.08331298828125, + "step": 370 + }, + { + "KL/chosen_KL_mean": -357.910888671875, + "KL/mean": -464.66607666015625, + "KL/rejected_KL_mean": -571.4212646484375, + "KL/std": 263.6308288574219, + "epoch": 0.5447870778267254, + "fcm_dpo/beta": 0.0018815153744071722, + "fcm_dpo/delta": -0.0019350722432136536, + "fcm_dpo/margin": 213.51040649414062, + "fcm_dpo/q_t": 0.40850624442100525, + "grad_norm": 33.49449920654297, + "learning_rate": 2.5641594716365744e-07, + "logits/chosen": -0.557892918586731, + "logits/rejected": -0.5437754392623901, + "logps/chosen": -427.32537841796875, + "logps/ref_chosen": -69.41448974609375, + "logps/ref_rejected": -99.17217254638672, + "logps/rejected": -670.593505859375, + "loss": 1.0989, + "margin_dpo/margin_mean": 213.51040649414062, + "margin_dpo/margin_std": 299.0671081542969, + "step": 371 + }, + { + "KL/chosen_KL_mean": -353.14971923828125, + "KL/mean": -480.6755676269531, + "KL/rejected_KL_mean": -608.201416015625, + "KL/std": 304.527099609375, + "epoch": 0.5462555066079295, + "fcm_dpo/beta": 0.0018529519438743591, + "fcm_dpo/delta": -0.07631818950176239, + "fcm_dpo/margin": 255.05174255371094, + "fcm_dpo/q_t": 0.395152747631073, + "grad_norm": 28.089242935180664, + "learning_rate": 2.551329606220976e-07, + "logits/chosen": -0.5017907619476318, + "logits/rejected": -0.4822356402873993, + "logps/chosen": -414.96771240234375, + "logps/ref_chosen": -61.8179931640625, + "logps/ref_rejected": -78.53948974609375, + "logps/rejected": -686.740966796875, + "loss": 1.0575, + "margin_dpo/margin_mean": 255.0517578125, + "margin_dpo/margin_std": 344.2037353515625, + "step": 372 + }, + { + "KL/chosen_KL_mean": -380.80279541015625, + "KL/mean": -503.22076416015625, + "KL/rejected_KL_mean": -625.6387939453125, + "KL/std": 292.56231689453125, + "epoch": 0.5477239353891337, + "fcm_dpo/beta": 0.001842833822593093, + "fcm_dpo/delta": -0.0538918599486351, + "fcm_dpo/margin": 244.8360137939453, + "fcm_dpo/q_t": 0.39565837383270264, + "grad_norm": 25.535263061523438, + "learning_rate": 2.538498388222517e-07, + "logits/chosen": -0.4881356954574585, + "logits/rejected": -0.46633967757225037, + "logps/chosen": -445.0198974609375, + "logps/ref_chosen": -64.21713256835938, + "logps/ref_rejected": -85.95960998535156, + "logps/rejected": -711.598388671875, + "loss": 1.0544, + "margin_dpo/margin_mean": 244.83599853515625, + "margin_dpo/margin_std": 285.15032958984375, + "step": 373 + }, + { + "KL/chosen_KL_mean": -344.94671630859375, + "KL/mean": -455.2513427734375, + "KL/rejected_KL_mean": -565.5560302734375, + "KL/std": 316.259765625, + "epoch": 0.5491923641703378, + "fcm_dpo/beta": 0.0018177898600697517, + "fcm_dpo/delta": -0.0016709566116333008, + "fcm_dpo/margin": 220.6092529296875, + "fcm_dpo/q_t": 0.4120628535747528, + "grad_norm": 27.090904235839844, + "learning_rate": 2.525666155755725e-07, + "logits/chosen": -0.5761805772781372, + "logits/rejected": -0.5577331781387329, + "logps/chosen": -415.596923828125, + "logps/ref_chosen": -70.65018463134766, + "logps/ref_rejected": -93.64016723632812, + "logps/rejected": -659.1961669921875, + "loss": 1.1193, + "margin_dpo/margin_mean": 220.60926818847656, + "margin_dpo/margin_std": 354.3055419921875, + "step": 374 + }, + { + "KL/chosen_KL_mean": -349.4085693359375, + "KL/mean": -457.72344970703125, + "KL/rejected_KL_mean": -566.038330078125, + "KL/std": 255.31455993652344, + "epoch": 0.5506607929515418, + "fcm_dpo/beta": 0.00181809242349118, + "fcm_dpo/delta": 0.005695123225450516, + "fcm_dpo/margin": 216.62973022460938, + "fcm_dpo/q_t": 0.4092579185962677, + "grad_norm": 36.25385665893555, + "learning_rate": 2.512833246961859e-07, + "logits/chosen": -0.5407253503799438, + "logits/rejected": -0.5411194562911987, + "logps/chosen": -409.48876953125, + "logps/ref_chosen": -60.080223083496094, + "logps/ref_rejected": -88.93830871582031, + "logps/rejected": -654.9766235351562, + "loss": 1.1045, + "margin_dpo/margin_mean": 216.62973022460938, + "margin_dpo/margin_std": 301.60943603515625, + "step": 375 + }, + { + "KL/chosen_KL_mean": -342.84234619140625, + "KL/mean": -473.917724609375, + "KL/rejected_KL_mean": -604.9931030273438, + "KL/std": 271.80767822265625, + "epoch": 0.5521292217327459, + "fcm_dpo/beta": 0.001806009327992797, + "fcm_dpo/delta": -0.0772535428404808, + "fcm_dpo/margin": 262.1507568359375, + "fcm_dpo/q_t": 0.3911965489387512, + "grad_norm": 23.30925750732422, + "learning_rate": 2.5e-07, + "logits/chosen": -0.5112703442573547, + "logits/rejected": -0.5029022693634033, + "logps/chosen": -405.50262451171875, + "logps/ref_chosen": -62.660308837890625, + "logps/ref_rejected": -105.52660369873047, + "logps/rejected": -710.5196533203125, + "loss": 1.0426, + "margin_dpo/margin_mean": 262.1507568359375, + "margin_dpo/margin_std": 314.1611328125, + "step": 376 + }, + { + "KL/chosen_KL_mean": -344.38580322265625, + "KL/mean": -467.46221923828125, + "KL/rejected_KL_mean": -590.5386352539062, + "KL/std": 280.3377685546875, + "epoch": 0.55359765051395, + "fcm_dpo/beta": 0.0017937154043465853, + "fcm_dpo/delta": -0.043446458876132965, + "fcm_dpo/margin": 246.15281677246094, + "fcm_dpo/q_t": 0.3983774781227112, + "grad_norm": 29.571670532226562, + "learning_rate": 2.487166753038141e-07, + "logits/chosen": -0.4719467759132385, + "logits/rejected": -0.47322678565979004, + "logps/chosen": -398.8645324707031, + "logps/ref_chosen": -54.478736877441406, + "logps/ref_rejected": -98.70335388183594, + "logps/rejected": -689.241943359375, + "loss": 1.0577, + "margin_dpo/margin_mean": 246.15280151367188, + "margin_dpo/margin_std": 293.776123046875, + "step": 377 + }, + { + "KL/chosen_KL_mean": -327.5260925292969, + "KL/mean": -458.94586181640625, + "KL/rejected_KL_mean": -590.3656005859375, + "KL/std": 256.42523193359375, + "epoch": 0.5550660792951542, + "fcm_dpo/beta": 0.0017670897068455815, + "fcm_dpo/delta": -0.0676431879401207, + "fcm_dpo/margin": 262.839599609375, + "fcm_dpo/q_t": 0.3910645842552185, + "grad_norm": 30.750337600708008, + "learning_rate": 2.4743338442442754e-07, + "logits/chosen": -0.48483866453170776, + "logits/rejected": -0.505121648311615, + "logps/chosen": -372.546630859375, + "logps/ref_chosen": -45.02053451538086, + "logps/ref_rejected": -88.0469741821289, + "logps/rejected": -678.41259765625, + "loss": 1.0324, + "margin_dpo/margin_mean": 262.8395690917969, + "margin_dpo/margin_std": 280.932861328125, + "step": 378 + }, + { + "KL/chosen_KL_mean": -355.95745849609375, + "KL/mean": -482.84857177734375, + "KL/rejected_KL_mean": -609.73974609375, + "KL/std": 260.3935546875, + "epoch": 0.5565345080763583, + "fcm_dpo/beta": 0.0017432118766009808, + "fcm_dpo/delta": -0.04486480727791786, + "fcm_dpo/margin": 253.78221130371094, + "fcm_dpo/q_t": 0.39824381470680237, + "grad_norm": 28.86778450012207, + "learning_rate": 2.461501611777483e-07, + "logits/chosen": -0.4996095299720764, + "logits/rejected": -0.5224326848983765, + "logps/chosen": -409.1395568847656, + "logps/ref_chosen": -53.182098388671875, + "logps/ref_rejected": -114.3001708984375, + "logps/rejected": -724.0399169921875, + "loss": 1.0587, + "margin_dpo/margin_mean": 253.78219604492188, + "margin_dpo/margin_std": 305.1265563964844, + "step": 379 + }, + { + "KL/chosen_KL_mean": -341.1017761230469, + "KL/mean": -481.23809814453125, + "KL/rejected_KL_mean": -621.3743896484375, + "KL/std": 289.7486267089844, + "epoch": 0.5580029368575624, + "fcm_dpo/beta": 0.001726464950479567, + "fcm_dpo/delta": -0.08805520087480545, + "fcm_dpo/margin": 280.27264404296875, + "fcm_dpo/q_t": 0.38729774951934814, + "grad_norm": 27.250988006591797, + "learning_rate": 2.4486703937790243e-07, + "logits/chosen": -0.47446852922439575, + "logits/rejected": -0.5016424655914307, + "logps/chosen": -392.4548034667969, + "logps/ref_chosen": -51.3530387878418, + "logps/ref_rejected": -104.19169616699219, + "logps/rejected": -725.5660400390625, + "loss": 1.027, + "margin_dpo/margin_mean": 280.27264404296875, + "margin_dpo/margin_std": 311.29791259765625, + "step": 380 + }, + { + "KL/chosen_KL_mean": -358.0355529785156, + "KL/mean": -458.411376953125, + "KL/rejected_KL_mean": -558.7872314453125, + "KL/std": 245.9998016357422, + "epoch": 0.5594713656387665, + "fcm_dpo/beta": 0.001726742135360837, + "fcm_dpo/delta": 0.05530242994427681, + "fcm_dpo/margin": 200.75161743164062, + "fcm_dpo/q_t": 0.4205264449119568, + "grad_norm": 25.71771812438965, + "learning_rate": 2.435840528363426e-07, + "logits/chosen": -0.45564746856689453, + "logits/rejected": -0.43560299277305603, + "logps/chosen": -415.838623046875, + "logps/ref_chosen": -57.80306625366211, + "logps/ref_rejected": -79.21940612792969, + "logps/rejected": -638.006591796875, + "loss": 1.1465, + "margin_dpo/margin_mean": 200.75160217285156, + "margin_dpo/margin_std": 330.10223388671875, + "step": 381 + }, + { + "KL/chosen_KL_mean": -339.0993347167969, + "KL/mean": -461.7208557128906, + "KL/rejected_KL_mean": -584.34228515625, + "KL/std": 235.19894409179688, + "epoch": 0.5609397944199707, + "fcm_dpo/beta": 0.0017278792802244425, + "fcm_dpo/delta": -0.024810172617435455, + "fcm_dpo/margin": 245.24298095703125, + "fcm_dpo/q_t": 0.4007778763771057, + "grad_norm": 29.356277465820312, + "learning_rate": 2.4230123536095745e-07, + "logits/chosen": -0.4832066297531128, + "logits/rejected": -0.4879419803619385, + "logps/chosen": -405.11962890625, + "logps/ref_chosen": -66.02030181884766, + "logps/ref_rejected": -110.71016693115234, + "logps/rejected": -695.052490234375, + "loss": 1.0551, + "margin_dpo/margin_mean": 245.24298095703125, + "margin_dpo/margin_std": 262.1793518066406, + "step": 382 + }, + { + "KL/chosen_KL_mean": -346.09075927734375, + "KL/mean": -468.5663757324219, + "KL/rejected_KL_mean": -591.0419921875, + "KL/std": 261.1944274902344, + "epoch": 0.5624082232011748, + "fcm_dpo/beta": 0.0017200370784848928, + "fcm_dpo/delta": -0.022270262241363525, + "fcm_dpo/margin": 244.95123291015625, + "fcm_dpo/q_t": 0.4029914140701294, + "grad_norm": 25.48250961303711, + "learning_rate": 2.4101862075518037e-07, + "logits/chosen": -0.46931326389312744, + "logits/rejected": -0.47857019305229187, + "logps/chosen": -396.48223876953125, + "logps/ref_chosen": -50.39148712158203, + "logps/ref_rejected": -93.71589660644531, + "logps/rejected": -684.7578735351562, + "loss": 1.0874, + "margin_dpo/margin_mean": 244.95123291015625, + "margin_dpo/margin_std": 338.13385009765625, + "step": 383 + }, + { + "KL/chosen_KL_mean": -348.53369140625, + "KL/mean": -444.30133056640625, + "KL/rejected_KL_mean": -540.0689697265625, + "KL/std": 233.04791259765625, + "epoch": 0.5638766519823789, + "fcm_dpo/beta": 0.001742619788274169, + "fcm_dpo/delta": 0.06796430051326752, + "fcm_dpo/margin": 191.53521728515625, + "fcm_dpo/q_t": 0.4206535220146179, + "grad_norm": 25.28335952758789, + "learning_rate": 2.397362428170992e-07, + "logits/chosen": -0.5027947425842285, + "logits/rejected": -0.4957225024700165, + "logps/chosen": -400.5798034667969, + "logps/ref_chosen": -52.046104431152344, + "logps/ref_rejected": -85.76089477539062, + "logps/rejected": -625.829833984375, + "loss": 1.1164, + "margin_dpo/margin_mean": 191.53521728515625, + "margin_dpo/margin_std": 219.35858154296875, + "step": 384 + }, + { + "KL/chosen_KL_mean": -316.4886474609375, + "KL/mean": -433.07574462890625, + "KL/rejected_KL_mean": -549.6627807617188, + "KL/std": 208.34341430664062, + "epoch": 0.5653450807635829, + "fcm_dpo/beta": 0.0017409389838576317, + "fcm_dpo/delta": -0.006193262059241533, + "fcm_dpo/margin": 233.17413330078125, + "fcm_dpo/q_t": 0.4035521149635315, + "grad_norm": 30.516326904296875, + "learning_rate": 2.3845413533856514e-07, + "logits/chosen": -0.5098952054977417, + "logits/rejected": -0.4792426824569702, + "logps/chosen": -382.04083251953125, + "logps/ref_chosen": -65.55215454101562, + "logps/ref_rejected": -77.82792663574219, + "logps/rejected": -627.49072265625, + "loss": 1.0601, + "margin_dpo/margin_mean": 233.17413330078125, + "margin_dpo/margin_std": 232.32888793945312, + "step": 385 + }, + { + "KL/chosen_KL_mean": -332.295654296875, + "KL/mean": -461.0802307128906, + "KL/rejected_KL_mean": -589.8648071289062, + "KL/std": 266.24664306640625, + "epoch": 0.566813509544787, + "fcm_dpo/beta": 0.0017288768431171775, + "fcm_dpo/delta": -0.047393690794706345, + "fcm_dpo/margin": 257.5691833496094, + "fcm_dpo/q_t": 0.3971378803253174, + "grad_norm": 22.08116912841797, + "learning_rate": 2.3717233210430254e-07, + "logits/chosen": -0.5011097192764282, + "logits/rejected": -0.49594956636428833, + "logps/chosen": -390.5174865722656, + "logps/ref_chosen": -58.22185516357422, + "logps/ref_rejected": -92.32742309570312, + "logps/rejected": -682.1922607421875, + "loss": 1.0541, + "margin_dpo/margin_mean": 257.5691833496094, + "margin_dpo/margin_std": 305.404052734375, + "step": 386 + }, + { + "KL/chosen_KL_mean": -353.61395263671875, + "KL/mean": -457.56298828125, + "KL/rejected_KL_mean": -561.5120849609375, + "KL/std": 239.767333984375, + "epoch": 0.5682819383259912, + "fcm_dpo/beta": 0.0017271433025598526, + "fcm_dpo/delta": 0.042273543775081635, + "fcm_dpo/margin": 207.89810180664062, + "fcm_dpo/q_t": 0.41534334421157837, + "grad_norm": 29.83814239501953, + "learning_rate": 2.3589086689101889e-07, + "logits/chosen": -0.5749870538711548, + "logits/rejected": -0.5577224493026733, + "logps/chosen": -420.0334167480469, + "logps/ref_chosen": -66.41944885253906, + "logps/ref_rejected": -92.16915893554688, + "logps/rejected": -653.6812744140625, + "loss": 1.1016, + "margin_dpo/margin_mean": 207.89810180664062, + "margin_dpo/margin_std": 238.62474060058594, + "step": 387 + }, + { + "KL/chosen_KL_mean": -342.80242919921875, + "KL/mean": -484.3439636230469, + "KL/rejected_KL_mean": -625.8855590820312, + "KL/std": 283.77276611328125, + "epoch": 0.5697503671071953, + "fcm_dpo/beta": 0.0017118379473686218, + "fcm_dpo/delta": -0.08915528655052185, + "fcm_dpo/margin": 283.0830993652344, + "fcm_dpo/q_t": 0.3895290791988373, + "grad_norm": 24.234586715698242, + "learning_rate": 2.3460977346651428e-07, + "logits/chosen": -0.4718579649925232, + "logits/rejected": -0.48248615860939026, + "logps/chosen": -392.931884765625, + "logps/ref_chosen": -50.129459381103516, + "logps/ref_rejected": -104.43305969238281, + "logps/rejected": -730.318603515625, + "loss": 1.0286, + "margin_dpo/margin_mean": 283.08306884765625, + "margin_dpo/margin_std": 323.8463134765625, + "step": 388 + }, + { + "KL/chosen_KL_mean": -386.37750244140625, + "KL/mean": -505.4602966308594, + "KL/rejected_KL_mean": -624.5430908203125, + "KL/std": 282.40252685546875, + "epoch": 0.5712187958883994, + "fcm_dpo/beta": 0.0017030881717801094, + "fcm_dpo/delta": -0.005919036455452442, + "fcm_dpo/margin": 238.1655731201172, + "fcm_dpo/q_t": 0.40635746717453003, + "grad_norm": 30.227901458740234, + "learning_rate": 2.3332908558879177e-07, + "logits/chosen": -0.5446810722351074, + "logits/rejected": -0.5356224775314331, + "logps/chosen": -444.28411865234375, + "logps/ref_chosen": -57.906593322753906, + "logps/ref_rejected": -77.91454315185547, + "logps/rejected": -702.4576416015625, + "loss": 1.0864, + "margin_dpo/margin_mean": 238.1655731201172, + "margin_dpo/margin_std": 309.348876953125, + "step": 389 + }, + { + "KL/chosen_KL_mean": -377.35198974609375, + "KL/mean": -499.4270935058594, + "KL/rejected_KL_mean": -621.502197265625, + "KL/std": 282.41424560546875, + "epoch": 0.5726872246696035, + "fcm_dpo/beta": 0.0016950219869613647, + "fcm_dpo/delta": -0.014803212136030197, + "fcm_dpo/margin": 244.15020751953125, + "fcm_dpo/q_t": 0.40823304653167725, + "grad_norm": 25.220243453979492, + "learning_rate": 2.320488370051681e-07, + "logits/chosen": -0.4850386083126068, + "logits/rejected": -0.4768223166465759, + "logps/chosen": -426.5779113769531, + "logps/ref_chosen": -49.22591781616211, + "logps/ref_rejected": -85.5281982421875, + "logps/rejected": -707.0303955078125, + "loss": 1.0979, + "margin_dpo/margin_mean": 244.15020751953125, + "margin_dpo/margin_std": 357.4918212890625, + "step": 390 + }, + { + "KL/chosen_KL_mean": -382.5862731933594, + "KL/mean": -454.3216552734375, + "KL/rejected_KL_mean": -526.0570068359375, + "KL/std": 272.00946044921875, + "epoch": 0.5741556534508077, + "fcm_dpo/beta": 0.0017408509738743305, + "fcm_dpo/delta": 0.15400069952011108, + "fcm_dpo/margin": 143.47076416015625, + "fcm_dpo/q_t": 0.4426559805870056, + "grad_norm": 46.24735641479492, + "learning_rate": 2.3076906145138405e-07, + "logits/chosen": -0.5240955352783203, + "logits/rejected": -0.5136964321136475, + "logps/chosen": -446.9159240722656, + "logps/ref_chosen": -64.32965087890625, + "logps/ref_rejected": -86.73820495605469, + "logps/rejected": -612.7952270507812, + "loss": 1.2128, + "margin_dpo/margin_mean": 143.4707794189453, + "margin_dpo/margin_std": 285.8855285644531, + "step": 391 + }, + { + "KL/chosen_KL_mean": -327.81842041015625, + "KL/mean": -468.28118896484375, + "KL/rejected_KL_mean": -608.7439575195312, + "KL/std": 280.24822998046875, + "epoch": 0.5756240822320118, + "fcm_dpo/beta": 0.0017360819038003683, + "fcm_dpo/delta": -0.09214547276496887, + "fcm_dpo/margin": 280.925537109375, + "fcm_dpo/q_t": 0.3868769109249115, + "grad_norm": 26.776010513305664, + "learning_rate": 2.294897926507156e-07, + "logits/chosen": -0.4836753010749817, + "logits/rejected": -0.47832804918289185, + "logps/chosen": -381.3224182128906, + "logps/ref_chosen": -53.50397872924805, + "logps/ref_rejected": -102.34584045410156, + "logps/rejected": -711.0897827148438, + "loss": 1.0105, + "margin_dpo/margin_mean": 280.9255065917969, + "margin_dpo/margin_std": 274.7219543457031, + "step": 392 + }, + { + "KL/chosen_KL_mean": -320.5147399902344, + "KL/mean": -430.2720031738281, + "KL/rejected_KL_mean": -540.029296875, + "KL/std": 273.57257080078125, + "epoch": 0.5770925110132159, + "fcm_dpo/beta": 0.001724720699712634, + "fcm_dpo/delta": 0.022230474278330803, + "fcm_dpo/margin": 219.5145263671875, + "fcm_dpo/q_t": 0.4159342050552368, + "grad_norm": 22.34610939025879, + "learning_rate": 2.2821106431308543e-07, + "logits/chosen": -0.4929124414920807, + "logits/rejected": -0.4912495017051697, + "logps/chosen": -366.9886474609375, + "logps/ref_chosen": -46.473915100097656, + "logps/ref_rejected": -71.96885681152344, + "logps/rejected": -611.9981689453125, + "loss": 1.1217, + "margin_dpo/margin_mean": 219.51451110839844, + "margin_dpo/margin_std": 345.4471740722656, + "step": 393 + }, + { + "KL/chosen_KL_mean": -359.45440673828125, + "KL/mean": -476.61602783203125, + "KL/rejected_KL_mean": -593.777587890625, + "KL/std": 293.0263671875, + "epoch": 0.57856093979442, + "fcm_dpo/beta": 0.0017247963696718216, + "fcm_dpo/delta": -0.004414796829223633, + "fcm_dpo/margin": 234.32321166992188, + "fcm_dpo/q_t": 0.40792059898376465, + "grad_norm": 22.500810623168945, + "learning_rate": 2.2693291013417452e-07, + "logits/chosen": -0.477075457572937, + "logits/rejected": -0.47646427154541016, + "logps/chosen": -412.365966796875, + "logps/ref_chosen": -52.91154861450195, + "logps/ref_rejected": -90.8226318359375, + "logps/rejected": -684.6002197265625, + "loss": 1.0862, + "margin_dpo/margin_mean": 234.3231964111328, + "margin_dpo/margin_std": 305.0433349609375, + "step": 394 + }, + { + "KL/chosen_KL_mean": -354.402587890625, + "KL/mean": -479.6373596191406, + "KL/rejected_KL_mean": -604.8721923828125, + "KL/std": 283.220458984375, + "epoch": 0.580029368575624, + "fcm_dpo/beta": 0.0017139037372544408, + "fcm_dpo/delta": -0.03093145042657852, + "fcm_dpo/margin": 250.46954345703125, + "fcm_dpo/q_t": 0.4019749164581299, + "grad_norm": 24.562576293945312, + "learning_rate": 2.2565536379453404e-07, + "logits/chosen": -0.545151948928833, + "logits/rejected": -0.540196418762207, + "logps/chosen": -416.94866943359375, + "logps/ref_chosen": -62.546112060546875, + "logps/ref_rejected": -83.78262329101562, + "logps/rejected": -688.65478515625, + "loss": 1.0727, + "margin_dpo/margin_mean": 250.46954345703125, + "margin_dpo/margin_std": 320.5044250488281, + "step": 395 + }, + { + "KL/chosen_KL_mean": -372.64703369140625, + "KL/mean": -483.7370910644531, + "KL/rejected_KL_mean": -594.8271484375, + "KL/std": 283.7913818359375, + "epoch": 0.5814977973568282, + "fcm_dpo/beta": 0.0017187923658639193, + "fcm_dpo/delta": 0.01880822703242302, + "fcm_dpo/margin": 222.18011474609375, + "fcm_dpo/q_t": 0.4109712243080139, + "grad_norm": 26.06972312927246, + "learning_rate": 2.2437845895869825e-07, + "logits/chosen": -0.5321019887924194, + "logits/rejected": -0.514782726764679, + "logps/chosen": -441.6429443359375, + "logps/ref_chosen": -68.99594116210938, + "logps/ref_rejected": -88.64665985107422, + "logps/rejected": -683.4737548828125, + "loss": 1.0874, + "margin_dpo/margin_mean": 222.1800994873047, + "margin_dpo/margin_std": 261.53839111328125, + "step": 396 + }, + { + "KL/chosen_KL_mean": -350.56103515625, + "KL/mean": -496.0792236328125, + "KL/rejected_KL_mean": -641.597412109375, + "KL/std": 283.2353820800781, + "epoch": 0.5829662261380323, + "fcm_dpo/beta": 0.0016919523477554321, + "fcm_dpo/delta": -0.09789647161960602, + "fcm_dpo/margin": 291.03643798828125, + "fcm_dpo/q_t": 0.3857801556587219, + "grad_norm": 34.39192581176758, + "learning_rate": 2.2310222927429716e-07, + "logits/chosen": -0.4891834259033203, + "logits/rejected": -0.4951217472553253, + "logps/chosen": -411.83819580078125, + "logps/ref_chosen": -61.27716827392578, + "logps/ref_rejected": -103.11612701416016, + "logps/rejected": -744.713623046875, + "loss": 1.0122, + "margin_dpo/margin_mean": 291.03643798828125, + "margin_dpo/margin_std": 297.6669616699219, + "step": 397 + }, + { + "KL/chosen_KL_mean": -382.7860412597656, + "KL/mean": -513.4688110351562, + "KL/rejected_KL_mean": -644.151611328125, + "KL/std": 299.1744384765625, + "epoch": 0.5844346549192364, + "fcm_dpo/beta": 0.001679509412497282, + "fcm_dpo/delta": -0.04076296463608742, + "fcm_dpo/margin": 261.36553955078125, + "fcm_dpo/q_t": 0.4011620283126831, + "grad_norm": 22.932445526123047, + "learning_rate": 2.2182670837116972e-07, + "logits/chosen": -0.5526921153068542, + "logits/rejected": -0.5512826442718506, + "logps/chosen": -450.9375915527344, + "logps/ref_chosen": -68.15155029296875, + "logps/ref_rejected": -108.52360534667969, + "logps/rejected": -752.6752319335938, + "loss": 1.0711, + "margin_dpo/margin_mean": 261.36553955078125, + "margin_dpo/margin_std": 347.5543518066406, + "step": 398 + }, + { + "KL/chosen_KL_mean": -337.339111328125, + "KL/mean": -454.7564392089844, + "KL/rejected_KL_mean": -572.1737060546875, + "KL/std": 264.2313232421875, + "epoch": 0.5859030837004405, + "fcm_dpo/beta": 0.0016741682775318623, + "fcm_dpo/delta": 0.0070329029113054276, + "fcm_dpo/margin": 234.8346405029297, + "fcm_dpo/q_t": 0.4100215435028076, + "grad_norm": 32.84918975830078, + "learning_rate": 2.2055192986047804e-07, + "logits/chosen": -0.5098556280136108, + "logits/rejected": -0.47407281398773193, + "logps/chosen": -398.22894287109375, + "logps/ref_chosen": -60.889801025390625, + "logps/ref_rejected": -77.965576171875, + "logps/rejected": -650.1392822265625, + "loss": 1.1039, + "margin_dpo/margin_mean": 234.8346405029297, + "margin_dpo/margin_std": 332.3253173828125, + "step": 399 + }, + { + "KL/chosen_KL_mean": -326.5251159667969, + "KL/mean": -493.1486511230469, + "KL/rejected_KL_mean": -659.772216796875, + "KL/std": 275.30633544921875, + "epoch": 0.5873715124816447, + "fcm_dpo/beta": 0.001637051347643137, + "fcm_dpo/delta": -0.15437571704387665, + "fcm_dpo/margin": 333.2471008300781, + "fcm_dpo/q_t": 0.3719968795776367, + "grad_norm": 22.400123596191406, + "learning_rate": 2.192779273338215e-07, + "logits/chosen": -0.523221492767334, + "logits/rejected": -0.5234454870223999, + "logps/chosen": -390.168701171875, + "logps/ref_chosen": -63.64359664916992, + "logps/ref_rejected": -105.252685546875, + "logps/rejected": -765.02490234375, + "loss": 0.9719, + "margin_dpo/margin_mean": 333.24713134765625, + "margin_dpo/margin_std": 307.0823974609375, + "step": 400 + }, + { + "KL/chosen_KL_mean": -377.3856201171875, + "KL/mean": -472.60052490234375, + "KL/rejected_KL_mean": -567.8154296875, + "KL/std": 292.94207763671875, + "epoch": 0.5888399412628488, + "fcm_dpo/beta": 0.001645284821279347, + "fcm_dpo/delta": 0.0895879864692688, + "fcm_dpo/margin": 190.42982482910156, + "fcm_dpo/q_t": 0.4307780861854553, + "grad_norm": 29.05838394165039, + "learning_rate": 2.1800473436235136e-07, + "logits/chosen": -0.47050148248672485, + "logits/rejected": -0.4605827033519745, + "logps/chosen": -434.54864501953125, + "logps/ref_chosen": -57.16303253173828, + "logps/ref_rejected": -83.79249572753906, + "logps/rejected": -651.60791015625, + "loss": 1.1966, + "margin_dpo/margin_mean": 190.42984008789062, + "margin_dpo/margin_std": 395.08624267578125, + "step": 401 + }, + { + "KL/chosen_KL_mean": -287.9420166015625, + "KL/mean": -462.9802551269531, + "KL/rejected_KL_mean": -638.0184936523438, + "KL/std": 305.24725341796875, + "epoch": 0.5903083700440529, + "fcm_dpo/beta": 0.0016145255649462342, + "fcm_dpo/delta": -0.17511004209518433, + "fcm_dpo/margin": 350.07647705078125, + "fcm_dpo/q_t": 0.3686904013156891, + "grad_norm": 31.056983947753906, + "learning_rate": 2.1673238449588665e-07, + "logits/chosen": -0.5516624450683594, + "logits/rejected": -0.5455505847930908, + "logps/chosen": -338.682373046875, + "logps/ref_chosen": -50.74037170410156, + "logps/ref_rejected": -81.0460433959961, + "logps/rejected": -719.0645751953125, + "loss": 0.955, + "margin_dpo/margin_mean": 350.07647705078125, + "margin_dpo/margin_std": 310.8368225097656, + "step": 402 + }, + { + "KL/chosen_KL_mean": -327.7877197265625, + "KL/mean": -463.3001403808594, + "KL/rejected_KL_mean": -598.8125, + "KL/std": 292.3427429199219, + "epoch": 0.591776798825257, + "fcm_dpo/beta": 0.0015922733582556248, + "fcm_dpo/delta": -0.032966844737529755, + "fcm_dpo/margin": 271.0248107910156, + "fcm_dpo/q_t": 0.40002089738845825, + "grad_norm": 26.145050048828125, + "learning_rate": 2.154609112620295e-07, + "logits/chosen": -0.5232188701629639, + "logits/rejected": -0.525371789932251, + "logps/chosen": -374.93505859375, + "logps/ref_chosen": -47.14731216430664, + "logps/ref_rejected": -77.2666015625, + "logps/rejected": -676.0791015625, + "loss": 1.0561, + "margin_dpo/margin_mean": 271.0248107910156, + "margin_dpo/margin_std": 303.0567932128906, + "step": 403 + }, + { + "KL/chosen_KL_mean": -357.9871520996094, + "KL/mean": -491.9744873046875, + "KL/rejected_KL_mean": -625.9617919921875, + "KL/std": 283.893310546875, + "epoch": 0.593245227606461, + "fcm_dpo/beta": 0.0015823390567675233, + "fcm_dpo/delta": -0.025122996419668198, + "fcm_dpo/margin": 267.9746398925781, + "fcm_dpo/q_t": 0.4035566449165344, + "grad_norm": 32.78266906738281, + "learning_rate": 2.1419034816528218e-07, + "logits/chosen": -0.5385224223136902, + "logits/rejected": -0.533818244934082, + "logps/chosen": -405.8624267578125, + "logps/ref_chosen": -47.875274658203125, + "logps/ref_rejected": -77.15499877929688, + "logps/rejected": -703.1168212890625, + "loss": 1.0866, + "margin_dpo/margin_mean": 267.97467041015625, + "margin_dpo/margin_std": 370.1915283203125, + "step": 404 + }, + { + "KL/chosen_KL_mean": -402.0423278808594, + "KL/mean": -512.295166015625, + "KL/rejected_KL_mean": -622.5479125976562, + "KL/std": 303.5537414550781, + "epoch": 0.5947136563876652, + "fcm_dpo/beta": 0.0015738653019070625, + "fcm_dpo/delta": -0.04883524030447006, + "fcm_dpo/margin": 220.505615234375, + "fcm_dpo/q_t": 0.42195454239845276, + "grad_norm": 32.378849029541016, + "learning_rate": 2.129207286861638e-07, + "logits/chosen": -0.5482072830200195, + "logits/rejected": -0.5404790639877319, + "logps/chosen": -467.20526123046875, + "logps/ref_chosen": -65.16290283203125, + "logps/ref_rejected": -87.18678283691406, + "logps/rejected": -709.7347412109375, + "loss": 1.1557, + "margin_dpo/margin_mean": 220.505615234375, + "margin_dpo/margin_std": 375.1750183105469, + "step": 405 + }, + { + "KL/chosen_KL_mean": -362.15118408203125, + "KL/mean": -505.815185546875, + "KL/rejected_KL_mean": -649.479248046875, + "KL/std": 306.6311950683594, + "epoch": 0.5961820851688693, + "fcm_dpo/beta": 0.001560859032906592, + "fcm_dpo/delta": -0.050896137952804565, + "fcm_dpo/margin": 287.3280334472656, + "fcm_dpo/q_t": 0.3976570963859558, + "grad_norm": 26.965667724609375, + "learning_rate": 2.1165208628032861e-07, + "logits/chosen": -0.5607165098190308, + "logits/rejected": -0.5739609599113464, + "logps/chosen": -411.8919982910156, + "logps/ref_chosen": -49.740814208984375, + "logps/ref_rejected": -92.07862854003906, + "logps/rejected": -741.557861328125, + "loss": 1.0558, + "margin_dpo/margin_mean": 287.3280334472656, + "margin_dpo/margin_std": 347.114990234375, + "step": 406 + }, + { + "KL/chosen_KL_mean": -390.5394287109375, + "KL/mean": -479.0431823730469, + "KL/rejected_KL_mean": -567.546875, + "KL/std": 242.9478759765625, + "epoch": 0.5976505139500734, + "fcm_dpo/beta": 0.0015518320724368095, + "fcm_dpo/delta": 0.009759590961039066, + "fcm_dpo/margin": 177.0074462890625, + "fcm_dpo/q_t": 0.43519794940948486, + "grad_norm": 27.866214752197266, + "learning_rate": 2.1038445437768375e-07, + "logits/chosen": -0.5981777310371399, + "logits/rejected": -0.5707347393035889, + "logps/chosen": -446.8701477050781, + "logps/ref_chosen": -56.33069610595703, + "logps/ref_rejected": -77.51209259033203, + "logps/rejected": -645.0589599609375, + "loss": 1.1933, + "margin_dpo/margin_mean": 177.0074462890625, + "margin_dpo/margin_std": 323.0165710449219, + "step": 407 + }, + { + "KL/chosen_KL_mean": -390.214599609375, + "KL/mean": -492.9902038574219, + "KL/rejected_KL_mean": -595.7658081054688, + "KL/std": 239.10140991210938, + "epoch": 0.5991189427312775, + "fcm_dpo/beta": 0.0015722161624580622, + "fcm_dpo/delta": 0.07937593758106232, + "fcm_dpo/margin": 205.55116271972656, + "fcm_dpo/q_t": 0.4239957928657532, + "grad_norm": 26.215208053588867, + "learning_rate": 2.0911786638150872e-07, + "logits/chosen": -0.5892931222915649, + "logits/rejected": -0.5651265382766724, + "logps/chosen": -460.0039367675781, + "logps/ref_chosen": -69.789306640625, + "logps/ref_rejected": -90.09693908691406, + "logps/rejected": -685.8627319335938, + "loss": 1.1321, + "margin_dpo/margin_mean": 205.55116271972656, + "margin_dpo/margin_std": 269.0951232910156, + "step": 408 + }, + { + "KL/chosen_KL_mean": -384.1105041503906, + "KL/mean": -484.8536682128906, + "KL/rejected_KL_mean": -585.5968017578125, + "KL/std": 270.78424072265625, + "epoch": 0.6005873715124816, + "fcm_dpo/beta": 0.00160063779912889, + "fcm_dpo/delta": 0.07980034500360489, + "fcm_dpo/margin": 201.486328125, + "fcm_dpo/q_t": 0.4251343607902527, + "grad_norm": 37.040836334228516, + "learning_rate": 2.0785235566757517e-07, + "logits/chosen": -0.5418749451637268, + "logits/rejected": -0.5251990556716919, + "logps/chosen": -451.42791748046875, + "logps/ref_chosen": -67.31744384765625, + "logps/ref_rejected": -84.904296875, + "logps/rejected": -670.5010986328125, + "loss": 1.1421, + "margin_dpo/margin_mean": 201.48635864257812, + "margin_dpo/margin_std": 292.822998046875, + "step": 409 + }, + { + "KL/chosen_KL_mean": -369.51556396484375, + "KL/mean": -493.2908630371094, + "KL/rejected_KL_mean": -617.066162109375, + "KL/std": 272.4659423828125, + "epoch": 0.6020558002936858, + "fcm_dpo/beta": 0.0016031713457778096, + "fcm_dpo/delta": 0.003254479728639126, + "fcm_dpo/margin": 247.55059814453125, + "fcm_dpo/q_t": 0.4068432152271271, + "grad_norm": 28.998903274536133, + "learning_rate": 2.065879555832674e-07, + "logits/chosen": -0.5895723104476929, + "logits/rejected": -0.5948858857154846, + "logps/chosen": -420.98089599609375, + "logps/ref_chosen": -51.465354919433594, + "logps/ref_rejected": -83.198974609375, + "logps/rejected": -700.26513671875, + "loss": 1.0854, + "margin_dpo/margin_mean": 247.5506134033203, + "margin_dpo/margin_std": 306.04541015625, + "step": 410 + }, + { + "KL/chosen_KL_mean": -385.24688720703125, + "KL/mean": -502.3451843261719, + "KL/rejected_KL_mean": -619.4434204101562, + "KL/std": 290.84130859375, + "epoch": 0.6035242290748899, + "fcm_dpo/beta": 0.001580849289894104, + "fcm_dpo/delta": -0.0746782049536705, + "fcm_dpo/margin": 234.19656372070312, + "fcm_dpo/q_t": 0.41524988412857056, + "grad_norm": 33.94599533081055, + "learning_rate": 2.0532469944670343e-07, + "logits/chosen": -0.5782661437988281, + "logits/rejected": -0.589745819568634, + "logps/chosen": -437.5541687011719, + "logps/ref_chosen": -52.30727005004883, + "logps/ref_rejected": -80.69495391845703, + "logps/rejected": -700.138427734375, + "loss": 1.1204, + "margin_dpo/margin_mean": 234.19654846191406, + "margin_dpo/margin_std": 333.4945068359375, + "step": 411 + }, + { + "KL/chosen_KL_mean": -381.74725341796875, + "KL/mean": -506.9585266113281, + "KL/rejected_KL_mean": -632.1698608398438, + "KL/std": 275.69854736328125, + "epoch": 0.604992657856094, + "fcm_dpo/beta": 0.001582764321938157, + "fcm_dpo/delta": 0.0037793107330799103, + "fcm_dpo/margin": 250.42257690429688, + "fcm_dpo/q_t": 0.4081728160381317, + "grad_norm": 30.582490921020508, + "learning_rate": 2.0406262054585738e-07, + "logits/chosen": -0.6569205522537231, + "logits/rejected": -0.6889303922653198, + "logps/chosen": -434.891357421875, + "logps/ref_chosen": -53.144126892089844, + "logps/ref_rejected": -100.0608139038086, + "logps/rejected": -732.2306518554688, + "loss": 1.0898, + "margin_dpo/margin_mean": 250.42259216308594, + "margin_dpo/margin_std": 321.5178527832031, + "step": 412 + }, + { + "KL/chosen_KL_mean": -417.2314453125, + "KL/mean": -536.9945068359375, + "KL/rejected_KL_mean": -656.7576293945312, + "KL/std": 290.6329345703125, + "epoch": 0.6064610866372981, + "fcm_dpo/beta": 0.001589751336723566, + "fcm_dpo/delta": 0.01986226998269558, + "fcm_dpo/margin": 239.52615356445312, + "fcm_dpo/q_t": 0.410112202167511, + "grad_norm": 35.33984375, + "learning_rate": 2.0280175213768205e-07, + "logits/chosen": -0.6048665046691895, + "logits/rejected": -0.6170832514762878, + "logps/chosen": -478.81341552734375, + "logps/ref_chosen": -61.58196258544922, + "logps/ref_rejected": -99.47340393066406, + "logps/rejected": -756.2310180664062, + "loss": 1.1012, + "margin_dpo/margin_mean": 239.52615356445312, + "margin_dpo/margin_std": 314.6912841796875, + "step": 413 + }, + { + "KL/chosen_KL_mean": -384.1409912109375, + "KL/mean": -512.2708129882812, + "KL/rejected_KL_mean": -640.400634765625, + "KL/std": 261.5974426269531, + "epoch": 0.6079295154185022, + "fcm_dpo/beta": 0.0015965222846716642, + "fcm_dpo/delta": -0.010348714888095856, + "fcm_dpo/margin": 256.2596435546875, + "fcm_dpo/q_t": 0.40425509214401245, + "grad_norm": 36.57903289794922, + "learning_rate": 2.0154212744723247e-07, + "logits/chosen": -0.5525113344192505, + "logits/rejected": -0.5497645139694214, + "logps/chosen": -430.7724609375, + "logps/ref_chosen": -46.63148498535156, + "logps/ref_rejected": -87.64653015136719, + "logps/rejected": -728.047119140625, + "loss": 1.0772, + "margin_dpo/margin_mean": 256.2596435546875, + "margin_dpo/margin_std": 301.134033203125, + "step": 414 + }, + { + "KL/chosen_KL_mean": -396.94146728515625, + "KL/mean": -496.112548828125, + "KL/rejected_KL_mean": -595.28369140625, + "KL/std": 266.48736572265625, + "epoch": 0.6093979441997063, + "fcm_dpo/beta": 0.0015998441958799958, + "fcm_dpo/delta": 0.08544344455003738, + "fcm_dpo/margin": 198.34219360351562, + "fcm_dpo/q_t": 0.4255606532096863, + "grad_norm": 29.083877563476562, + "learning_rate": 2.002837796667909e-07, + "logits/chosen": -0.5818853974342346, + "logits/rejected": -0.5800877809524536, + "logps/chosen": -475.55975341796875, + "logps/ref_chosen": -78.6182861328125, + "logps/ref_rejected": -100.47752380371094, + "logps/rejected": -695.7611694335938, + "loss": 1.1493, + "margin_dpo/margin_mean": 198.34219360351562, + "margin_dpo/margin_std": 297.89312744140625, + "step": 415 + }, + { + "KL/chosen_KL_mean": -360.2153015136719, + "KL/mean": -515.66943359375, + "KL/rejected_KL_mean": -671.12353515625, + "KL/std": 292.68878173828125, + "epoch": 0.6108663729809104, + "fcm_dpo/beta": 0.0015928513603284955, + "fcm_dpo/delta": -0.10007157921791077, + "fcm_dpo/margin": 310.9083251953125, + "fcm_dpo/q_t": 0.38390302658081055, + "grad_norm": 30.425918579101562, + "learning_rate": 1.990267419549914e-07, + "logits/chosen": -0.5869680643081665, + "logits/rejected": -0.5953909754753113, + "logps/chosen": -418.49444580078125, + "logps/ref_chosen": -58.27912521362305, + "logps/ref_rejected": -90.56871795654297, + "logps/rejected": -761.6922607421875, + "loss": 1.0009, + "margin_dpo/margin_mean": 310.9082946777344, + "margin_dpo/margin_std": 287.4730224609375, + "step": 416 + }, + { + "KL/chosen_KL_mean": -359.5291748046875, + "KL/mean": -491.1758117675781, + "KL/rejected_KL_mean": -622.8224487304688, + "KL/std": 265.6627197265625, + "epoch": 0.6123348017621145, + "fcm_dpo/beta": 0.0015787691809237003, + "fcm_dpo/delta": -0.016375936567783356, + "fcm_dpo/margin": 263.29327392578125, + "fcm_dpo/q_t": 0.4014412462711334, + "grad_norm": 36.952362060546875, + "learning_rate": 1.9777104743594686e-07, + "logits/chosen": -0.5713890790939331, + "logits/rejected": -0.5573433637619019, + "logps/chosen": -409.7278747558594, + "logps/ref_chosen": -50.1987190246582, + "logps/ref_rejected": -68.15184020996094, + "logps/rejected": -690.9742431640625, + "loss": 1.0514, + "margin_dpo/margin_mean": 263.2933044433594, + "margin_dpo/margin_std": 253.69268798828125, + "step": 417 + }, + { + "KL/chosen_KL_mean": -388.34796142578125, + "KL/mean": -523.8869018554688, + "KL/rejected_KL_mean": -659.4258422851562, + "KL/std": 309.9178466796875, + "epoch": 0.6138032305433186, + "fcm_dpo/beta": 0.0015799949178472161, + "fcm_dpo/delta": -0.030489690601825714, + "fcm_dpo/margin": 271.077880859375, + "fcm_dpo/q_t": 0.40348464250564575, + "grad_norm": 22.66263771057129, + "learning_rate": 1.965167291983757e-07, + "logits/chosen": -0.6504318714141846, + "logits/rejected": -0.634763240814209, + "logps/chosen": -470.326416015625, + "logps/ref_chosen": -81.97846984863281, + "logps/ref_rejected": -104.69148254394531, + "logps/rejected": -764.1173095703125, + "loss": 1.082, + "margin_dpo/margin_mean": 271.077880859375, + "margin_dpo/margin_std": 359.14801025390625, + "step": 418 + }, + { + "KL/chosen_KL_mean": -358.2254638671875, + "KL/mean": -507.04156494140625, + "KL/rejected_KL_mean": -655.857666015625, + "KL/std": 277.0826110839844, + "epoch": 0.6152716593245228, + "fcm_dpo/beta": 0.001551083056256175, + "fcm_dpo/delta": -0.06462173163890839, + "fcm_dpo/margin": 297.6322021484375, + "fcm_dpo/q_t": 0.3922021687030792, + "grad_norm": 32.60568618774414, + "learning_rate": 1.9526382029472988e-07, + "logits/chosen": -0.5689761638641357, + "logits/rejected": -0.5716375708580017, + "logps/chosen": -411.17413330078125, + "logps/ref_chosen": -52.948646545410156, + "logps/ref_rejected": -91.58309936523438, + "logps/rejected": -747.4407958984375, + "loss": 1.0358, + "margin_dpo/margin_mean": 297.6322021484375, + "margin_dpo/margin_std": 322.8487854003906, + "step": 419 + }, + { + "KL/chosen_KL_mean": -455.716064453125, + "KL/mean": -545.5599365234375, + "KL/rejected_KL_mean": -635.4039306640625, + "KL/std": 290.76470947265625, + "epoch": 0.6167400881057269, + "fcm_dpo/beta": 0.0015754573978483677, + "fcm_dpo/delta": 0.1201419085264206, + "fcm_dpo/margin": 179.68783569335938, + "fcm_dpo/q_t": 0.43682652711868286, + "grad_norm": 52.570884704589844, + "learning_rate": 1.9401235374032425e-07, + "logits/chosen": -0.5963464975357056, + "logits/rejected": -0.5628973245620728, + "logps/chosen": -533.4859619140625, + "logps/ref_chosen": -77.7699203491211, + "logps/ref_rejected": -69.31985473632812, + "logps/rejected": -704.7237548828125, + "loss": 1.2113, + "margin_dpo/margin_mean": 179.68783569335938, + "margin_dpo/margin_std": 389.99383544921875, + "step": 420 + }, + { + "KL/chosen_KL_mean": -387.1592102050781, + "KL/mean": -489.2259216308594, + "KL/rejected_KL_mean": -591.2926025390625, + "KL/std": 287.3149719238281, + "epoch": 0.618208516886931, + "fcm_dpo/beta": 0.0016072317957878113, + "fcm_dpo/delta": 0.07370726019144058, + "fcm_dpo/margin": 204.13345336914062, + "fcm_dpo/q_t": 0.42217308282852173, + "grad_norm": 27.965801239013672, + "learning_rate": 1.9276236251246653e-07, + "logits/chosen": -0.6343262195587158, + "logits/rejected": -0.6225738525390625, + "logps/chosen": -440.9250793457031, + "logps/ref_chosen": -53.765865325927734, + "logps/ref_rejected": -89.28144836425781, + "logps/rejected": -680.5740966796875, + "loss": 1.1435, + "margin_dpo/margin_mean": 204.13345336914062, + "margin_dpo/margin_std": 298.565185546875, + "step": 421 + }, + { + "KL/chosen_KL_mean": -434.88446044921875, + "KL/mean": -556.2061157226562, + "KL/rejected_KL_mean": -677.5277099609375, + "KL/std": 292.18963623046875, + "epoch": 0.6196769456681351, + "fcm_dpo/beta": 0.0016083747614175081, + "fcm_dpo/delta": 0.010134613141417503, + "fcm_dpo/margin": 242.6432647705078, + "fcm_dpo/q_t": 0.40930503606796265, + "grad_norm": 36.293704986572266, + "learning_rate": 1.9151387954958792e-07, + "logits/chosen": -0.6203078031539917, + "logits/rejected": -0.6245821714401245, + "logps/chosen": -503.51824951171875, + "logps/ref_chosen": -68.6337661743164, + "logps/ref_rejected": -87.86351013183594, + "logps/rejected": -765.3912353515625, + "loss": 1.1034, + "margin_dpo/margin_mean": 242.6432647705078, + "margin_dpo/margin_std": 339.7408142089844, + "step": 422 + }, + { + "KL/chosen_KL_mean": -425.70440673828125, + "KL/mean": -560.471923828125, + "KL/rejected_KL_mean": -695.239501953125, + "KL/std": 282.6575012207031, + "epoch": 0.6211453744493393, + "fcm_dpo/beta": 0.0016007790109142661, + "fcm_dpo/delta": -0.032924652099609375, + "fcm_dpo/margin": 269.53515625, + "fcm_dpo/q_t": 0.40004196763038635, + "grad_norm": 34.40102005004883, + "learning_rate": 1.902669377503756e-07, + "logits/chosen": -0.613274097442627, + "logits/rejected": -0.6200335025787354, + "logps/chosen": -480.6947021484375, + "logps/ref_chosen": -54.99030303955078, + "logps/ref_rejected": -86.30654907226562, + "logps/rejected": -781.5460815429688, + "loss": 1.0598, + "margin_dpo/margin_mean": 269.53515625, + "margin_dpo/margin_std": 315.0151672363281, + "step": 423 + }, + { + "KL/chosen_KL_mean": -387.79833984375, + "KL/mean": -513.0828247070312, + "KL/rejected_KL_mean": -638.3672485351562, + "KL/std": 285.3108215332031, + "epoch": 0.6226138032305433, + "fcm_dpo/beta": 0.0015928398352116346, + "fcm_dpo/delta": 0.0005655810236930847, + "fcm_dpo/margin": 250.56887817382812, + "fcm_dpo/q_t": 0.40977025032043457, + "grad_norm": 35.320560455322266, + "learning_rate": 1.890215699729057e-07, + "logits/chosen": -0.6114071011543274, + "logits/rejected": -0.5866736769676208, + "logps/chosen": -443.8102722167969, + "logps/ref_chosen": -56.01192092895508, + "logps/ref_rejected": -66.47896575927734, + "logps/rejected": -704.84619140625, + "loss": 1.0981, + "margin_dpo/margin_mean": 250.56887817382812, + "margin_dpo/margin_std": 348.552978515625, + "step": 424 + }, + { + "KL/chosen_KL_mean": -441.84747314453125, + "KL/mean": -535.390380859375, + "KL/rejected_KL_mean": -628.933349609375, + "KL/std": 266.4034423828125, + "epoch": 0.6240822320117474, + "fcm_dpo/beta": 0.00162741937674582, + "fcm_dpo/delta": 0.09813511371612549, + "fcm_dpo/margin": 187.08584594726562, + "fcm_dpo/q_t": 0.42789530754089355, + "grad_norm": 35.080318450927734, + "learning_rate": 1.8777780903377732e-07, + "logits/chosen": -0.6289581060409546, + "logits/rejected": -0.630668044090271, + "logps/chosen": -488.7164306640625, + "logps/ref_chosen": -46.86899948120117, + "logps/ref_rejected": -95.92545318603516, + "logps/rejected": -724.8587646484375, + "loss": 1.1745, + "margin_dpo/margin_mean": 187.0858612060547, + "margin_dpo/margin_std": 329.364990234375, + "step": 425 + }, + { + "KL/chosen_KL_mean": -400.1252136230469, + "KL/mean": -519.4071044921875, + "KL/rejected_KL_mean": -638.68896484375, + "KL/std": 272.60137939453125, + "epoch": 0.6255506607929515, + "fcm_dpo/beta": 0.0016408449737355113, + "fcm_dpo/delta": 0.008432462811470032, + "fcm_dpo/margin": 238.56373596191406, + "fcm_dpo/q_t": 0.4089978337287903, + "grad_norm": 30.283872604370117, + "learning_rate": 1.8653568770724803e-07, + "logits/chosen": -0.6305758953094482, + "logits/rejected": -0.5998473167419434, + "logps/chosen": -476.708740234375, + "logps/ref_chosen": -76.58354187011719, + "logps/ref_rejected": -81.26658630371094, + "logps/rejected": -719.95556640625, + "loss": 1.0926, + "margin_dpo/margin_mean": 238.56373596191406, + "margin_dpo/margin_std": 298.9935607910156, + "step": 426 + }, + { + "KL/chosen_KL_mean": -372.81451416015625, + "KL/mean": -467.484130859375, + "KL/rejected_KL_mean": -562.1536865234375, + "KL/std": 240.36863708496094, + "epoch": 0.6270190895741556, + "fcm_dpo/beta": 0.0016517244512215257, + "fcm_dpo/delta": 0.09017623960971832, + "fcm_dpo/margin": 189.3392333984375, + "fcm_dpo/q_t": 0.42767786979675293, + "grad_norm": 27.282377243041992, + "learning_rate": 1.8529523872436977e-07, + "logits/chosen": -0.6336793899536133, + "logits/rejected": -0.6121144890785217, + "logps/chosen": -437.66839599609375, + "logps/ref_chosen": -64.8538818359375, + "logps/ref_rejected": -78.5660171508789, + "logps/rejected": -640.7197265625, + "loss": 1.1522, + "margin_dpo/margin_mean": 189.33921813964844, + "margin_dpo/margin_std": 286.495849609375, + "step": 427 + }, + { + "KL/chosen_KL_mean": -453.0958251953125, + "KL/mean": -580.8818359375, + "KL/rejected_KL_mean": -708.6678466796875, + "KL/std": 316.7676696777344, + "epoch": 0.6284875183553598, + "fcm_dpo/beta": 0.0016518604243174195, + "fcm_dpo/delta": -0.02350825071334839, + "fcm_dpo/margin": 255.57199096679688, + "fcm_dpo/q_t": 0.40379005670547485, + "grad_norm": 31.117158889770508, + "learning_rate": 1.8405649477212697e-07, + "logits/chosen": -0.6245919466018677, + "logits/rejected": -0.6301891803741455, + "logps/chosen": -515.7324829101562, + "logps/ref_chosen": -62.63666534423828, + "logps/ref_rejected": -103.28181457519531, + "logps/rejected": -811.9496459960938, + "loss": 1.1038, + "margin_dpo/margin_mean": 255.57199096679688, + "margin_dpo/margin_std": 387.40362548828125, + "step": 428 + }, + { + "KL/chosen_KL_mean": -449.23046875, + "KL/mean": -552.645263671875, + "KL/rejected_KL_mean": -656.06005859375, + "KL/std": 283.962158203125, + "epoch": 0.6299559471365639, + "fcm_dpo/beta": 0.001649289857596159, + "fcm_dpo/delta": -0.040331680327653885, + "fcm_dpo/margin": 206.8295440673828, + "fcm_dpo/q_t": 0.42213714122772217, + "grad_norm": 32.68413543701172, + "learning_rate": 1.828194884925749e-07, + "logits/chosen": -0.652100682258606, + "logits/rejected": -0.6345040798187256, + "logps/chosen": -530.4644775390625, + "logps/ref_chosen": -81.23401641845703, + "logps/ref_rejected": -91.79493713378906, + "logps/rejected": -747.85498046875, + "loss": 1.1566, + "margin_dpo/margin_mean": 206.82952880859375, + "margin_dpo/margin_std": 348.011474609375, + "step": 429 + }, + { + "KL/chosen_KL_mean": -376.54345703125, + "KL/mean": -479.85723876953125, + "KL/rejected_KL_mean": -583.1709594726562, + "KL/std": 259.899169921875, + "epoch": 0.631424375917768, + "fcm_dpo/beta": 0.0016597865615040064, + "fcm_dpo/delta": 0.058922089636325836, + "fcm_dpo/margin": 206.62753295898438, + "fcm_dpo/q_t": 0.4206964075565338, + "grad_norm": 29.323801040649414, + "learning_rate": 1.8158425248197928e-07, + "logits/chosen": -0.5997954607009888, + "logits/rejected": -0.598332941532135, + "logps/chosen": -437.4637451171875, + "logps/ref_chosen": -60.920326232910156, + "logps/ref_rejected": -104.42280578613281, + "logps/rejected": -687.59375, + "loss": 1.1256, + "margin_dpo/margin_mean": 206.62753295898438, + "margin_dpo/margin_std": 286.5379638671875, + "step": 430 + }, + { + "KL/chosen_KL_mean": -349.5550537109375, + "KL/mean": -498.06231689453125, + "KL/rejected_KL_mean": -646.569580078125, + "KL/std": 281.20587158203125, + "epoch": 0.6328928046989721, + "fcm_dpo/beta": 0.0016381596215069294, + "fcm_dpo/delta": -0.09109188616275787, + "fcm_dpo/margin": 297.0145263671875, + "fcm_dpo/q_t": 0.38691771030426025, + "grad_norm": 30.228837966918945, + "learning_rate": 1.8035081928995788e-07, + "logits/chosen": -0.59189772605896, + "logits/rejected": -0.5967794060707092, + "logps/chosen": -406.90380859375, + "logps/ref_chosen": -57.34874725341797, + "logps/ref_rejected": -92.84022521972656, + "logps/rejected": -739.4097900390625, + "loss": 1.0216, + "margin_dpo/margin_mean": 297.0145568847656, + "margin_dpo/margin_std": 313.81036376953125, + "step": 431 + }, + { + "KL/chosen_KL_mean": -339.9703674316406, + "KL/mean": -485.2652893066406, + "KL/rejected_KL_mean": -630.5601806640625, + "KL/std": 279.104736328125, + "epoch": 0.6343612334801763, + "fcm_dpo/beta": 0.0016237597446888685, + "fcm_dpo/delta": -0.07570492476224899, + "fcm_dpo/margin": 290.58984375, + "fcm_dpo/q_t": 0.39008086919784546, + "grad_norm": 34.41071701049805, + "learning_rate": 1.791192214186223e-07, + "logits/chosen": -0.5879085063934326, + "logits/rejected": -0.5795068740844727, + "logps/chosen": -411.045166015625, + "logps/ref_chosen": -71.07479095458984, + "logps/ref_rejected": -98.57952880859375, + "logps/rejected": -729.1397705078125, + "loss": 1.0208, + "margin_dpo/margin_mean": 290.58984375, + "margin_dpo/margin_std": 279.1397705078125, + "step": 432 + }, + { + "KL/chosen_KL_mean": -436.6034851074219, + "KL/mean": -530.861083984375, + "KL/rejected_KL_mean": -625.1187133789062, + "KL/std": 284.5777587890625, + "epoch": 0.6358296622613803, + "fcm_dpo/beta": 0.0016333262901753187, + "fcm_dpo/delta": 0.09479224681854248, + "fcm_dpo/margin": 188.51522827148438, + "fcm_dpo/q_t": 0.4265033006668091, + "grad_norm": 33.45824432373047, + "learning_rate": 1.7788949132172193e-07, + "logits/chosen": -0.6549836993217468, + "logits/rejected": -0.6436095237731934, + "logps/chosen": -494.8766784667969, + "logps/ref_chosen": -58.273193359375, + "logps/ref_rejected": -95.95089721679688, + "logps/rejected": -721.069580078125, + "loss": 1.1745, + "margin_dpo/margin_mean": 188.51522827148438, + "margin_dpo/margin_std": 336.2310485839844, + "step": 433 + }, + { + "KL/chosen_KL_mean": -377.69427490234375, + "KL/mean": -490.43060302734375, + "KL/rejected_KL_mean": -603.1669921875, + "KL/std": 274.6502685546875, + "epoch": 0.6372980910425844, + "fcm_dpo/beta": 0.0016410250682383776, + "fcm_dpo/delta": 0.031055327504873276, + "fcm_dpo/margin": 225.47265625, + "fcm_dpo/q_t": 0.41886186599731445, + "grad_norm": 22.17997932434082, + "learning_rate": 1.7666166140378853e-07, + "logits/chosen": -0.6539719104766846, + "logits/rejected": -0.6557008624076843, + "logps/chosen": -439.6679992675781, + "logps/ref_chosen": -61.97370147705078, + "logps/ref_rejected": -78.49861145019531, + "logps/rejected": -681.6655883789062, + "loss": 1.1216, + "margin_dpo/margin_mean": 225.47265625, + "margin_dpo/margin_std": 347.4930725097656, + "step": 434 + }, + { + "KL/chosen_KL_mean": -346.69720458984375, + "KL/mean": -469.296875, + "KL/rejected_KL_mean": -591.896484375, + "KL/std": 269.6709289550781, + "epoch": 0.6387665198237885, + "fcm_dpo/beta": 0.0016461058985441923, + "fcm_dpo/delta": -0.003797288052737713, + "fcm_dpo/margin": 245.1993408203125, + "fcm_dpo/q_t": 0.4061928987503052, + "grad_norm": 26.81846046447754, + "learning_rate": 1.7543576401928218e-07, + "logits/chosen": -0.6933680772781372, + "logits/rejected": -0.6857917308807373, + "logps/chosen": -398.1992492675781, + "logps/ref_chosen": -51.502052307128906, + "logps/ref_rejected": -87.56689453125, + "logps/rejected": -679.46337890625, + "loss": 1.084, + "margin_dpo/margin_mean": 245.1993408203125, + "margin_dpo/margin_std": 302.0467529296875, + "step": 435 + }, + { + "KL/chosen_KL_mean": -362.07177734375, + "KL/mean": -472.35260009765625, + "KL/rejected_KL_mean": -582.6334838867188, + "KL/std": 247.13198852539062, + "epoch": 0.6402349486049926, + "fcm_dpo/beta": 0.001651562051847577, + "fcm_dpo/delta": 0.03701151907444, + "fcm_dpo/margin": 220.5616912841797, + "fcm_dpo/q_t": 0.41518351435661316, + "grad_norm": 44.578636169433594, + "learning_rate": 1.742118314717391e-07, + "logits/chosen": -0.6292225122451782, + "logits/rejected": -0.5981835126876831, + "logps/chosen": -433.4754943847656, + "logps/ref_chosen": -71.40371704101562, + "logps/ref_rejected": -82.72775268554688, + "logps/rejected": -665.3612060546875, + "loss": 1.1089, + "margin_dpo/margin_mean": 220.5616912841797, + "margin_dpo/margin_std": 287.855224609375, + "step": 436 + }, + { + "KL/chosen_KL_mean": -370.03875732421875, + "KL/mean": -483.41693115234375, + "KL/rejected_KL_mean": -596.7950439453125, + "KL/std": 239.50619506835938, + "epoch": 0.6417033773861968, + "fcm_dpo/beta": 0.0016632757615298033, + "fcm_dpo/delta": 0.02373369038105011, + "fcm_dpo/margin": 226.75625610351562, + "fcm_dpo/q_t": 0.41175174713134766, + "grad_norm": 30.342931747436523, + "learning_rate": 1.7298989601292036e-07, + "logits/chosen": -0.6698806881904602, + "logits/rejected": -0.64947509765625, + "logps/chosen": -434.78302001953125, + "logps/ref_chosen": -64.7442626953125, + "logps/ref_rejected": -82.04356384277344, + "logps/rejected": -678.838623046875, + "loss": 1.098, + "margin_dpo/margin_mean": 226.75625610351562, + "margin_dpo/margin_std": 285.7284851074219, + "step": 437 + }, + { + "KL/chosen_KL_mean": -386.3381652832031, + "KL/mean": -517.0180053710938, + "KL/rejected_KL_mean": -647.6978759765625, + "KL/std": 274.5076904296875, + "epoch": 0.6431718061674009, + "fcm_dpo/beta": 0.0016521508805453777, + "fcm_dpo/delta": -0.033741071820259094, + "fcm_dpo/margin": 261.35968017578125, + "fcm_dpo/q_t": 0.39868584275245667, + "grad_norm": 32.4385871887207, + "learning_rate": 1.7176998984196144e-07, + "logits/chosen": -0.6905832290649414, + "logits/rejected": -0.6742756366729736, + "logps/chosen": -445.3568115234375, + "logps/ref_chosen": -59.0186653137207, + "logps/ref_rejected": -83.07682800292969, + "logps/rejected": -730.774658203125, + "loss": 1.0551, + "margin_dpo/margin_mean": 261.35968017578125, + "margin_dpo/margin_std": 287.7158508300781, + "step": 438 + }, + { + "KL/chosen_KL_mean": -398.215087890625, + "KL/mean": -503.7099914550781, + "KL/rejected_KL_mean": -609.204833984375, + "KL/std": 279.349609375, + "epoch": 0.644640234948605, + "fcm_dpo/beta": 0.001632839790545404, + "fcm_dpo/delta": -0.0710936188697815, + "fcm_dpo/margin": 210.98980712890625, + "fcm_dpo/q_t": 0.4208637773990631, + "grad_norm": 35.499446868896484, + "learning_rate": 1.7055214510452458e-07, + "logits/chosen": -0.6591403484344482, + "logits/rejected": -0.662467360496521, + "logps/chosen": -451.9991760253906, + "logps/ref_chosen": -53.78407669067383, + "logps/ref_rejected": -83.98545837402344, + "logps/rejected": -693.1903076171875, + "loss": 1.1405, + "margin_dpo/margin_mean": 210.98980712890625, + "margin_dpo/margin_std": 313.9350891113281, + "step": 439 + }, + { + "KL/chosen_KL_mean": -421.02459716796875, + "KL/mean": -543.773681640625, + "KL/rejected_KL_mean": -666.5227661132812, + "KL/std": 341.6332092285156, + "epoch": 0.6461086637298091, + "fcm_dpo/beta": 0.001635729568079114, + "fcm_dpo/delta": -0.0017141718417406082, + "fcm_dpo/margin": 245.49813842773438, + "fcm_dpo/q_t": 0.410071462392807, + "grad_norm": 42.761192321777344, + "learning_rate": 1.6933639389195134e-07, + "logits/chosen": -0.705498218536377, + "logits/rejected": -0.7023329138755798, + "logps/chosen": -499.59130859375, + "logps/ref_chosen": -78.56671905517578, + "logps/ref_rejected": -96.49775695800781, + "logps/rejected": -763.0205078125, + "loss": 1.0949, + "margin_dpo/margin_mean": 245.49813842773438, + "margin_dpo/margin_std": 340.76123046875, + "step": 440 + }, + { + "KL/chosen_KL_mean": -490.50604248046875, + "KL/mean": -611.987548828125, + "KL/rejected_KL_mean": -733.4691162109375, + "KL/std": 356.1271667480469, + "epoch": 0.6475770925110133, + "fcm_dpo/beta": 0.00163905113004148, + "fcm_dpo/delta": 0.001527242362499237, + "fcm_dpo/margin": 242.963134765625, + "fcm_dpo/q_t": 0.41395312547683716, + "grad_norm": 46.89374923706055, + "learning_rate": 1.681227682404166e-07, + "logits/chosen": -0.7099902629852295, + "logits/rejected": -0.6972057223320007, + "logps/chosen": -551.3304443359375, + "logps/ref_chosen": -60.824440002441406, + "logps/ref_rejected": -96.47080993652344, + "logps/rejected": -829.93994140625, + "loss": 1.1424, + "margin_dpo/margin_mean": 242.963134765625, + "margin_dpo/margin_std": 429.1325988769531, + "step": 441 + }, + { + "KL/chosen_KL_mean": -414.8262939453125, + "KL/mean": -562.6405029296875, + "KL/rejected_KL_mean": -710.4547119140625, + "KL/std": 340.4912109375, + "epoch": 0.6490455212922174, + "fcm_dpo/beta": 0.0016239210963249207, + "fcm_dpo/delta": -0.08454307168722153, + "fcm_dpo/margin": 295.62847900390625, + "fcm_dpo/q_t": 0.3937837481498718, + "grad_norm": 32.3649787902832, + "learning_rate": 1.669113001300851e-07, + "logits/chosen": -0.6760904788970947, + "logits/rejected": -0.6695461273193359, + "logps/chosen": -461.8374938964844, + "logps/ref_chosen": -47.01121520996094, + "logps/ref_rejected": -76.53926086425781, + "logps/rejected": -786.9940185546875, + "loss": 1.0551, + "margin_dpo/margin_mean": 295.6284484863281, + "margin_dpo/margin_std": 383.97320556640625, + "step": 442 + }, + { + "KL/chosen_KL_mean": -478.28509521484375, + "KL/mean": -572.2210693359375, + "KL/rejected_KL_mean": -666.1571044921875, + "KL/std": 346.36370849609375, + "epoch": 0.6505139500734214, + "fcm_dpo/beta": 0.0016035648295655847, + "fcm_dpo/delta": -0.005727704148739576, + "fcm_dpo/margin": 187.87205505371094, + "fcm_dpo/q_t": 0.43321898579597473, + "grad_norm": 35.34192657470703, + "learning_rate": 1.6570202148426815e-07, + "logits/chosen": -0.6955731511116028, + "logits/rejected": -0.6718583106994629, + "logps/chosen": -549.55810546875, + "logps/ref_chosen": -71.27301788330078, + "logps/ref_rejected": -86.679931640625, + "logps/rejected": -752.8370361328125, + "loss": 1.2151, + "margin_dpo/margin_mean": 187.87205505371094, + "margin_dpo/margin_std": 419.6290283203125, + "step": 443 + }, + { + "KL/chosen_KL_mean": -469.6700439453125, + "KL/mean": -620.1004638671875, + "KL/rejected_KL_mean": -770.5308837890625, + "KL/std": 358.370849609375, + "epoch": 0.6519823788546255, + "fcm_dpo/beta": 0.0015793245984241366, + "fcm_dpo/delta": -0.07941662520170212, + "fcm_dpo/margin": 300.8608093261719, + "fcm_dpo/q_t": 0.3940558433532715, + "grad_norm": 29.70403480529785, + "learning_rate": 1.6449496416858282e-07, + "logits/chosen": -0.6584120988845825, + "logits/rejected": -0.6683436632156372, + "logps/chosen": -526.8837280273438, + "logps/ref_chosen": -57.213706970214844, + "logps/ref_rejected": -97.25489807128906, + "logps/rejected": -867.7857666015625, + "loss": 1.0553, + "margin_dpo/margin_mean": 300.8608093261719, + "margin_dpo/margin_std": 400.1059265136719, + "step": 444 + }, + { + "KL/chosen_KL_mean": -416.6427917480469, + "KL/mean": -557.0631103515625, + "KL/rejected_KL_mean": -697.4834594726562, + "KL/std": 289.13763427734375, + "epoch": 0.6534508076358296, + "fcm_dpo/beta": 0.0015705095138400793, + "fcm_dpo/delta": -0.04293816536664963, + "fcm_dpo/margin": 280.84063720703125, + "fcm_dpo/q_t": 0.3998865485191345, + "grad_norm": 34.561370849609375, + "learning_rate": 1.6329015999011182e-07, + "logits/chosen": -0.666840672492981, + "logits/rejected": -0.6581023931503296, + "logps/chosen": -483.9425964355469, + "logps/ref_chosen": -67.29979705810547, + "logps/ref_rejected": -92.68267059326172, + "logps/rejected": -790.1661376953125, + "loss": 1.067, + "margin_dpo/margin_mean": 280.84063720703125, + "margin_dpo/margin_std": 354.77947998046875, + "step": 445 + }, + { + "KL/chosen_KL_mean": -381.79669189453125, + "KL/mean": -531.6203002929688, + "KL/rejected_KL_mean": -681.4439086914062, + "KL/std": 308.4264221191406, + "epoch": 0.6549192364170338, + "fcm_dpo/beta": 0.0015590311959385872, + "fcm_dpo/delta": -0.07091644406318665, + "fcm_dpo/margin": 299.64715576171875, + "fcm_dpo/q_t": 0.39108988642692566, + "grad_norm": 32.68497085571289, + "learning_rate": 1.6208764069656578e-07, + "logits/chosen": -0.6791973114013672, + "logits/rejected": -0.6917370557785034, + "logps/chosen": -440.89520263671875, + "logps/ref_chosen": -59.098487854003906, + "logps/ref_rejected": -101.26419067382812, + "logps/rejected": -782.7080688476562, + "loss": 1.0346, + "margin_dpo/margin_mean": 299.64715576171875, + "margin_dpo/margin_std": 317.11669921875, + "step": 446 + }, + { + "KL/chosen_KL_mean": -396.80615234375, + "KL/mean": -554.9542236328125, + "KL/rejected_KL_mean": -713.1023559570312, + "KL/std": 362.07012939453125, + "epoch": 0.6563876651982379, + "fcm_dpo/beta": 0.0015180823393166065, + "fcm_dpo/delta": -0.08475878089666367, + "fcm_dpo/margin": 316.29620361328125, + "fcm_dpo/q_t": 0.3928487300872803, + "grad_norm": 32.39216613769531, + "learning_rate": 1.608874379754465e-07, + "logits/chosen": -0.7233532667160034, + "logits/rejected": -0.7404334545135498, + "logps/chosen": -452.8814697265625, + "logps/ref_chosen": -56.07533264160156, + "logps/ref_rejected": -98.69475555419922, + "logps/rejected": -811.797119140625, + "loss": 1.0442, + "margin_dpo/margin_mean": 316.2962341308594, + "margin_dpo/margin_std": 406.5645751953125, + "step": 447 + }, + { + "KL/chosen_KL_mean": -435.0650939941406, + "KL/mean": -581.727294921875, + "KL/rejected_KL_mean": -728.3895263671875, + "KL/std": 313.0533447265625, + "epoch": 0.657856093979442, + "fcm_dpo/beta": 0.001512328744865954, + "fcm_dpo/delta": -0.04575078934431076, + "fcm_dpo/margin": 293.324462890625, + "fcm_dpo/q_t": 0.398230642080307, + "grad_norm": 37.61946105957031, + "learning_rate": 1.5968958345321177e-07, + "logits/chosen": -0.6439417600631714, + "logits/rejected": -0.6513484716415405, + "logps/chosen": -495.0689392089844, + "logps/ref_chosen": -60.00384521484375, + "logps/ref_rejected": -102.26465606689453, + "logps/rejected": -830.6541748046875, + "loss": 1.058, + "margin_dpo/margin_mean": 293.324462890625, + "margin_dpo/margin_std": 352.77362060546875, + "step": 448 + }, + { + "KL/chosen_KL_mean": -438.35260009765625, + "KL/mean": -588.6976318359375, + "KL/rejected_KL_mean": -739.042724609375, + "KL/std": 370.13311767578125, + "epoch": 0.6593245227606461, + "fcm_dpo/beta": 0.0014895712956786156, + "fcm_dpo/delta": -0.05026708170771599, + "fcm_dpo/margin": 300.6900939941406, + "fcm_dpo/q_t": 0.4015531539916992, + "grad_norm": 27.254419326782227, + "learning_rate": 1.584941086944423e-07, + "logits/chosen": -0.6604284048080444, + "logits/rejected": -0.6543349623680115, + "logps/chosen": -505.8792419433594, + "logps/ref_chosen": -67.52661895751953, + "logps/ref_rejected": -88.59690856933594, + "logps/rejected": -827.6396484375, + "loss": 1.0842, + "margin_dpo/margin_mean": 300.6900939941406, + "margin_dpo/margin_std": 448.55859375, + "step": 449 + }, + { + "KL/chosen_KL_mean": -353.40374755859375, + "KL/mean": -518.1417236328125, + "KL/rejected_KL_mean": -682.8796997070312, + "KL/std": 330.11187744140625, + "epoch": 0.6607929515418502, + "fcm_dpo/beta": 0.001471104216761887, + "fcm_dpo/delta": -0.08891390264034271, + "fcm_dpo/margin": 329.4759216308594, + "fcm_dpo/q_t": 0.38630813360214233, + "grad_norm": 44.962730407714844, + "learning_rate": 1.573010452010098e-07, + "logits/chosen": -0.6974214315414429, + "logits/rejected": -0.7095401883125305, + "logps/chosen": -410.5118713378906, + "logps/ref_chosen": -57.10811996459961, + "logps/ref_rejected": -102.75494384765625, + "logps/rejected": -785.6346435546875, + "loss": 1.0136, + "margin_dpo/margin_mean": 329.4759521484375, + "margin_dpo/margin_std": 328.4858703613281, + "step": 450 + }, + { + "KL/chosen_KL_mean": -480.493408203125, + "KL/mean": -596.1444702148438, + "KL/rejected_KL_mean": -711.7955322265625, + "KL/std": 380.8516540527344, + "epoch": 0.6622613803230544, + "fcm_dpo/beta": 0.0014787260442972183, + "fcm_dpo/delta": 0.05972132831811905, + "fcm_dpo/margin": 231.30206298828125, + "fcm_dpo/q_t": 0.41907864809036255, + "grad_norm": 39.84464645385742, + "learning_rate": 1.5611042441124687e-07, + "logits/chosen": -0.7402889728546143, + "logits/rejected": -0.7134509086608887, + "logps/chosen": -538.9622192382812, + "logps/ref_chosen": -58.46883010864258, + "logps/ref_rejected": -72.92941284179688, + "logps/rejected": -784.7249145507812, + "loss": 1.1722, + "margin_dpo/margin_mean": 231.3020782470703, + "margin_dpo/margin_std": 440.70330810546875, + "step": 451 + }, + { + "KL/chosen_KL_mean": -340.0597229003906, + "KL/mean": -483.1123046875, + "KL/rejected_KL_mean": -626.1648559570312, + "KL/std": 296.316162109375, + "epoch": 0.6637298091042585, + "fcm_dpo/beta": 0.0014726007357239723, + "fcm_dpo/delta": -0.022314528003335, + "fcm_dpo/margin": 286.1051330566406, + "fcm_dpo/q_t": 0.400713711977005, + "grad_norm": 21.744396209716797, + "learning_rate": 1.549222776991186e-07, + "logits/chosen": -0.6544848680496216, + "logits/rejected": -0.6705133318901062, + "logps/chosen": -390.4502868652344, + "logps/ref_chosen": -50.39055252075195, + "logps/ref_rejected": -97.77142333984375, + "logps/rejected": -723.936279296875, + "loss": 1.0562, + "margin_dpo/margin_mean": 286.10516357421875, + "margin_dpo/margin_std": 303.95037841796875, + "step": 452 + }, + { + "KL/chosen_KL_mean": -392.181396484375, + "KL/mean": -527.95263671875, + "KL/rejected_KL_mean": -663.723876953125, + "KL/std": 302.70947265625, + "epoch": 0.6651982378854625, + "fcm_dpo/beta": 0.0014694023411720991, + "fcm_dpo/delta": 0.0009205006062984467, + "fcm_dpo/margin": 271.54254150390625, + "fcm_dpo/q_t": 0.40955957770347595, + "grad_norm": 30.068889617919922, + "learning_rate": 1.5373663637339584e-07, + "logits/chosen": -0.7128562331199646, + "logits/rejected": -0.6981433629989624, + "logps/chosen": -449.896240234375, + "logps/ref_chosen": -57.71485137939453, + "logps/ref_rejected": -82.20741271972656, + "logps/rejected": -745.9313354492188, + "loss": 1.0919, + "margin_dpo/margin_mean": 271.54254150390625, + "margin_dpo/margin_std": 364.5362548828125, + "step": 453 + }, + { + "KL/chosen_KL_mean": -482.7806701660156, + "KL/mean": -633.9759521484375, + "KL/rejected_KL_mean": -785.1713256835938, + "KL/std": 346.2369384765625, + "epoch": 0.6666666666666666, + "fcm_dpo/beta": 0.0014605964533984661, + "fcm_dpo/delta": -0.043758489191532135, + "fcm_dpo/margin": 302.390625, + "fcm_dpo/q_t": 0.39894014596939087, + "grad_norm": 31.32180404663086, + "learning_rate": 1.5255353167683017e-07, + "logits/chosen": -0.7373279333114624, + "logits/rejected": -0.725917637348175, + "logps/chosen": -543.726318359375, + "logps/ref_chosen": -60.945648193359375, + "logps/ref_rejected": -84.95079040527344, + "logps/rejected": -870.1220703125, + "loss": 1.067, + "margin_dpo/margin_mean": 302.390625, + "margin_dpo/margin_std": 393.43988037109375, + "step": 454 + }, + { + "KL/chosen_KL_mean": -409.9599609375, + "KL/mean": -590.746826171875, + "KL/rejected_KL_mean": -771.53369140625, + "KL/std": 357.16259765625, + "epoch": 0.6681350954478708, + "fcm_dpo/beta": 0.0014415888581424952, + "fcm_dpo/delta": -0.12789547443389893, + "fcm_dpo/margin": 361.5738220214844, + "fcm_dpo/q_t": 0.3820483982563019, + "grad_norm": 35.17242431640625, + "learning_rate": 1.5137299478533064e-07, + "logits/chosen": -0.6761902570724487, + "logits/rejected": -0.6945501565933228, + "logps/chosen": -454.8466491699219, + "logps/ref_chosen": -44.88671112060547, + "logps/ref_rejected": -115.30147552490234, + "logps/rejected": -886.835205078125, + "loss": 1.014, + "margin_dpo/margin_mean": 361.5738220214844, + "margin_dpo/margin_std": 407.70379638671875, + "step": 455 + }, + { + "KL/chosen_KL_mean": -441.02471923828125, + "KL/mean": -611.8526000976562, + "KL/rejected_KL_mean": -782.6804809570312, + "KL/std": 368.62628173828125, + "epoch": 0.6696035242290749, + "fcm_dpo/beta": 0.0014074372593313456, + "fcm_dpo/delta": -0.08486473560333252, + "fcm_dpo/margin": 341.6557312011719, + "fcm_dpo/q_t": 0.38988521695137024, + "grad_norm": 24.956411361694336, + "learning_rate": 1.5019505680714232e-07, + "logits/chosen": -0.7183812260627747, + "logits/rejected": -0.7420483827590942, + "logps/chosen": -498.0615234375, + "logps/ref_chosen": -57.036781311035156, + "logps/ref_rejected": -105.21784210205078, + "logps/rejected": -887.8983154296875, + "loss": 1.0206, + "margin_dpo/margin_mean": 341.65576171875, + "margin_dpo/margin_std": 363.93701171875, + "step": 456 + }, + { + "KL/chosen_KL_mean": -427.92364501953125, + "KL/mean": -595.2412719726562, + "KL/rejected_KL_mean": -762.558837890625, + "KL/std": 349.4745178222656, + "epoch": 0.671071953010279, + "fcm_dpo/beta": 0.0013823909685015678, + "fcm_dpo/delta": -0.06590519100427628, + "fcm_dpo/margin": 334.6352233886719, + "fcm_dpo/q_t": 0.39179015159606934, + "grad_norm": 29.466060638427734, + "learning_rate": 1.4901974878202627e-07, + "logits/chosen": -0.6991932392120361, + "logits/rejected": -0.7005044221878052, + "logps/chosen": -482.16619873046875, + "logps/ref_chosen": -54.24253845214844, + "logps/ref_rejected": -85.10956573486328, + "logps/rejected": -847.66845703125, + "loss": 1.0307, + "margin_dpo/margin_mean": 334.6352233886719, + "margin_dpo/margin_std": 347.06805419921875, + "step": 457 + }, + { + "KL/chosen_KL_mean": -452.7229309082031, + "KL/mean": -606.5921020507812, + "KL/rejected_KL_mean": -760.4613037109375, + "KL/std": 326.22540283203125, + "epoch": 0.6725403817914831, + "fcm_dpo/beta": 0.001369113102555275, + "fcm_dpo/delta": -0.022949304431676865, + "fcm_dpo/margin": 307.73828125, + "fcm_dpo/q_t": 0.4038897454738617, + "grad_norm": 30.682571411132812, + "learning_rate": 1.4784710168044212e-07, + "logits/chosen": -0.751007616519928, + "logits/rejected": -0.7468206286430359, + "logps/chosen": -508.1318359375, + "logps/ref_chosen": -55.40888214111328, + "logps/ref_rejected": -97.68325805664062, + "logps/rejected": -858.14453125, + "loss": 1.0701, + "margin_dpo/margin_mean": 307.7383117675781, + "margin_dpo/margin_std": 368.6239929199219, + "step": 458 + }, + { + "KL/chosen_KL_mean": -485.3470458984375, + "KL/mean": -654.956298828125, + "KL/rejected_KL_mean": -824.5655517578125, + "KL/std": 385.02587890625, + "epoch": 0.6740088105726872, + "fcm_dpo/beta": 0.001356898806989193, + "fcm_dpo/delta": -0.0636778175830841, + "fcm_dpo/margin": 339.2184753417969, + "fcm_dpo/q_t": 0.3953377604484558, + "grad_norm": 32.14663314819336, + "learning_rate": 1.466771464027316e-07, + "logits/chosen": -0.7115896940231323, + "logits/rejected": -0.7303779125213623, + "logps/chosen": -531.904541015625, + "logps/ref_chosen": -46.55748748779297, + "logps/ref_rejected": -86.16854095458984, + "logps/rejected": -910.7340698242188, + "loss": 1.0571, + "margin_dpo/margin_mean": 339.218505859375, + "margin_dpo/margin_std": 428.857177734375, + "step": 459 + }, + { + "KL/chosen_KL_mean": -530.1577758789062, + "KL/mean": -716.4615478515625, + "KL/rejected_KL_mean": -902.7651977539062, + "KL/std": 376.7371826171875, + "epoch": 0.6754772393538914, + "fcm_dpo/beta": 0.001337511232122779, + "fcm_dpo/delta": -0.10346446931362152, + "fcm_dpo/margin": 372.607421875, + "fcm_dpo/q_t": 0.3863397240638733, + "grad_norm": 35.66582107543945, + "learning_rate": 1.4550991377830423e-07, + "logits/chosen": -0.7767213582992554, + "logits/rejected": -0.8095457553863525, + "logps/chosen": -581.792724609375, + "logps/ref_chosen": -51.63489532470703, + "logps/ref_rejected": -104.11935424804688, + "logps/rejected": -1006.884521484375, + "loss": 1.0181, + "margin_dpo/margin_mean": 372.607421875, + "margin_dpo/margin_std": 416.0313415527344, + "step": 460 + }, + { + "KL/chosen_KL_mean": -561.940673828125, + "KL/mean": -709.7664794921875, + "KL/rejected_KL_mean": -857.5922241210938, + "KL/std": 384.3741149902344, + "epoch": 0.6769456681350955, + "fcm_dpo/beta": 0.001335039036348462, + "fcm_dpo/delta": 0.00526130385696888, + "fcm_dpo/margin": 295.65155029296875, + "fcm_dpo/q_t": 0.4116092324256897, + "grad_norm": 29.899106979370117, + "learning_rate": 1.4434543456482518e-07, + "logits/chosen": -0.7882189750671387, + "logits/rejected": -0.8021144270896912, + "logps/chosen": -617.1226196289062, + "logps/ref_chosen": -55.18195724487305, + "logps/ref_rejected": -86.47689819335938, + "logps/rejected": -944.069091796875, + "loss": 1.1054, + "margin_dpo/margin_mean": 295.65155029296875, + "margin_dpo/margin_std": 430.65924072265625, + "step": 461 + }, + { + "KL/chosen_KL_mean": -578.3634033203125, + "KL/mean": -700.3892822265625, + "KL/rejected_KL_mean": -822.4152221679688, + "KL/std": 376.43658447265625, + "epoch": 0.6784140969162996, + "fcm_dpo/beta": 0.0013445301447063684, + "fcm_dpo/delta": 0.07435386627912521, + "fcm_dpo/margin": 244.05184936523438, + "fcm_dpo/q_t": 0.42686232924461365, + "grad_norm": 39.60346984863281, + "learning_rate": 1.4318373944740484e-07, + "logits/chosen": -0.8968935012817383, + "logits/rejected": -0.8908392190933228, + "logps/chosen": -648.2914428710938, + "logps/ref_chosen": -69.92803192138672, + "logps/ref_rejected": -78.84111022949219, + "logps/rejected": -901.25634765625, + "loss": 1.1632, + "margin_dpo/margin_mean": 244.05184936523438, + "margin_dpo/margin_std": 432.3650817871094, + "step": 462 + }, + { + "KL/chosen_KL_mean": -589.15576171875, + "KL/mean": -753.0881958007812, + "KL/rejected_KL_mean": -917.0206298828125, + "KL/std": 411.322021484375, + "epoch": 0.6798825256975036, + "fcm_dpo/beta": 0.0013473678845912218, + "fcm_dpo/delta": -0.04382871836423874, + "fcm_dpo/margin": 327.86480712890625, + "fcm_dpo/q_t": 0.401495099067688, + "grad_norm": 44.926334381103516, + "learning_rate": 1.4202485903778976e-07, + "logits/chosen": -0.8459576964378357, + "logits/rejected": -0.8583585023880005, + "logps/chosen": -644.43017578125, + "logps/ref_chosen": -55.27437210083008, + "logps/ref_rejected": -89.02497863769531, + "logps/rejected": -1006.0455932617188, + "loss": 1.0814, + "margin_dpo/margin_mean": 327.86480712890625, + "margin_dpo/margin_std": 460.8346862792969, + "step": 463 + }, + { + "KL/chosen_KL_mean": -577.5390625, + "KL/mean": -824.5910034179688, + "KL/rejected_KL_mean": -1071.6429443359375, + "KL/std": 482.9642333984375, + "epoch": 0.6813509544787077, + "fcm_dpo/beta": 0.0012808447936549783, + "fcm_dpo/delta": -0.25145474076271057, + "fcm_dpo/margin": 494.1038818359375, + "fcm_dpo/q_t": 0.35675048828125, + "grad_norm": 50.258697509765625, + "learning_rate": 1.4086882387355658e-07, + "logits/chosen": -0.8322213888168335, + "logits/rejected": -0.8996630311012268, + "logps/chosen": -628.4513549804688, + "logps/ref_chosen": -50.91230010986328, + "logps/ref_rejected": -102.4893798828125, + "logps/rejected": -1174.13232421875, + "loss": 0.9347, + "margin_dpo/margin_mean": 494.1038818359375, + "margin_dpo/margin_std": 484.71392822265625, + "step": 464 + }, + { + "KL/chosen_KL_mean": -601.5679321289062, + "KL/mean": -797.656982421875, + "KL/rejected_KL_mean": -993.74609375, + "KL/std": 499.0876159667969, + "epoch": 0.6828193832599119, + "fcm_dpo/beta": 0.0012595669832080603, + "fcm_dpo/delta": -0.09877628087997437, + "fcm_dpo/margin": 392.17816162109375, + "fcm_dpo/q_t": 0.3856554627418518, + "grad_norm": 39.39644241333008, + "learning_rate": 1.3971566441730714e-07, + "logits/chosen": -0.8465057015419006, + "logits/rejected": -0.8691214323043823, + "logps/chosen": -661.684814453125, + "logps/ref_chosen": -60.116851806640625, + "logps/ref_rejected": -113.94602966308594, + "logps/rejected": -1107.692138671875, + "loss": 1.0558, + "margin_dpo/margin_mean": 392.17816162109375, + "margin_dpo/margin_std": 528.9464111328125, + "step": 465 + }, + { + "KL/chosen_KL_mean": -660.0999755859375, + "KL/mean": -833.5242919921875, + "KL/rejected_KL_mean": -1006.94873046875, + "KL/std": 460.6139221191406, + "epoch": 0.684287812041116, + "fcm_dpo/beta": 0.0012364451540634036, + "fcm_dpo/delta": -0.03083794191479683, + "fcm_dpo/margin": 346.8487854003906, + "fcm_dpo/q_t": 0.4021310806274414, + "grad_norm": 38.076751708984375, + "learning_rate": 1.3856541105586545e-07, + "logits/chosen": -0.8880220651626587, + "logits/rejected": -0.8946011662483215, + "logps/chosen": -713.0208740234375, + "logps/ref_chosen": -52.920921325683594, + "logps/ref_rejected": -90.3154296875, + "logps/rejected": -1097.26416015625, + "loss": 1.0962, + "margin_dpo/margin_mean": 346.84881591796875, + "margin_dpo/margin_std": 507.0184326171875, + "step": 466 + }, + { + "KL/chosen_KL_mean": -823.9940185546875, + "KL/mean": -1010.909912109375, + "KL/rejected_KL_mean": -1197.8258056640625, + "KL/std": 614.4384155273438, + "epoch": 0.6857562408223201, + "fcm_dpo/beta": 0.0012187270913273096, + "fcm_dpo/delta": -0.060437288135290146, + "fcm_dpo/margin": 373.8318786621094, + "fcm_dpo/q_t": 0.40229350328445435, + "grad_norm": 62.107765197753906, + "learning_rate": 1.3741809409947729e-07, + "logits/chosen": -0.9601616859436035, + "logits/rejected": -0.9515029788017273, + "logps/chosen": -902.7098388671875, + "logps/ref_chosen": -78.7158203125, + "logps/ref_rejected": -102.86019897460938, + "logps/rejected": -1300.68603515625, + "loss": 1.1519, + "margin_dpo/margin_mean": 373.8318786621094, + "margin_dpo/margin_std": 707.5423583984375, + "step": 467 + }, + { + "KL/chosen_KL_mean": -662.00439453125, + "KL/mean": -897.078857421875, + "KL/rejected_KL_mean": -1132.1533203125, + "KL/std": 548.8661499023438, + "epoch": 0.6872246696035242, + "fcm_dpo/beta": 0.001194630516692996, + "fcm_dpo/delta": -0.17134541273117065, + "fcm_dpo/margin": 470.14892578125, + "fcm_dpo/q_t": 0.3789057433605194, + "grad_norm": 45.27859878540039, + "learning_rate": 1.362737437810114e-07, + "logits/chosen": -0.9165079593658447, + "logits/rejected": -0.9324535131454468, + "logps/chosen": -731.9397583007812, + "logps/ref_chosen": -69.93536376953125, + "logps/ref_rejected": -101.02880859375, + "logps/rejected": -1233.18212890625, + "loss": 1.0136, + "margin_dpo/margin_mean": 470.14892578125, + "margin_dpo/margin_std": 618.6029663085938, + "step": 468 + }, + { + "KL/chosen_KL_mean": -682.61962890625, + "KL/mean": -917.685546875, + "KL/rejected_KL_mean": -1152.75146484375, + "KL/std": 480.97088623046875, + "epoch": 0.6886930983847284, + "fcm_dpo/beta": 0.0011510958429425955, + "fcm_dpo/delta": -0.150983989238739, + "fcm_dpo/margin": 470.13177490234375, + "fcm_dpo/q_t": 0.37772035598754883, + "grad_norm": 44.61856460571289, + "learning_rate": 1.351323902551631e-07, + "logits/chosen": -0.9298604726791382, + "logits/rejected": -0.9497323036193848, + "logps/chosen": -750.744384765625, + "logps/ref_chosen": -68.12469482421875, + "logps/ref_rejected": -104.78640747070312, + "logps/rejected": -1257.537841796875, + "loss": 1.0078, + "margin_dpo/margin_mean": 470.13177490234375, + "margin_dpo/margin_std": 545.7459716796875, + "step": 469 + }, + { + "KL/chosen_KL_mean": -585.1773071289062, + "KL/mean": -789.4063720703125, + "KL/rejected_KL_mean": -993.6353759765625, + "KL/std": 501.39984130859375, + "epoch": 0.6901615271659325, + "fcm_dpo/beta": 0.0011422440875321627, + "fcm_dpo/delta": -0.06988134980201721, + "fcm_dpo/margin": 408.4580993652344, + "fcm_dpo/q_t": 0.39377570152282715, + "grad_norm": 27.795562744140625, + "learning_rate": 1.339940635976592e-07, + "logits/chosen": -0.9022126197814941, + "logits/rejected": -0.9164737462997437, + "logps/chosen": -628.96923828125, + "logps/ref_chosen": -43.791927337646484, + "logps/ref_rejected": -82.70285034179688, + "logps/rejected": -1076.3382568359375, + "loss": 1.0646, + "margin_dpo/margin_mean": 408.45806884765625, + "margin_dpo/margin_std": 547.7627563476562, + "step": 470 + }, + { + "KL/chosen_KL_mean": -710.951416015625, + "KL/mean": -879.2623291015625, + "KL/rejected_KL_mean": -1047.5733642578125, + "KL/std": 499.705810546875, + "epoch": 0.6916299559471366, + "fcm_dpo/beta": 0.0011305524967610836, + "fcm_dpo/delta": 0.019698694348335266, + "fcm_dpo/margin": 336.62200927734375, + "fcm_dpo/q_t": 0.41612815856933594, + "grad_norm": 53.5421142578125, + "learning_rate": 1.3285879380446563e-07, + "logits/chosen": -0.9983842968940735, + "logits/rejected": -1.0077528953552246, + "logps/chosen": -774.2908935546875, + "logps/ref_chosen": -63.33952331542969, + "logps/ref_rejected": -83.61048126220703, + "logps/rejected": -1131.183837890625, + "loss": 1.1312, + "margin_dpo/margin_mean": 336.62200927734375, + "margin_dpo/margin_std": 550.3225708007812, + "step": 471 + }, + { + "KL/chosen_KL_mean": -691.177001953125, + "KL/mean": -886.953857421875, + "KL/rejected_KL_mean": -1082.730712890625, + "KL/std": 583.6131591796875, + "epoch": 0.6930983847283406, + "fcm_dpo/beta": 0.0011264560744166374, + "fcm_dpo/delta": -0.04338788241147995, + "fcm_dpo/margin": 391.5537109375, + "fcm_dpo/q_t": 0.40537628531455994, + "grad_norm": 36.24667739868164, + "learning_rate": 1.317266107909975e-07, + "logits/chosen": -0.9884932041168213, + "logits/rejected": -0.9719465970993042, + "logps/chosen": -774.8431396484375, + "logps/ref_chosen": -83.66610717773438, + "logps/ref_rejected": -117.20919799804688, + "logps/rejected": -1199.93994140625, + "loss": 1.1093, + "margin_dpo/margin_mean": 391.5537109375, + "margin_dpo/margin_std": 631.884033203125, + "step": 472 + }, + { + "KL/chosen_KL_mean": -832.517578125, + "KL/mean": -925.3512573242188, + "KL/rejected_KL_mean": -1018.184814453125, + "KL/std": 606.7120361328125, + "epoch": 0.6945668135095447, + "fcm_dpo/beta": 0.0011280329199507833, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 185.667236328125, + "fcm_dpo/q_t": 0.4554038643836975, + "grad_norm": 122.36241912841797, + "learning_rate": 1.3059754439133002e-07, + "logits/chosen": -0.988477349281311, + "logits/rejected": -0.9607559442520142, + "logps/chosen": -896.0145874023438, + "logps/ref_chosen": -63.49696731567383, + "logps/ref_rejected": -81.14657592773438, + "logps/rejected": -1099.3314208984375, + "loss": 1.3563, + "margin_dpo/margin_mean": 185.667236328125, + "margin_dpo/margin_std": 783.8460693359375, + "step": 473 + }, + { + "KL/chosen_KL_mean": -654.71240234375, + "KL/mean": -823.9849853515625, + "KL/rejected_KL_mean": -993.2574462890625, + "KL/std": 526.5281982421875, + "epoch": 0.6960352422907489, + "fcm_dpo/beta": 0.0011190182995051146, + "fcm_dpo/delta": -0.08055973798036575, + "fcm_dpo/margin": 338.5450134277344, + "fcm_dpo/q_t": 0.4132459759712219, + "grad_norm": 37.246849060058594, + "learning_rate": 1.2947162435741277e-07, + "logits/chosen": -0.92474365234375, + "logits/rejected": -0.9312121868133545, + "logps/chosen": -707.3243408203125, + "logps/ref_chosen": -52.6119384765625, + "logps/ref_rejected": -90.08041381835938, + "logps/rejected": -1083.337890625, + "loss": 1.1495, + "margin_dpo/margin_mean": 338.5450439453125, + "margin_dpo/margin_std": 580.7293701171875, + "step": 474 + }, + { + "KL/chosen_KL_mean": -465.27777099609375, + "KL/mean": -679.0506591796875, + "KL/rejected_KL_mean": -892.8236083984375, + "KL/std": 421.060546875, + "epoch": 0.697503671071953, + "fcm_dpo/beta": 0.0010959157953038812, + "fcm_dpo/delta": -0.0721563771367073, + "fcm_dpo/margin": 427.5458679199219, + "fcm_dpo/q_t": 0.39104607701301575, + "grad_norm": 51.85737991333008, + "learning_rate": 1.2834888035828596e-07, + "logits/chosen": -0.9467837810516357, + "logits/rejected": -0.977871835231781, + "logps/chosen": -507.7729797363281, + "logps/ref_chosen": -42.49519348144531, + "logps/ref_rejected": -90.06294250488281, + "logps/rejected": -982.8865966796875, + "loss": 1.0281, + "margin_dpo/margin_mean": 427.5458984375, + "margin_dpo/margin_std": 449.01556396484375, + "step": 475 + }, + { + "KL/chosen_KL_mean": -574.3722534179688, + "KL/mean": -739.1430053710938, + "KL/rejected_KL_mean": -903.913818359375, + "KL/std": 451.5216064453125, + "epoch": 0.6989720998531571, + "fcm_dpo/beta": 0.0010992654133588076, + "fcm_dpo/delta": 0.03918338194489479, + "fcm_dpo/margin": 329.5415344238281, + "fcm_dpo/q_t": 0.41674578189849854, + "grad_norm": 79.2264175415039, + "learning_rate": 1.2722934197929802e-07, + "logits/chosen": -0.9211336970329285, + "logits/rejected": -0.9354342222213745, + "logps/chosen": -617.3216552734375, + "logps/ref_chosen": -42.94938278198242, + "logps/ref_rejected": -73.71023559570312, + "logps/rejected": -977.6240234375, + "loss": 1.1116, + "margin_dpo/margin_mean": 329.54150390625, + "margin_dpo/margin_std": 444.04339599609375, + "step": 476 + }, + { + "KL/chosen_KL_mean": -601.9998779296875, + "KL/mean": -778.6286010742188, + "KL/rejected_KL_mean": -955.2572021484375, + "KL/std": 492.07171630859375, + "epoch": 0.7004405286343612, + "fcm_dpo/beta": 0.0011070938780903816, + "fcm_dpo/delta": 0.009110800921916962, + "fcm_dpo/margin": 353.25738525390625, + "fcm_dpo/q_t": 0.4111800789833069, + "grad_norm": 36.04801940917969, + "learning_rate": 1.2611303872132631e-07, + "logits/chosen": -0.9658732414245605, + "logits/rejected": -0.9317635297775269, + "logps/chosen": -672.7725219726562, + "logps/ref_chosen": -70.77261352539062, + "logps/ref_rejected": -76.13737487792969, + "logps/rejected": -1031.3946533203125, + "loss": 1.1286, + "margin_dpo/margin_mean": 353.2573547363281, + "margin_dpo/margin_std": 582.523193359375, + "step": 477 + }, + { + "KL/chosen_KL_mean": -485.2200927734375, + "KL/mean": -681.3143920898438, + "KL/rejected_KL_mean": -877.40869140625, + "KL/std": 427.36639404296875, + "epoch": 0.7019089574155654, + "fcm_dpo/beta": 0.0011018933728337288, + "fcm_dpo/delta": -0.0336417555809021, + "fcm_dpo/margin": 392.1885986328125, + "fcm_dpo/q_t": 0.40104353427886963, + "grad_norm": 55.68071365356445, + "learning_rate": 1.2500000000000005e-07, + "logits/chosen": -0.8429279327392578, + "logits/rejected": -0.8667222857475281, + "logps/chosen": -526.66064453125, + "logps/ref_chosen": -41.440513610839844, + "logps/ref_rejected": -85.36196899414062, + "logps/rejected": -962.7706298828125, + "loss": 1.0694, + "margin_dpo/margin_mean": 392.1885986328125, + "margin_dpo/margin_std": 496.35369873046875, + "step": 478 + }, + { + "KL/chosen_KL_mean": -646.2515869140625, + "KL/mean": -834.347900390625, + "KL/rejected_KL_mean": -1022.4442138671875, + "KL/std": 526.5473022460938, + "epoch": 0.7033773861967695, + "fcm_dpo/beta": 0.0011019103694707155, + "fcm_dpo/delta": -0.015925616025924683, + "fcm_dpo/margin": 376.192626953125, + "fcm_dpo/q_t": 0.4086768627166748, + "grad_norm": 31.531246185302734, + "learning_rate": 1.2389025514492456e-07, + "logits/chosen": -0.8888028860092163, + "logits/rejected": -0.9207658767700195, + "logps/chosen": -700.1594848632812, + "logps/ref_chosen": -53.907920837402344, + "logps/ref_rejected": -95.1163330078125, + "logps/rejected": -1117.560546875, + "loss": 1.1182, + "margin_dpo/margin_mean": 376.192626953125, + "margin_dpo/margin_std": 613.9232177734375, + "step": 479 + }, + { + "KL/chosen_KL_mean": -766.6184692382812, + "KL/mean": -914.756591796875, + "KL/rejected_KL_mean": -1062.894775390625, + "KL/std": 496.72235107421875, + "epoch": 0.7048458149779736, + "fcm_dpo/beta": 0.0010890522971749306, + "fcm_dpo/delta": -0.04379244148731232, + "fcm_dpo/margin": 296.2763366699219, + "fcm_dpo/q_t": 0.42761245369911194, + "grad_norm": 53.24702453613281, + "learning_rate": 1.227838333989088e-07, + "logits/chosen": -0.9728115797042847, + "logits/rejected": -0.9693245887756348, + "logps/chosen": -825.3011474609375, + "logps/ref_chosen": -58.682701110839844, + "logps/ref_rejected": -82.93248748779297, + "logps/rejected": -1145.827392578125, + "loss": 1.1792, + "margin_dpo/margin_mean": 296.27630615234375, + "margin_dpo/margin_std": 538.973876953125, + "step": 480 + }, + { + "KL/chosen_KL_mean": -635.2621459960938, + "KL/mean": -866.452880859375, + "KL/rejected_KL_mean": -1097.6435546875, + "KL/std": 512.14208984375, + "epoch": 0.7063142437591777, + "fcm_dpo/beta": 0.0010695490054786205, + "fcm_dpo/delta": -0.09937489032745361, + "fcm_dpo/margin": 462.38153076171875, + "fcm_dpo/q_t": 0.38825610280036926, + "grad_norm": 51.05171203613281, + "learning_rate": 1.2168076391719489e-07, + "logits/chosen": -0.9634227752685547, + "logits/rejected": -0.9947628974914551, + "logps/chosen": -690.2264404296875, + "logps/ref_chosen": -54.964271545410156, + "logps/ref_rejected": -92.42044067382812, + "logps/rejected": -1190.0640869140625, + "loss": 1.0295, + "margin_dpo/margin_mean": 462.38153076171875, + "margin_dpo/margin_std": 550.9169311523438, + "step": 481 + }, + { + "KL/chosen_KL_mean": -712.7791137695312, + "KL/mean": -819.776123046875, + "KL/rejected_KL_mean": -926.7730102539062, + "KL/std": 536.703857421875, + "epoch": 0.7077826725403817, + "fcm_dpo/beta": 0.001071346690878272, + "fcm_dpo/delta": 0.07841724902391434, + "fcm_dpo/margin": 213.99391174316406, + "fcm_dpo/q_t": 0.4453909397125244, + "grad_norm": 65.0921859741211, + "learning_rate": 1.2058107576668938e-07, + "logits/chosen": -0.8759874105453491, + "logits/rejected": -0.8658995032310486, + "logps/chosen": -780.3325805664062, + "logps/ref_chosen": -67.553466796875, + "logps/ref_rejected": -87.58953857421875, + "logps/rejected": -1014.362548828125, + "loss": 1.2719, + "margin_dpo/margin_mean": 213.993896484375, + "margin_dpo/margin_std": 620.146484375, + "step": 482 + }, + { + "KL/chosen_KL_mean": -646.241455078125, + "KL/mean": -885.39501953125, + "KL/rejected_KL_mean": -1124.548583984375, + "KL/std": 534.4652099609375, + "epoch": 0.7092511013215859, + "fcm_dpo/beta": 0.0010598013177514076, + "fcm_dpo/delta": -0.11290125548839569, + "fcm_dpo/margin": 478.30712890625, + "fcm_dpo/q_t": 0.38709717988967896, + "grad_norm": 32.96552276611328, + "learning_rate": 1.194847979251979e-07, + "logits/chosen": -0.9770244359970093, + "logits/rejected": -0.9915624856948853, + "logps/chosen": -709.5712890625, + "logps/ref_chosen": -63.32981872558594, + "logps/ref_rejected": -95.78697204589844, + "logps/rejected": -1220.3355712890625, + "loss": 1.0269, + "margin_dpo/margin_mean": 478.30712890625, + "margin_dpo/margin_std": 586.1543579101562, + "step": 483 + }, + { + "KL/chosen_KL_mean": -553.4284057617188, + "KL/mean": -776.1009521484375, + "KL/rejected_KL_mean": -998.7734375, + "KL/std": 526.0048828125, + "epoch": 0.71071953010279, + "fcm_dpo/beta": 0.001048530451953411, + "fcm_dpo/delta": -0.07025562971830368, + "fcm_dpo/margin": 445.3449401855469, + "fcm_dpo/q_t": 0.39412179589271545, + "grad_norm": 53.46791458129883, + "learning_rate": 1.1839195928066101e-07, + "logits/chosen": -0.9660162329673767, + "logits/rejected": -0.993166983127594, + "logps/chosen": -612.5665283203125, + "logps/ref_chosen": -59.13812255859375, + "logps/ref_rejected": -84.37144470214844, + "logps/rejected": -1083.144775390625, + "loss": 1.0446, + "margin_dpo/margin_mean": 445.3449401855469, + "margin_dpo/margin_std": 531.8111572265625, + "step": 484 + }, + { + "KL/chosen_KL_mean": -588.0751342773438, + "KL/mean": -801.0594482421875, + "KL/rejected_KL_mean": -1014.043701171875, + "KL/std": 530.8162841796875, + "epoch": 0.7121879588839941, + "fcm_dpo/beta": 0.001037056790664792, + "fcm_dpo/delta": -0.043720267713069916, + "fcm_dpo/margin": 425.9684753417969, + "fcm_dpo/q_t": 0.40099409222602844, + "grad_norm": 34.87718200683594, + "learning_rate": 1.1730258863039347e-07, + "logits/chosen": -0.8909621238708496, + "logits/rejected": -0.9123867750167847, + "logps/chosen": -646.9246826171875, + "logps/ref_chosen": -58.849571228027344, + "logps/ref_rejected": -103.36408233642578, + "logps/rejected": -1117.40771484375, + "loss": 1.0789, + "margin_dpo/margin_mean": 425.968505859375, + "margin_dpo/margin_std": 593.173583984375, + "step": 485 + }, + { + "KL/chosen_KL_mean": -691.0508422851562, + "KL/mean": -929.311279296875, + "KL/rejected_KL_mean": -1167.5716552734375, + "KL/std": 593.0984497070312, + "epoch": 0.7136563876651982, + "fcm_dpo/beta": 0.0010174668859690428, + "fcm_dpo/delta": -0.08924780786037445, + "fcm_dpo/margin": 476.5207824707031, + "fcm_dpo/q_t": 0.39317959547042847, + "grad_norm": 46.1490364074707, + "learning_rate": 1.1621671468032493e-07, + "logits/chosen": -0.9853001832962036, + "logits/rejected": -0.999464750289917, + "logps/chosen": -746.310546875, + "logps/ref_chosen": -55.25966262817383, + "logps/ref_rejected": -92.13936614990234, + "logps/rejected": -1259.7109375, + "loss": 1.0787, + "margin_dpo/margin_mean": 476.5207824707031, + "margin_dpo/margin_std": 722.4910278320312, + "step": 486 + }, + { + "KL/chosen_KL_mean": -745.9581298828125, + "KL/mean": -923.1339111328125, + "KL/rejected_KL_mean": -1100.3096923828125, + "KL/std": 566.0206298828125, + "epoch": 0.7151248164464024, + "fcm_dpo/beta": 0.0010237495880573988, + "fcm_dpo/delta": 0.03809621185064316, + "fcm_dpo/margin": 354.3516845703125, + "fcm_dpo/q_t": 0.4159180819988251, + "grad_norm": 39.361175537109375, + "learning_rate": 1.1513436604424378e-07, + "logits/chosen": -1.0012881755828857, + "logits/rejected": -1.0079126358032227, + "logps/chosen": -799.0213623046875, + "logps/ref_chosen": -53.06330871582031, + "logps/ref_rejected": -92.41883087158203, + "logps/rejected": -1192.728515625, + "loss": 1.136, + "margin_dpo/margin_mean": 354.3516845703125, + "margin_dpo/margin_std": 571.5254516601562, + "step": 487 + }, + { + "KL/chosen_KL_mean": -629.125732421875, + "KL/mean": -814.207275390625, + "KL/rejected_KL_mean": -999.2886962890625, + "KL/std": 508.64453125, + "epoch": 0.7165932452276065, + "fcm_dpo/beta": 0.0010283133015036583, + "fcm_dpo/delta": 0.019743794575333595, + "fcm_dpo/margin": 370.1629943847656, + "fcm_dpo/q_t": 0.41300255060195923, + "grad_norm": 33.7827033996582, + "learning_rate": 1.1405557124304335e-07, + "logits/chosen": -0.9368076324462891, + "logits/rejected": -0.9440141916275024, + "logps/chosen": -681.3538818359375, + "logps/ref_chosen": -52.22815704345703, + "logps/ref_rejected": -84.00656127929688, + "logps/rejected": -1083.2952880859375, + "loss": 1.1032, + "margin_dpo/margin_mean": 370.16302490234375, + "margin_dpo/margin_std": 498.775390625, + "step": 488 + }, + { + "KL/chosen_KL_mean": -575.0646362304688, + "KL/mean": -758.0647583007812, + "KL/rejected_KL_mean": -941.0648803710938, + "KL/std": 495.6026611328125, + "epoch": 0.7180616740088106, + "fcm_dpo/beta": 0.0010333817917853594, + "fcm_dpo/delta": 0.022133469581604004, + "fcm_dpo/margin": 366.0002136230469, + "fcm_dpo/q_t": 0.4154972434043884, + "grad_norm": 27.7478084564209, + "learning_rate": 1.1298035870396985e-07, + "logits/chosen": -0.9634197354316711, + "logits/rejected": -0.9643290042877197, + "logps/chosen": -631.0543212890625, + "logps/ref_chosen": -55.989627838134766, + "logps/ref_rejected": -79.39812469482422, + "logps/rejected": -1020.4630126953125, + "loss": 1.1184, + "margin_dpo/margin_mean": 366.00018310546875, + "margin_dpo/margin_std": 550.1982421875, + "step": 489 + }, + { + "KL/chosen_KL_mean": -674.772216796875, + "KL/mean": -867.3712158203125, + "KL/rejected_KL_mean": -1059.97021484375, + "KL/std": 602.034912109375, + "epoch": 0.7195301027900147, + "fcm_dpo/beta": 0.001032583648338914, + "fcm_dpo/delta": 0.00223751924932003, + "fcm_dpo/margin": 385.1979064941406, + "fcm_dpo/q_t": 0.41282522678375244, + "grad_norm": 40.58791732788086, + "learning_rate": 1.1190875675987355e-07, + "logits/chosen": -0.9857407808303833, + "logits/rejected": -1.026228427886963, + "logps/chosen": -727.138671875, + "logps/ref_chosen": -52.36639404296875, + "logps/ref_rejected": -110.4090576171875, + "logps/rejected": -1170.379150390625, + "loss": 1.1483, + "margin_dpo/margin_mean": 385.19793701171875, + "margin_dpo/margin_std": 706.33056640625, + "step": 490 + }, + { + "KL/chosen_KL_mean": -634.9647216796875, + "KL/mean": -764.6456298828125, + "KL/rejected_KL_mean": -894.3265380859375, + "KL/std": 501.4613952636719, + "epoch": 0.7209985315712188, + "fcm_dpo/beta": 0.0010518557392060757, + "fcm_dpo/delta": 0.13058799505233765, + "fcm_dpo/margin": 259.36181640625, + "fcm_dpo/q_t": 0.4379756450653076, + "grad_norm": 33.82534408569336, + "learning_rate": 1.1084079364846241e-07, + "logits/chosen": -0.9741504192352295, + "logits/rejected": -0.9680135250091553, + "logps/chosen": -695.0809936523438, + "logps/ref_chosen": -60.11626434326172, + "logps/ref_rejected": -73.27278900146484, + "logps/rejected": -967.599365234375, + "loss": 1.1963, + "margin_dpo/margin_mean": 259.3618469238281, + "margin_dpo/margin_std": 497.3216552734375, + "step": 491 + }, + { + "KL/chosen_KL_mean": -663.180419921875, + "KL/mean": -787.96923828125, + "KL/rejected_KL_mean": -912.758056640625, + "KL/std": 512.7420043945312, + "epoch": 0.7224669603524229, + "fcm_dpo/beta": 0.0010776289273053408, + "fcm_dpo/delta": 0.13482382893562317, + "fcm_dpo/margin": 249.57763671875, + "fcm_dpo/q_t": 0.44000089168548584, + "grad_norm": 44.77201843261719, + "learning_rate": 1.097764975115576e-07, + "logits/chosen": -1.0639835596084595, + "logits/rejected": -1.0435137748718262, + "logps/chosen": -717.174560546875, + "logps/ref_chosen": -53.994178771972656, + "logps/ref_rejected": -72.65962219238281, + "logps/rejected": -985.4176635742188, + "loss": 1.23, + "margin_dpo/margin_mean": 249.57763671875, + "margin_dpo/margin_std": 586.6539306640625, + "step": 492 + }, + { + "KL/chosen_KL_mean": -686.341552734375, + "KL/mean": -834.3265380859375, + "KL/rejected_KL_mean": -982.3115234375, + "KL/std": 570.59423828125, + "epoch": 0.723935389133627, + "fcm_dpo/beta": 0.0010814403649419546, + "fcm_dpo/delta": -0.023665668442845345, + "fcm_dpo/margin": 295.9699401855469, + "fcm_dpo/q_t": 0.4249815344810486, + "grad_norm": 34.14286804199219, + "learning_rate": 1.0871589639435203e-07, + "logits/chosen": -1.0923945903778076, + "logits/rejected": -1.0642774105072021, + "logps/chosen": -761.8388061523438, + "logps/ref_chosen": -75.49723815917969, + "logps/ref_rejected": -87.32301330566406, + "logps/rejected": -1069.634521484375, + "loss": 1.1701, + "margin_dpo/margin_mean": 295.9699401855469, + "margin_dpo/margin_std": 527.4650268554688, + "step": 493 + }, + { + "KL/chosen_KL_mean": -532.9840087890625, + "KL/mean": -762.801513671875, + "KL/rejected_KL_mean": -992.619140625, + "KL/std": 505.04736328125, + "epoch": 0.7254038179148311, + "fcm_dpo/beta": 0.0010658178944140673, + "fcm_dpo/delta": -0.09456932544708252, + "fcm_dpo/margin": 459.6351318359375, + "fcm_dpo/q_t": 0.3873167037963867, + "grad_norm": 52.453407287597656, + "learning_rate": 1.0765901824467166e-07, + "logits/chosen": -0.9276965856552124, + "logits/rejected": -0.9675771594047546, + "logps/chosen": -574.34326171875, + "logps/ref_chosen": -41.35926818847656, + "logps/ref_rejected": -86.09136962890625, + "logps/rejected": -1078.71044921875, + "loss": 1.022, + "margin_dpo/margin_mean": 459.6351318359375, + "margin_dpo/margin_std": 506.0677490234375, + "step": 494 + }, + { + "KL/chosen_KL_mean": -601.6910400390625, + "KL/mean": -798.136962890625, + "KL/rejected_KL_mean": -994.582763671875, + "KL/std": 516.9666137695312, + "epoch": 0.7268722466960352, + "fcm_dpo/beta": 0.0010593379847705364, + "fcm_dpo/delta": -0.016908157616853714, + "fcm_dpo/margin": 392.8917236328125, + "fcm_dpo/q_t": 0.40819650888442993, + "grad_norm": 33.18449020385742, + "learning_rate": 1.0660589091223854e-07, + "logits/chosen": -1.020897388458252, + "logits/rejected": -1.0267536640167236, + "logps/chosen": -665.22607421875, + "logps/ref_chosen": -63.53507995605469, + "logps/ref_rejected": -91.42443084716797, + "logps/rejected": -1086.0072021484375, + "loss": 1.1072, + "margin_dpo/margin_mean": 392.8917236328125, + "margin_dpo/margin_std": 618.4354248046875, + "step": 495 + }, + { + "KL/chosen_KL_mean": -744.3991088867188, + "KL/mean": -843.0897216796875, + "KL/rejected_KL_mean": -941.7803344726562, + "KL/std": 415.9389343261719, + "epoch": 0.7283406754772394, + "fcm_dpo/beta": 0.0010575959458947182, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 197.38121032714844, + "fcm_dpo/q_t": 0.4504912197589874, + "grad_norm": 70.50399017333984, + "learning_rate": 1.0555654214793722e-07, + "logits/chosen": -0.9933898448944092, + "logits/rejected": -0.957461953163147, + "logps/chosen": -816.9910888671875, + "logps/ref_chosen": -72.5919189453125, + "logps/ref_rejected": -84.32933807373047, + "logps/rejected": -1026.109619140625, + "loss": 1.2514, + "margin_dpo/margin_mean": 197.3812255859375, + "margin_dpo/margin_std": 476.3318176269531, + "step": 496 + }, + { + "KL/chosen_KL_mean": -704.3419189453125, + "KL/mean": -808.200439453125, + "KL/rejected_KL_mean": -912.058837890625, + "KL/std": 518.0106201171875, + "epoch": 0.7298091042584435, + "fcm_dpo/beta": 0.0010635224170982838, + "fcm_dpo/delta": 0.02794058434665203, + "fcm_dpo/margin": 207.71685791015625, + "fcm_dpo/q_t": 0.44891273975372314, + "grad_norm": 35.43773651123047, + "learning_rate": 1.0451099960308374e-07, + "logits/chosen": -0.9681912660598755, + "logits/rejected": -0.9516055583953857, + "logps/chosen": -762.9359130859375, + "logps/ref_chosen": -58.59397506713867, + "logps/ref_rejected": -76.28836822509766, + "logps/rejected": -988.34716796875, + "loss": 1.2436, + "margin_dpo/margin_mean": 207.7168731689453, + "margin_dpo/margin_std": 479.673095703125, + "step": 497 + }, + { + "KL/chosen_KL_mean": -667.029541015625, + "KL/mean": -843.3291015625, + "KL/rejected_KL_mean": -1019.628662109375, + "KL/std": 551.8720703125, + "epoch": 0.7312775330396476, + "fcm_dpo/beta": 0.0010668218601495028, + "fcm_dpo/delta": 0.024778790771961212, + "fcm_dpo/margin": 352.5992126464844, + "fcm_dpo/q_t": 0.4150667190551758, + "grad_norm": 37.042057037353516, + "learning_rate": 1.0346929082869641e-07, + "logits/chosen": -0.9754823446273804, + "logits/rejected": -0.9640058279037476, + "logps/chosen": -738.2352294921875, + "logps/ref_chosen": -71.20565795898438, + "logps/ref_rejected": -83.95803833007812, + "logps/rejected": -1103.586669921875, + "loss": 1.145, + "margin_dpo/margin_mean": 352.5992126464844, + "margin_dpo/margin_std": 618.8363647460938, + "step": 498 + }, + { + "KL/chosen_KL_mean": -573.7318115234375, + "KL/mean": -789.2236938476562, + "KL/rejected_KL_mean": -1004.7156982421875, + "KL/std": 525.0882568359375, + "epoch": 0.7327459618208517, + "fcm_dpo/beta": 0.0010582150425761938, + "fcm_dpo/delta": -0.058853406459093094, + "fcm_dpo/margin": 430.98388671875, + "fcm_dpo/q_t": 0.39665037393569946, + "grad_norm": 41.18540573120117, + "learning_rate": 1.0243144327477013e-07, + "logits/chosen": -1.0105154514312744, + "logits/rejected": -1.0502002239227295, + "logps/chosen": -624.9869384765625, + "logps/ref_chosen": -51.25519561767578, + "logps/ref_rejected": -101.07870483398438, + "logps/rejected": -1105.79443359375, + "loss": 1.0651, + "margin_dpo/margin_mean": 430.98388671875, + "margin_dpo/margin_std": 575.690673828125, + "step": 499 + }, + { + "KL/chosen_KL_mean": -679.6956787109375, + "KL/mean": -855.8604125976562, + "KL/rejected_KL_mean": -1032.025146484375, + "KL/std": 456.21795654296875, + "epoch": 0.7342143906020558, + "fcm_dpo/beta": 0.001059262314811349, + "fcm_dpo/delta": 0.02782963030040264, + "fcm_dpo/margin": 352.3294677734375, + "fcm_dpo/q_t": 0.4146909713745117, + "grad_norm": 31.199586868286133, + "learning_rate": 1.0139748428955333e-07, + "logits/chosen": -0.979456901550293, + "logits/rejected": -1.0102999210357666, + "logps/chosen": -736.72314453125, + "logps/ref_chosen": -57.027442932128906, + "logps/ref_rejected": -93.93421173095703, + "logps/rejected": -1125.9593505859375, + "loss": 1.1312, + "margin_dpo/margin_mean": 352.3294677734375, + "margin_dpo/margin_std": 558.6328125, + "step": 500 + }, + { + "KL/chosen_KL_mean": -593.8455810546875, + "KL/mean": -783.6693115234375, + "KL/rejected_KL_mean": -973.4930419921875, + "KL/std": 487.61322021484375, + "epoch": 0.73568281938326, + "fcm_dpo/beta": 0.001063595525920391, + "fcm_dpo/delta": -0.004017947241663933, + "fcm_dpo/margin": 379.6474304199219, + "fcm_dpo/q_t": 0.4093387722969055, + "grad_norm": 32.033573150634766, + "learning_rate": 1.0036744111882672e-07, + "logits/chosen": -0.9334768056869507, + "logits/rejected": -0.9198960065841675, + "logps/chosen": -648.205078125, + "logps/ref_chosen": -54.359527587890625, + "logps/ref_rejected": -80.15670013427734, + "logps/rejected": -1053.649658203125, + "loss": 1.1161, + "margin_dpo/margin_mean": 379.64739990234375, + "margin_dpo/margin_std": 597.58349609375, + "step": 501 + }, + { + "KL/chosen_KL_mean": -542.5103759765625, + "KL/mean": -729.060791015625, + "KL/rejected_KL_mean": -915.6112060546875, + "KL/std": 439.54412841796875, + "epoch": 0.737151248164464, + "fcm_dpo/beta": 0.0010629099560901523, + "fcm_dpo/delta": 0.003547299187630415, + "fcm_dpo/margin": 373.10076904296875, + "fcm_dpo/q_t": 0.4092276096343994, + "grad_norm": 33.64237594604492, + "learning_rate": 9.934134090518592e-08, + "logits/chosen": -0.8644669055938721, + "logits/rejected": -0.8459637761116028, + "logps/chosen": -610.1109008789062, + "logps/ref_chosen": -67.60050964355469, + "logps/ref_rejected": -82.94876098632812, + "logps/rejected": -998.5599365234375, + "loss": 1.0855, + "margin_dpo/margin_mean": 373.1007995605469, + "margin_dpo/margin_std": 468.3107604980469, + "step": 502 + }, + { + "KL/chosen_KL_mean": -537.25927734375, + "KL/mean": -710.22998046875, + "KL/rejected_KL_mean": -883.2006225585938, + "KL/std": 428.82354736328125, + "epoch": 0.7386196769456681, + "fcm_dpo/beta": 0.0010641318513080478, + "fcm_dpo/delta": 0.03300508111715317, + "fcm_dpo/margin": 345.9414367675781, + "fcm_dpo/q_t": 0.41630834341049194, + "grad_norm": 27.046812057495117, + "learning_rate": 9.831921068732571e-08, + "logits/chosen": -0.8558259010314941, + "logits/rejected": -0.8414930105209351, + "logps/chosen": -592.337646484375, + "logps/ref_chosen": -55.078407287597656, + "logps/ref_rejected": -82.50544738769531, + "logps/rejected": -965.7060546875, + "loss": 1.1087, + "margin_dpo/margin_mean": 345.94140625, + "margin_dpo/margin_std": 465.1625671386719, + "step": 503 + }, + { + "KL/chosen_KL_mean": -591.099853515625, + "KL/mean": -802.704345703125, + "KL/rejected_KL_mean": -1014.3087768554688, + "KL/std": 516.8352661132812, + "epoch": 0.7400881057268722, + "fcm_dpo/beta": 0.0010626555886119604, + "fcm_dpo/delta": -0.052036985754966736, + "fcm_dpo/margin": 423.208984375, + "fcm_dpo/q_t": 0.3984360992908478, + "grad_norm": 29.352018356323242, + "learning_rate": 9.730107739932805e-08, + "logits/chosen": -0.917930006980896, + "logits/rejected": -0.9410198926925659, + "logps/chosen": -651.0655517578125, + "logps/ref_chosen": -59.96575164794922, + "logps/ref_rejected": -103.76212310791016, + "logps/rejected": -1118.0709228515625, + "loss": 1.0722, + "margin_dpo/margin_mean": 423.208984375, + "margin_dpo/margin_std": 561.8984375, + "step": 504 + }, + { + "KL/chosen_KL_mean": -648.5, + "KL/mean": -761.6331176757812, + "KL/rejected_KL_mean": -874.7662353515625, + "KL/std": 479.6357421875, + "epoch": 0.7415565345080763, + "fcm_dpo/beta": 0.001084325835108757, + "fcm_dpo/delta": 0.1583971083164215, + "fcm_dpo/margin": 226.2662811279297, + "fcm_dpo/q_t": 0.4433596134185791, + "grad_norm": 47.79065704345703, + "learning_rate": 9.628696786995188e-08, + "logits/chosen": -0.9595932960510254, + "logits/rejected": -0.9328126907348633, + "logps/chosen": -724.6549072265625, + "logps/ref_chosen": -76.1549072265625, + "logps/ref_rejected": -88.58537292480469, + "logps/rejected": -963.3516235351562, + "loss": 1.2142, + "margin_dpo/margin_mean": 226.26626586914062, + "margin_dpo/margin_std": 447.0828857421875, + "step": 505 + }, + { + "KL/chosen_KL_mean": -521.2240600585938, + "KL/mean": -705.6286010742188, + "KL/rejected_KL_mean": -890.033203125, + "KL/std": 477.63641357421875, + "epoch": 0.7430249632892805, + "fcm_dpo/beta": 0.0010876674205064774, + "fcm_dpo/delta": -0.0015001185238361359, + "fcm_dpo/margin": 368.80908203125, + "fcm_dpo/q_t": 0.40826907753944397, + "grad_norm": 31.51370620727539, + "learning_rate": 9.527690882192635e-08, + "logits/chosen": -0.9328019618988037, + "logits/rejected": -0.9463713765144348, + "logps/chosen": -570.1845703125, + "logps/ref_chosen": -48.96050262451172, + "logps/ref_rejected": -78.41505432128906, + "logps/rejected": -968.4482421875, + "loss": 1.0999, + "margin_dpo/margin_mean": 368.8091125488281, + "margin_dpo/margin_std": 518.3751220703125, + "step": 506 + }, + { + "KL/chosen_KL_mean": -618.9168701171875, + "KL/mean": -781.8189697265625, + "KL/rejected_KL_mean": -944.7210693359375, + "KL/std": 559.7078857421875, + "epoch": 0.7444933920704846, + "fcm_dpo/beta": 0.0010977927595376968, + "fcm_dpo/delta": 0.043928615748882294, + "fcm_dpo/margin": 325.8041076660156, + "fcm_dpo/q_t": 0.4235384464263916, + "grad_norm": 29.80331039428711, + "learning_rate": 9.427092687124691e-08, + "logits/chosen": -0.9519898891448975, + "logits/rejected": -0.956289529800415, + "logps/chosen": -685.7183837890625, + "logps/ref_chosen": -66.80149841308594, + "logps/ref_rejected": -95.37289428710938, + "logps/rejected": -1040.093994140625, + "loss": 1.1629, + "margin_dpo/margin_mean": 325.80413818359375, + "margin_dpo/margin_std": 623.13720703125, + "step": 507 + }, + { + "KL/chosen_KL_mean": -653.6170654296875, + "KL/mean": -794.548583984375, + "KL/rejected_KL_mean": -935.4800415039062, + "KL/std": 542.185546875, + "epoch": 0.7459618208516887, + "fcm_dpo/beta": 0.0011185563635081053, + "fcm_dpo/delta": 0.0871460884809494, + "fcm_dpo/margin": 281.86297607421875, + "fcm_dpo/q_t": 0.43127191066741943, + "grad_norm": 37.2701416015625, + "learning_rate": 9.326904852647344e-08, + "logits/chosen": -0.9200087189674377, + "logits/rejected": -0.9178076386451721, + "logps/chosen": -724.9205322265625, + "logps/ref_chosen": -71.303466796875, + "logps/ref_rejected": -95.6275405883789, + "logps/rejected": -1031.107666015625, + "loss": 1.2098, + "margin_dpo/margin_mean": 281.86297607421875, + "margin_dpo/margin_std": 628.1875610351562, + "step": 508 + }, + { + "KL/chosen_KL_mean": -504.98626708984375, + "KL/mean": -655.8489990234375, + "KL/rejected_KL_mean": -806.7117919921875, + "KL/std": 394.4847412109375, + "epoch": 0.7474302496328928, + "fcm_dpo/beta": 0.001136223552748561, + "fcm_dpo/delta": 0.058409832417964935, + "fcm_dpo/margin": 301.7255554199219, + "fcm_dpo/q_t": 0.4222422242164612, + "grad_norm": 36.30656433105469, + "learning_rate": 9.227130018803195e-08, + "logits/chosen": -0.8557006120681763, + "logits/rejected": -0.8502145409584045, + "logps/chosen": -568.8052368164062, + "logps/ref_chosen": -63.81895065307617, + "logps/ref_rejected": -83.25643920898438, + "logps/rejected": -889.96826171875, + "loss": 1.1457, + "margin_dpo/margin_mean": 301.72552490234375, + "margin_dpo/margin_std": 484.4477844238281, + "step": 509 + }, + { + "KL/chosen_KL_mean": -597.9219970703125, + "KL/mean": -796.3427734375, + "KL/rejected_KL_mean": -994.7635498046875, + "KL/std": 451.759521484375, + "epoch": 0.748898678414097, + "fcm_dpo/beta": 0.0011275724973529577, + "fcm_dpo/delta": -0.04966657981276512, + "fcm_dpo/margin": 396.84149169921875, + "fcm_dpo/q_t": 0.3957008123397827, + "grad_norm": 40.821044921875, + "learning_rate": 9.127770814751932e-08, + "logits/chosen": -0.8606332540512085, + "logits/rejected": -0.8811938166618347, + "logps/chosen": -649.8004760742188, + "logps/ref_chosen": -51.878448486328125, + "logps/ref_rejected": -102.7651596069336, + "logps/rejected": -1097.5286865234375, + "loss": 1.0478, + "margin_dpo/margin_mean": 396.84149169921875, + "margin_dpo/margin_std": 446.3687438964844, + "step": 510 + }, + { + "KL/chosen_KL_mean": -562.03466796875, + "KL/mean": -717.6114501953125, + "KL/rejected_KL_mean": -873.188232421875, + "KL/std": 484.3371887207031, + "epoch": 0.750367107195301, + "fcm_dpo/beta": 0.001131793251261115, + "fcm_dpo/delta": 0.04954507201910019, + "fcm_dpo/margin": 311.153564453125, + "fcm_dpo/q_t": 0.4202183187007904, + "grad_norm": 44.68586730957031, + "learning_rate": 9.028829858700973e-08, + "logits/chosen": -0.9428844451904297, + "logits/rejected": -0.9506068825721741, + "logps/chosen": -622.2727661132812, + "logps/ref_chosen": -60.23811721801758, + "logps/ref_rejected": -92.85676574707031, + "logps/rejected": -966.0450439453125, + "loss": 1.165, + "margin_dpo/margin_mean": 311.153564453125, + "margin_dpo/margin_std": 587.4927978515625, + "step": 511 + }, + { + "KL/chosen_KL_mean": -448.8963623046875, + "KL/mean": -666.0167846679688, + "KL/rejected_KL_mean": -883.13720703125, + "KL/std": 444.51080322265625, + "epoch": 0.7518355359765051, + "fcm_dpo/beta": 0.0011163100134581327, + "fcm_dpo/delta": -0.08939085900783539, + "fcm_dpo/margin": 434.24078369140625, + "fcm_dpo/q_t": 0.38778460025787354, + "grad_norm": 60.86901092529297, + "learning_rate": 8.930309757836516e-08, + "logits/chosen": -0.8777337074279785, + "logits/rejected": -0.8991394639015198, + "logps/chosen": -503.8018798828125, + "logps/ref_chosen": -54.905494689941406, + "logps/ref_rejected": -81.87586975097656, + "logps/rejected": -965.0130615234375, + "loss": 1.0178, + "margin_dpo/margin_mean": 434.24078369140625, + "margin_dpo/margin_std": 454.16876220703125, + "step": 512 + }, + { + "KL/chosen_KL_mean": -567.0150146484375, + "KL/mean": -720.166015625, + "KL/rejected_KL_mean": -873.3170166015625, + "KL/std": 418.872802734375, + "epoch": 0.7533039647577092, + "fcm_dpo/beta": 0.00110536755528301, + "fcm_dpo/delta": -0.03816516324877739, + "fcm_dpo/margin": 306.30194091796875, + "fcm_dpo/q_t": 0.422201544046402, + "grad_norm": 50.96452331542969, + "learning_rate": 8.832213108254863e-08, + "logits/chosen": -0.9502312541007996, + "logits/rejected": -0.9348673820495605, + "logps/chosen": -631.9314575195312, + "logps/ref_chosen": -64.91644287109375, + "logps/ref_rejected": -76.06245422363281, + "logps/rejected": -949.3794555664062, + "loss": 1.1469, + "margin_dpo/margin_mean": 306.30194091796875, + "margin_dpo/margin_std": 481.57958984375, + "step": 513 + }, + { + "KL/chosen_KL_mean": -592.06298828125, + "KL/mean": -742.8245239257812, + "KL/rejected_KL_mean": -893.5859985351562, + "KL/std": 463.70751953125, + "epoch": 0.7547723935389133, + "fcm_dpo/beta": 0.0011181586887687445, + "fcm_dpo/delta": 0.06485524028539658, + "fcm_dpo/margin": 301.52301025390625, + "fcm_dpo/q_t": 0.4251614212989807, + "grad_norm": 38.09934616088867, + "learning_rate": 8.734542494893954e-08, + "logits/chosen": -0.8954925537109375, + "logits/rejected": -0.8867564797401428, + "logps/chosen": -666.2926025390625, + "logps/ref_chosen": -74.22957611083984, + "logps/ref_rejected": -78.945556640625, + "logps/rejected": -972.5315551757812, + "loss": 1.1551, + "margin_dpo/margin_mean": 301.52301025390625, + "margin_dpo/margin_std": 522.4912109375, + "step": 514 + }, + { + "KL/chosen_KL_mean": -481.15533447265625, + "KL/mean": -597.14990234375, + "KL/rejected_KL_mean": -713.14453125, + "KL/std": 382.8921813964844, + "epoch": 0.7562408223201175, + "fcm_dpo/beta": 0.0011437044013291597, + "fcm_dpo/delta": 0.13827666640281677, + "fcm_dpo/margin": 231.9891357421875, + "fcm_dpo/q_t": 0.437407523393631, + "grad_norm": 50.51891326904297, + "learning_rate": 8.637300491465272e-08, + "logits/chosen": -0.8350532054901123, + "logits/rejected": -0.846880316734314, + "logps/chosen": -531.556884765625, + "logps/ref_chosen": -50.40156555175781, + "logps/ref_rejected": -87.09774780273438, + "logps/rejected": -800.2422485351562, + "loss": 1.1981, + "margin_dpo/margin_mean": 231.9891357421875, + "margin_dpo/margin_std": 439.46319580078125, + "step": 515 + }, + { + "KL/chosen_KL_mean": -508.79656982421875, + "KL/mean": -676.51318359375, + "KL/rejected_KL_mean": -844.229736328125, + "KL/std": 420.5047607421875, + "epoch": 0.7577092511013216, + "fcm_dpo/beta": 0.0011569425696507096, + "fcm_dpo/delta": 0.012162066996097565, + "fcm_dpo/margin": 335.4332275390625, + "fcm_dpo/q_t": 0.4088793396949768, + "grad_norm": 50.28904724121094, + "learning_rate": 8.540489660386064e-08, + "logits/chosen": -0.910446286201477, + "logits/rejected": -0.9385887384414673, + "logps/chosen": -573.4461669921875, + "logps/ref_chosen": -64.64956665039062, + "logps/ref_rejected": -111.72237396240234, + "logps/rejected": -955.9521484375, + "loss": 1.0912, + "margin_dpo/margin_mean": 335.4331970214844, + "margin_dpo/margin_std": 415.19305419921875, + "step": 516 + }, + { + "KL/chosen_KL_mean": -550.5552978515625, + "KL/mean": -756.6061401367188, + "KL/rejected_KL_mean": -962.656982421875, + "KL/std": 494.27984619140625, + "epoch": 0.7591776798825257, + "fcm_dpo/beta": 0.0011394355678930879, + "fcm_dpo/delta": -0.07319086790084839, + "fcm_dpo/margin": 412.10162353515625, + "fcm_dpo/q_t": 0.39602527022361755, + "grad_norm": 31.643814086914062, + "learning_rate": 8.444112552711752e-08, + "logits/chosen": -0.8593244552612305, + "logits/rejected": -0.8574497699737549, + "logps/chosen": -611.4688720703125, + "logps/ref_chosen": -60.913551330566406, + "logps/ref_rejected": -89.08308410644531, + "logps/rejected": -1051.739990234375, + "loss": 1.0563, + "margin_dpo/margin_mean": 412.10162353515625, + "margin_dpo/margin_std": 548.390380859375, + "step": 517 + }, + { + "KL/chosen_KL_mean": -496.5261535644531, + "KL/mean": -666.101318359375, + "KL/rejected_KL_mean": -835.676513671875, + "KL/std": 392.9124755859375, + "epoch": 0.7606461086637298, + "fcm_dpo/beta": 0.001135983387939632, + "fcm_dpo/delta": 0.015141095966100693, + "fcm_dpo/margin": 339.1503601074219, + "fcm_dpo/q_t": 0.40997710824012756, + "grad_norm": 58.1636848449707, + "learning_rate": 8.348171708068747e-08, + "logits/chosen": -0.8889042139053345, + "logits/rejected": -0.904350221157074, + "logps/chosen": -553.9820556640625, + "logps/ref_chosen": -57.45589065551758, + "logps/ref_rejected": -85.31269836425781, + "logps/rejected": -920.9891357421875, + "loss": 1.0946, + "margin_dpo/margin_mean": 339.1503601074219, + "margin_dpo/margin_std": 427.3458557128906, + "step": 518 + }, + { + "KL/chosen_KL_mean": -510.1135559082031, + "KL/mean": -627.830322265625, + "KL/rejected_KL_mean": -745.547119140625, + "KL/std": 358.55865478515625, + "epoch": 0.762114537444934, + "fcm_dpo/beta": 0.0011437054490670562, + "fcm_dpo/delta": 0.026432547718286514, + "fcm_dpo/margin": 235.43359375, + "fcm_dpo/q_t": 0.43695303797721863, + "grad_norm": 37.28139877319336, + "learning_rate": 8.25266965458755e-08, + "logits/chosen": -0.8758772611618042, + "logits/rejected": -0.859151303768158, + "logps/chosen": -584.1768798828125, + "logps/ref_chosen": -74.06331634521484, + "logps/ref_rejected": -104.44416809082031, + "logps/rejected": -849.9913330078125, + "loss": 1.2004, + "margin_dpo/margin_mean": 235.43359375, + "margin_dpo/margin_std": 449.8384094238281, + "step": 519 + }, + { + "KL/chosen_KL_mean": -533.606689453125, + "KL/mean": -691.7373046875, + "KL/rejected_KL_mean": -849.8678588867188, + "KL/std": 418.42547607421875, + "epoch": 0.7635829662261381, + "fcm_dpo/beta": 0.0011494287755340338, + "fcm_dpo/delta": 0.03775997459888458, + "fcm_dpo/margin": 316.26123046875, + "fcm_dpo/q_t": 0.4179002046585083, + "grad_norm": 45.021728515625, + "learning_rate": 8.15760890883607e-08, + "logits/chosen": -0.8589770793914795, + "logits/rejected": -0.8674265146255493, + "logps/chosen": -603.906494140625, + "logps/ref_chosen": -70.2998275756836, + "logps/ref_rejected": -99.98133850097656, + "logps/rejected": -949.8492431640625, + "loss": 1.1257, + "margin_dpo/margin_mean": 316.26123046875, + "margin_dpo/margin_std": 463.71783447265625, + "step": 520 + }, + { + "KL/chosen_KL_mean": -490.48187255859375, + "KL/mean": -661.524169921875, + "KL/rejected_KL_mean": -832.5665283203125, + "KL/std": 435.0758972167969, + "epoch": 0.7650513950073421, + "fcm_dpo/beta": 0.0011624578619375825, + "fcm_dpo/delta": 0.0017420090734958649, + "fcm_dpo/margin": 342.0847473144531, + "fcm_dpo/q_t": 0.4088232219219208, + "grad_norm": 29.704362869262695, + "learning_rate": 8.062991975753378e-08, + "logits/chosen": -0.9206105470657349, + "logits/rejected": -0.9278547763824463, + "logps/chosen": -548.624755859375, + "logps/ref_chosen": -58.14292526245117, + "logps/ref_rejected": -83.28060913085938, + "logps/rejected": -915.84716796875, + "loss": 1.0938, + "margin_dpo/margin_mean": 342.084716796875, + "margin_dpo/margin_std": 449.4230041503906, + "step": 521 + }, + { + "KL/chosen_KL_mean": -558.9736328125, + "KL/mean": -713.635498046875, + "KL/rejected_KL_mean": -868.29736328125, + "KL/std": 458.9757080078125, + "epoch": 0.7665198237885462, + "fcm_dpo/beta": 0.0011631404049694538, + "fcm_dpo/delta": 0.04171000048518181, + "fcm_dpo/margin": 309.32373046875, + "fcm_dpo/q_t": 0.41798996925354004, + "grad_norm": 31.109495162963867, + "learning_rate": 7.968821348583643e-08, + "logits/chosen": -0.9124878644943237, + "logits/rejected": -0.9153552055358887, + "logps/chosen": -605.5213623046875, + "logps/ref_chosen": -46.54766845703125, + "logps/ref_rejected": -66.01388549804688, + "logps/rejected": -934.311279296875, + "loss": 1.1346, + "margin_dpo/margin_mean": 309.32373046875, + "margin_dpo/margin_std": 488.1703796386719, + "step": 522 + }, + { + "KL/chosen_KL_mean": -593.5350341796875, + "KL/mean": -758.7939453125, + "KL/rejected_KL_mean": -924.0529174804688, + "KL/std": 532.2261962890625, + "epoch": 0.7679882525697503, + "fcm_dpo/beta": 0.001167641719803214, + "fcm_dpo/delta": 0.014641055837273598, + "fcm_dpo/margin": 330.5179443359375, + "fcm_dpo/q_t": 0.4140872061252594, + "grad_norm": 43.91381072998047, + "learning_rate": 7.875099508810484e-08, + "logits/chosen": -0.9662898182868958, + "logits/rejected": -0.9687439203262329, + "logps/chosen": -655.3046264648438, + "logps/ref_chosen": -61.76960372924805, + "logps/ref_rejected": -83.76141357421875, + "logps/rejected": -1007.8143310546875, + "loss": 1.1351, + "margin_dpo/margin_mean": 330.5179443359375, + "margin_dpo/margin_std": 560.5762329101562, + "step": 523 + }, + { + "KL/chosen_KL_mean": -574.9899291992188, + "KL/mean": -747.72412109375, + "KL/rejected_KL_mean": -920.458251953125, + "KL/std": 479.4917297363281, + "epoch": 0.7694566813509545, + "fcm_dpo/beta": 0.0011622272431850433, + "fcm_dpo/delta": -0.002190619707107544, + "fcm_dpo/margin": 345.46832275390625, + "fcm_dpo/q_t": 0.4057931900024414, + "grad_norm": 47.520774841308594, + "learning_rate": 7.781828926091535e-08, + "logits/chosen": -0.9906701445579529, + "logits/rejected": -0.9787558317184448, + "logps/chosen": -653.06201171875, + "logps/ref_chosen": -78.0720443725586, + "logps/ref_rejected": -81.30198669433594, + "logps/rejected": -1001.76025390625, + "loss": 1.1068, + "margin_dpo/margin_mean": 345.46832275390625, + "margin_dpo/margin_std": 491.1083984375, + "step": 524 + }, + { + "KL/chosen_KL_mean": -581.4891357421875, + "KL/mean": -802.2073974609375, + "KL/rejected_KL_mean": -1022.925537109375, + "KL/std": 510.1922607421875, + "epoch": 0.7709251101321586, + "fcm_dpo/beta": 0.001145128975622356, + "fcm_dpo/delta": -0.11176417022943497, + "fcm_dpo/margin": 441.4364929199219, + "fcm_dpo/q_t": 0.38664761185646057, + "grad_norm": 42.60094451904297, + "learning_rate": 7.689012058193384e-08, + "logits/chosen": -0.925658106803894, + "logits/rejected": -0.9625715017318726, + "logps/chosen": -632.3170166015625, + "logps/ref_chosen": -50.827857971191406, + "logps/ref_rejected": -100.05294036865234, + "logps/rejected": -1122.978515625, + "loss": 1.0305, + "margin_dpo/margin_mean": 441.43646240234375, + "margin_dpo/margin_std": 542.1448364257812, + "step": 525 + }, + { + "KL/chosen_KL_mean": -626.0618896484375, + "KL/mean": -843.650390625, + "KL/rejected_KL_mean": -1061.239013671875, + "KL/std": 508.11346435546875, + "epoch": 0.7723935389133627, + "fcm_dpo/beta": 0.0011303203646093607, + "fcm_dpo/delta": -0.09655816853046417, + "fcm_dpo/margin": 435.17706298828125, + "fcm_dpo/q_t": 0.3878590166568756, + "grad_norm": 30.649280548095703, + "learning_rate": 7.596651350926836e-08, + "logits/chosen": -0.9626432657241821, + "logits/rejected": -0.9604432582855225, + "logps/chosen": -689.2291259765625, + "logps/ref_chosen": -63.167236328125, + "logps/ref_rejected": -86.30934143066406, + "logps/rejected": -1147.54833984375, + "loss": 1.0485, + "margin_dpo/margin_mean": 435.17706298828125, + "margin_dpo/margin_std": 561.9842529296875, + "step": 526 + }, + { + "KL/chosen_KL_mean": -637.3934326171875, + "KL/mean": -784.9993896484375, + "KL/rejected_KL_mean": -932.60546875, + "KL/std": 524.7011108398438, + "epoch": 0.7738619676945668, + "fcm_dpo/beta": 0.0011297144228592515, + "fcm_dpo/delta": 0.06883951276540756, + "fcm_dpo/margin": 295.2120361328125, + "fcm_dpo/q_t": 0.4224596917629242, + "grad_norm": 32.22301483154297, + "learning_rate": 7.504749238082414e-08, + "logits/chosen": -1.1435140371322632, + "logits/rejected": -1.1115856170654297, + "logps/chosen": -708.5220947265625, + "logps/ref_chosen": -71.12867736816406, + "logps/ref_rejected": -78.3425521850586, + "logps/rejected": -1010.947998046875, + "loss": 1.1391, + "margin_dpo/margin_mean": 295.2120361328125, + "margin_dpo/margin_std": 437.1567687988281, + "step": 527 + }, + { + "KL/chosen_KL_mean": -652.498046875, + "KL/mean": -834.37255859375, + "KL/rejected_KL_mean": -1016.2471313476562, + "KL/std": 502.570068359375, + "epoch": 0.775330396475771, + "fcm_dpo/beta": 0.001133130630478263, + "fcm_dpo/delta": -0.012725085951387882, + "fcm_dpo/margin": 363.7490539550781, + "fcm_dpo/q_t": 0.40897679328918457, + "grad_norm": 43.005401611328125, + "learning_rate": 7.413308141366254e-08, + "logits/chosen": -1.045546293258667, + "logits/rejected": -1.0283198356628418, + "logps/chosen": -720.5875244140625, + "logps/ref_chosen": -68.0894546508789, + "logps/ref_rejected": -93.91006469726562, + "logps/rejected": -1110.1572265625, + "loss": 1.1151, + "margin_dpo/margin_mean": 363.7490539550781, + "margin_dpo/margin_std": 581.20263671875, + "step": 528 + }, + { + "KL/chosen_KL_mean": -768.2669677734375, + "KL/mean": -888.3395385742188, + "KL/rejected_KL_mean": -1008.412109375, + "KL/std": 458.5384521484375, + "epoch": 0.7767988252569751, + "fcm_dpo/beta": 0.0011390424333512783, + "fcm_dpo/delta": 0.02491956390440464, + "fcm_dpo/margin": 240.14520263671875, + "fcm_dpo/q_t": 0.43738028407096863, + "grad_norm": 47.4577522277832, + "learning_rate": 7.322330470336313e-08, + "logits/chosen": -1.0672532320022583, + "logits/rejected": -1.0765759944915771, + "logps/chosen": -823.8419189453125, + "logps/ref_chosen": -55.57495880126953, + "logps/ref_rejected": -89.20909118652344, + "logps/rejected": -1097.6212158203125, + "loss": 1.234, + "margin_dpo/margin_mean": 240.14520263671875, + "margin_dpo/margin_std": 571.541748046875, + "step": 529 + }, + { + "KL/chosen_KL_mean": -659.3563232421875, + "KL/mean": -857.35400390625, + "KL/rejected_KL_mean": -1055.3516845703125, + "KL/std": 551.8106079101562, + "epoch": 0.7782672540381792, + "fcm_dpo/beta": 0.0011311739217489958, + "fcm_dpo/delta": -0.050166938453912735, + "fcm_dpo/margin": 395.99542236328125, + "fcm_dpo/q_t": 0.40238332748413086, + "grad_norm": 49.10841369628906, + "learning_rate": 7.231818622338822e-08, + "logits/chosen": -0.9842202663421631, + "logits/rejected": -0.980260968208313, + "logps/chosen": -706.957763671875, + "logps/ref_chosen": -47.601417541503906, + "logps/ref_rejected": -87.2845230102539, + "logps/rejected": -1142.63623046875, + "loss": 1.1208, + "margin_dpo/margin_mean": 395.99542236328125, + "margin_dpo/margin_std": 689.074462890625, + "step": 530 + }, + { + "KL/chosen_KL_mean": -727.6012573242188, + "KL/mean": -897.2587280273438, + "KL/rejected_KL_mean": -1066.916259765625, + "KL/std": 580.2291870117188, + "epoch": 0.7797356828193832, + "fcm_dpo/beta": 0.0011335888411849737, + "fcm_dpo/delta": 0.015755577012896538, + "fcm_dpo/margin": 339.31494140625, + "fcm_dpo/q_t": 0.4141680896282196, + "grad_norm": 44.63459777832031, + "learning_rate": 7.141774982445147e-08, + "logits/chosen": -1.0530567169189453, + "logits/rejected": -1.0324784517288208, + "logps/chosen": -782.8472900390625, + "logps/ref_chosen": -55.246063232421875, + "logps/ref_rejected": -70.60598754882812, + "logps/rejected": -1137.522216796875, + "loss": 1.1281, + "margin_dpo/margin_mean": 339.31494140625, + "margin_dpo/margin_std": 551.3843994140625, + "step": 531 + }, + { + "KL/chosen_KL_mean": -740.7996826171875, + "KL/mean": -921.9634399414062, + "KL/rejected_KL_mean": -1103.127197265625, + "KL/std": 562.6202392578125, + "epoch": 0.7812041116005873, + "fcm_dpo/beta": 0.0011215780396014452, + "fcm_dpo/delta": -0.00783345103263855, + "fcm_dpo/margin": 362.3275451660156, + "fcm_dpo/q_t": 0.41034865379333496, + "grad_norm": 86.36136627197266, + "learning_rate": 7.052201923388953e-08, + "logits/chosen": -1.013758897781372, + "logits/rejected": -0.9856699705123901, + "logps/chosen": -811.085693359375, + "logps/ref_chosen": -70.28601837158203, + "logps/ref_rejected": -86.5913314819336, + "logps/rejected": -1189.718505859375, + "loss": 1.1499, + "margin_dpo/margin_mean": 362.3275451660156, + "margin_dpo/margin_std": 658.3814697265625, + "step": 532 + }, + { + "KL/chosen_KL_mean": -646.8947143554688, + "KL/mean": -779.6904907226562, + "KL/rejected_KL_mean": -912.4862670898438, + "KL/std": 475.87249755859375, + "epoch": 0.7826725403817915, + "fcm_dpo/beta": 0.001128980191424489, + "fcm_dpo/delta": -0.004243167117238045, + "fcm_dpo/margin": 265.5915222167969, + "fcm_dpo/q_t": 0.43269672989845276, + "grad_norm": 44.21409606933594, + "learning_rate": 6.963101805503646e-08, + "logits/chosen": -1.0471582412719727, + "logits/rejected": -1.0218884944915771, + "logps/chosen": -711.7498168945312, + "logps/ref_chosen": -64.8551025390625, + "logps/ref_rejected": -76.58805847167969, + "logps/rejected": -989.0743408203125, + "loss": 1.2058, + "margin_dpo/margin_mean": 265.591552734375, + "margin_dpo/margin_std": 571.443359375, + "step": 533 + }, + { + "KL/chosen_KL_mean": -682.74267578125, + "KL/mean": -859.9710693359375, + "KL/rejected_KL_mean": -1037.199462890625, + "KL/std": 517.587890625, + "epoch": 0.7841409691629956, + "fcm_dpo/beta": 0.0011213625548407435, + "fcm_dpo/delta": 0.001744687557220459, + "fcm_dpo/margin": 354.456787109375, + "fcm_dpo/q_t": 0.4103449285030365, + "grad_norm": 40.56071853637695, + "learning_rate": 6.874476976660184e-08, + "logits/chosen": -1.0277010202407837, + "logits/rejected": -1.022787094116211, + "logps/chosen": -742.862060546875, + "logps/ref_chosen": -60.119388580322266, + "logps/ref_rejected": -78.54347229003906, + "logps/rejected": -1115.742919921875, + "loss": 1.1133, + "margin_dpo/margin_mean": 354.456787109375, + "margin_dpo/margin_std": 530.1246337890625, + "step": 534 + }, + { + "KL/chosen_KL_mean": -598.3570556640625, + "KL/mean": -804.5665283203125, + "KL/rejected_KL_mean": -1010.776123046875, + "KL/std": 515.3653564453125, + "epoch": 0.7856093979441997, + "fcm_dpo/beta": 0.0011251430260017514, + "fcm_dpo/delta": -0.06760876625776291, + "fcm_dpo/margin": 412.41912841796875, + "fcm_dpo/q_t": 0.39604315161705017, + "grad_norm": 32.73942565917969, + "learning_rate": 6.786329772205246e-08, + "logits/chosen": -0.9553531408309937, + "logits/rejected": -0.9562033414840698, + "logps/chosen": -652.687255859375, + "logps/ref_chosen": -54.330238342285156, + "logps/ref_rejected": -96.30763244628906, + "logps/rejected": -1107.083740234375, + "loss": 1.0635, + "margin_dpo/margin_mean": 412.41912841796875, + "margin_dpo/margin_std": 538.2338256835938, + "step": 535 + }, + { + "KL/chosen_KL_mean": -510.51318359375, + "KL/mean": -757.499755859375, + "KL/rejected_KL_mean": -1004.4862060546875, + "KL/std": 566.7451782226562, + "epoch": 0.7870778267254038, + "fcm_dpo/beta": 0.0010907297255471349, + "fcm_dpo/delta": -0.14658187329769135, + "fcm_dpo/margin": 493.97308349609375, + "fcm_dpo/q_t": 0.38460248708724976, + "grad_norm": 26.8385066986084, + "learning_rate": 6.698662514899638e-08, + "logits/chosen": -0.9329211711883545, + "logits/rejected": -0.9625818729400635, + "logps/chosen": -557.59375, + "logps/ref_chosen": -47.08053207397461, + "logps/ref_rejected": -89.09783935546875, + "logps/rejected": -1093.5841064453125, + "loss": 1.0253, + "margin_dpo/margin_mean": 493.97308349609375, + "margin_dpo/margin_std": 671.49755859375, + "step": 536 + }, + { + "KL/chosen_KL_mean": -550.3721313476562, + "KL/mean": -718.6456298828125, + "KL/rejected_KL_mean": -886.919189453125, + "KL/std": 459.65936279296875, + "epoch": 0.788546255506608, + "fcm_dpo/beta": 0.0010922504588961601, + "fcm_dpo/delta": 0.0329880453646183, + "fcm_dpo/margin": 336.547119140625, + "fcm_dpo/q_t": 0.4157649874687195, + "grad_norm": 46.854251861572266, + "learning_rate": 6.611477514857114e-08, + "logits/chosen": -0.9757102727890015, + "logits/rejected": -0.9494297504425049, + "logps/chosen": -608.1195678710938, + "logps/ref_chosen": -57.747467041015625, + "logps/ref_rejected": -70.43838500976562, + "logps/rejected": -957.3576049804688, + "loss": 1.1432, + "margin_dpo/margin_mean": 336.547119140625, + "margin_dpo/margin_std": 562.57177734375, + "step": 537 + }, + { + "KL/chosen_KL_mean": -694.86279296875, + "KL/mean": -885.943115234375, + "KL/rejected_KL_mean": -1077.0233154296875, + "KL/std": 506.5946960449219, + "epoch": 0.7900146842878121, + "fcm_dpo/beta": 0.0010833143023774028, + "fcm_dpo/delta": -0.014802441000938416, + "fcm_dpo/margin": 382.1605224609375, + "fcm_dpo/q_t": 0.40582871437072754, + "grad_norm": 31.442834854125977, + "learning_rate": 6.524777069483525e-08, + "logits/chosen": -0.9763197898864746, + "logits/rejected": -0.9549228549003601, + "logps/chosen": -761.2787475585938, + "logps/ref_chosen": -66.41594696044922, + "logps/ref_rejected": -84.22808837890625, + "logps/rejected": -1161.25146484375, + "loss": 1.0873, + "margin_dpo/margin_mean": 382.1605224609375, + "margin_dpo/margin_std": 518.7582397460938, + "step": 538 + }, + { + "KL/chosen_KL_mean": -594.95458984375, + "KL/mean": -775.4962158203125, + "KL/rejected_KL_mean": -956.0377807617188, + "KL/std": 433.60675048828125, + "epoch": 0.7914831130690162, + "fcm_dpo/beta": 0.0010887064272537827, + "fcm_dpo/delta": 0.007081391289830208, + "fcm_dpo/margin": 361.0832214355469, + "fcm_dpo/q_t": 0.41017356514930725, + "grad_norm": 30.531400680541992, + "learning_rate": 6.438563463416221e-08, + "logits/chosen": -1.028662919998169, + "logits/rejected": -1.015451192855835, + "logps/chosen": -653.4474487304688, + "logps/ref_chosen": -58.492855072021484, + "logps/ref_rejected": -91.85395050048828, + "logps/rejected": -1047.8917236328125, + "loss": 1.0956, + "margin_dpo/margin_mean": 361.0832214355469, + "margin_dpo/margin_std": 479.93939208984375, + "step": 539 + }, + { + "KL/chosen_KL_mean": -608.3707885742188, + "KL/mean": -851.121826171875, + "KL/rejected_KL_mean": -1093.8729248046875, + "KL/std": 551.8743896484375, + "epoch": 0.7929515418502202, + "fcm_dpo/beta": 0.0010707840556278825, + "fcm_dpo/delta": -0.12625397741794586, + "fcm_dpo/margin": 485.5020751953125, + "fcm_dpo/q_t": 0.38717466592788696, + "grad_norm": 33.186012268066406, + "learning_rate": 6.352838968463919e-08, + "logits/chosen": -0.9437620639801025, + "logits/rejected": -0.9658418893814087, + "logps/chosen": -671.853271484375, + "logps/ref_chosen": -63.482513427734375, + "logps/ref_rejected": -116.42999267578125, + "logps/rejected": -1210.3028564453125, + "loss": 1.0431, + "margin_dpo/margin_mean": 485.5020446777344, + "margin_dpo/margin_std": 648.8251953125, + "step": 540 + }, + { + "KL/chosen_KL_mean": -717.1644287109375, + "KL/mean": -842.0802612304688, + "KL/rejected_KL_mean": -966.99609375, + "KL/std": 490.08099365234375, + "epoch": 0.7944199706314243, + "fcm_dpo/beta": 0.0010596727952361107, + "fcm_dpo/delta": -0.0012176802847534418, + "fcm_dpo/margin": 249.83154296875, + "fcm_dpo/q_t": 0.4399704039096832, + "grad_norm": 45.27843475341797, + "learning_rate": 6.267605843546767e-08, + "logits/chosen": -1.0905866622924805, + "logits/rejected": -1.0777101516723633, + "logps/chosen": -795.44482421875, + "logps/ref_chosen": -78.28036499023438, + "logps/ref_rejected": -103.273681640625, + "logps/rejected": -1070.269775390625, + "loss": 1.2291, + "margin_dpo/margin_mean": 249.83154296875, + "margin_dpo/margin_std": 572.767822265625, + "step": 541 + }, + { + "KL/chosen_KL_mean": -626.7008056640625, + "KL/mean": -863.0870361328125, + "KL/rejected_KL_mean": -1099.4732666015625, + "KL/std": 545.5762939453125, + "epoch": 0.7958883994126285, + "fcm_dpo/beta": 0.0010356687707826495, + "fcm_dpo/delta": -0.09657715260982513, + "fcm_dpo/margin": 472.7724609375, + "fcm_dpo/q_t": 0.3910575807094574, + "grad_norm": 49.864784240722656, + "learning_rate": 6.182866334636888e-08, + "logits/chosen": -1.0657211542129517, + "logits/rejected": -1.098391056060791, + "logps/chosen": -684.185791015625, + "logps/ref_chosen": -57.48497009277344, + "logps/ref_rejected": -96.47506713867188, + "logps/rejected": -1195.9483642578125, + "loss": 1.0595, + "margin_dpo/margin_mean": 472.7724609375, + "margin_dpo/margin_std": 642.970947265625, + "step": 542 + }, + { + "KL/chosen_KL_mean": -702.564697265625, + "KL/mean": -870.582763671875, + "KL/rejected_KL_mean": -1038.6007080078125, + "KL/std": 663.33447265625, + "epoch": 0.7973568281938326, + "fcm_dpo/beta": 0.001045349519699812, + "fcm_dpo/delta": 0.050522927194833755, + "fcm_dpo/margin": 336.0360107421875, + "fcm_dpo/q_t": 0.43216556310653687, + "grad_norm": 39.975833892822266, + "learning_rate": 6.098622674699147e-08, + "logits/chosen": -0.9809169769287109, + "logits/rejected": -1.0095728635787964, + "logps/chosen": -763.1822509765625, + "logps/ref_chosen": -60.61750793457031, + "logps/ref_rejected": -105.59896850585938, + "logps/rejected": -1144.19970703125, + "loss": 1.202, + "margin_dpo/margin_mean": 336.0360107421875, + "margin_dpo/margin_std": 774.7861328125, + "step": 543 + }, + { + "KL/chosen_KL_mean": -709.45947265625, + "KL/mean": -901.91064453125, + "KL/rejected_KL_mean": -1094.36181640625, + "KL/std": 522.1925048828125, + "epoch": 0.7988252569750367, + "fcm_dpo/beta": 0.0010487872641533613, + "fcm_dpo/delta": -0.003843428334221244, + "fcm_dpo/margin": 384.9021911621094, + "fcm_dpo/q_t": 0.40891605615615845, + "grad_norm": 32.359127044677734, + "learning_rate": 6.01487708363232e-08, + "logits/chosen": -1.0074541568756104, + "logits/rejected": -1.025747299194336, + "logps/chosen": -769.101806640625, + "logps/ref_chosen": -59.642303466796875, + "logps/ref_rejected": -100.95469665527344, + "logps/rejected": -1195.31640625, + "loss": 1.1097, + "margin_dpo/margin_mean": 384.9022216796875, + "margin_dpo/margin_std": 589.19677734375, + "step": 544 + }, + { + "KL/chosen_KL_mean": -657.5155029296875, + "KL/mean": -898.0047607421875, + "KL/rejected_KL_mean": -1138.494140625, + "KL/std": 535.8654174804688, + "epoch": 0.8002936857562408, + "fcm_dpo/beta": 0.0010332402307540178, + "fcm_dpo/delta": -0.10204954445362091, + "fcm_dpo/margin": 480.9786376953125, + "fcm_dpo/q_t": 0.38913267850875854, + "grad_norm": 33.80911636352539, + "learning_rate": 5.9316317682106294e-08, + "logits/chosen": -0.9045934677124023, + "logits/rejected": -0.9373363256454468, + "logps/chosen": -725.1640625, + "logps/ref_chosen": -67.64859771728516, + "logps/ref_rejected": -95.90800476074219, + "logps/rejected": -1234.402099609375, + "loss": 1.0393, + "margin_dpo/margin_mean": 480.9786376953125, + "margin_dpo/margin_std": 609.970458984375, + "step": 545 + }, + { + "KL/chosen_KL_mean": -630.7238159179688, + "KL/mean": -781.1712646484375, + "KL/rejected_KL_mean": -931.6187133789062, + "KL/std": 462.85028076171875, + "epoch": 0.801762114537445, + "fcm_dpo/beta": 0.0010417320299893618, + "fcm_dpo/delta": 0.0893513560295105, + "fcm_dpo/margin": 300.89483642578125, + "fcm_dpo/q_t": 0.4263242185115814, + "grad_norm": 38.7053108215332, + "learning_rate": 5.848888922025552e-08, + "logits/chosen": -0.9846795797348022, + "logits/rejected": -0.9681577682495117, + "logps/chosen": -681.468017578125, + "logps/ref_chosen": -50.744232177734375, + "logps/ref_rejected": -81.86622619628906, + "logps/rejected": -1013.4849853515625, + "loss": 1.1632, + "margin_dpo/margin_mean": 300.89483642578125, + "margin_dpo/margin_std": 500.70306396484375, + "step": 546 + }, + { + "KL/chosen_KL_mean": -622.949462890625, + "KL/mean": -825.2542724609375, + "KL/rejected_KL_mean": -1027.55908203125, + "KL/std": 521.1716918945312, + "epoch": 0.8032305433186491, + "fcm_dpo/beta": 0.0010446913074702024, + "fcm_dpo/delta": -0.02371753193438053, + "fcm_dpo/margin": 404.6095275878906, + "fcm_dpo/q_t": 0.4047175645828247, + "grad_norm": 48.27106857299805, + "learning_rate": 5.7666507254280265e-08, + "logits/chosen": -0.9310117363929749, + "logits/rejected": -0.9426393508911133, + "logps/chosen": -696.63720703125, + "logps/ref_chosen": -73.6877212524414, + "logps/ref_rejected": -90.76136779785156, + "logps/rejected": -1118.3204345703125, + "loss": 1.0885, + "margin_dpo/margin_mean": 404.60955810546875, + "margin_dpo/margin_std": 567.8289184570312, + "step": 547 + }, + { + "KL/chosen_KL_mean": -656.554443359375, + "KL/mean": -834.1263427734375, + "KL/rejected_KL_mean": -1011.6983642578125, + "KL/std": 554.41552734375, + "epoch": 0.8046989720998532, + "fcm_dpo/beta": 0.0010460072662681341, + "fcm_dpo/delta": 0.02962075173854828, + "fcm_dpo/margin": 355.14385986328125, + "fcm_dpo/q_t": 0.42021819949150085, + "grad_norm": 30.078670501708984, + "learning_rate": 5.684919345471029e-08, + "logits/chosen": -1.0434558391571045, + "logits/rejected": -1.0432292222976685, + "logps/chosen": -721.80078125, + "logps/ref_chosen": -65.24634552001953, + "logps/ref_rejected": -94.11807250976562, + "logps/rejected": -1105.81640625, + "loss": 1.1356, + "margin_dpo/margin_mean": 355.14385986328125, + "margin_dpo/margin_std": 616.107177734375, + "step": 548 + }, + { + "KL/chosen_KL_mean": -705.240478515625, + "KL/mean": -842.5841064453125, + "KL/rejected_KL_mean": -979.927734375, + "KL/std": 473.9487609863281, + "epoch": 0.8061674008810573, + "fcm_dpo/beta": 0.0010487986728549004, + "fcm_dpo/delta": 0.010232968255877495, + "fcm_dpo/margin": 274.687255859375, + "fcm_dpo/q_t": 0.4355009198188782, + "grad_norm": 55.40736770629883, + "learning_rate": 5.603696935852426e-08, + "logits/chosen": -1.0249577760696411, + "logits/rejected": -1.0135101079940796, + "logps/chosen": -754.4528198242188, + "logps/ref_chosen": -49.21235656738281, + "logps/ref_rejected": -73.91031646728516, + "logps/rejected": -1053.8380126953125, + "loss": 1.2042, + "margin_dpo/margin_mean": 274.687255859375, + "margin_dpo/margin_std": 570.506103515625, + "step": 549 + }, + { + "KL/chosen_KL_mean": -676.3907470703125, + "KL/mean": -843.399169921875, + "KL/rejected_KL_mean": -1010.4075927734375, + "KL/std": 506.3270263671875, + "epoch": 0.8076358296622613, + "fcm_dpo/beta": 0.0010564997792243958, + "fcm_dpo/delta": 0.04886303097009659, + "fcm_dpo/margin": 334.0168762207031, + "fcm_dpo/q_t": 0.4197149872779846, + "grad_norm": 45.87727737426758, + "learning_rate": 5.5229856368582376e-08, + "logits/chosen": -0.9613184928894043, + "logits/rejected": -0.9839845299720764, + "logps/chosen": -733.1976318359375, + "logps/ref_chosen": -56.80695343017578, + "logps/ref_rejected": -95.12580871582031, + "logps/rejected": -1105.533447265625, + "loss": 1.1355, + "margin_dpo/margin_mean": 334.01690673828125, + "margin_dpo/margin_std": 528.5301513671875, + "step": 550 + }, + { + "KL/chosen_KL_mean": -605.405029296875, + "KL/mean": -882.225341796875, + "KL/rejected_KL_mean": -1159.045654296875, + "KL/std": 553.9127197265625, + "epoch": 0.8091042584434655, + "fcm_dpo/beta": 0.001030082581564784, + "fcm_dpo/delta": -0.18102356791496277, + "fcm_dpo/margin": 553.6406860351562, + "fcm_dpo/q_t": 0.3695809245109558, + "grad_norm": 48.99431610107422, + "learning_rate": 5.4427875753062734e-08, + "logits/chosen": -0.9334255456924438, + "logits/rejected": -0.9928078055381775, + "logps/chosen": -664.5113525390625, + "logps/ref_chosen": -59.10633087158203, + "logps/ref_rejected": -111.67280578613281, + "logps/rejected": -1270.718505859375, + "loss": 0.9681, + "margin_dpo/margin_mean": 553.6406860351562, + "margin_dpo/margin_std": 553.6318359375, + "step": 551 + }, + { + "KL/chosen_KL_mean": -561.7005004882812, + "KL/mean": -867.2351684570312, + "KL/rejected_KL_mean": -1172.769775390625, + "KL/std": 621.8682861328125, + "epoch": 0.8105726872246696, + "fcm_dpo/beta": 0.000981001416221261, + "fcm_dpo/delta": -0.21600230038166046, + "fcm_dpo/margin": 611.0693359375, + "fcm_dpo/q_t": 0.3693755269050598, + "grad_norm": 55.24009704589844, + "learning_rate": 5.363104864490034e-08, + "logits/chosen": -0.9999994039535522, + "logits/rejected": -1.0425043106079102, + "logps/chosen": -624.0551147460938, + "logps/ref_chosen": -62.35459899902344, + "logps/ref_rejected": -104.56210327148438, + "logps/rejected": -1277.33203125, + "loss": 0.9781, + "margin_dpo/margin_mean": 611.0692749023438, + "margin_dpo/margin_std": 701.7575073242188, + "step": 552 + }, + { + "KL/chosen_KL_mean": -648.89208984375, + "KL/mean": -806.120849609375, + "KL/rejected_KL_mean": -963.349609375, + "KL/std": 528.423828125, + "epoch": 0.8120411160058737, + "fcm_dpo/beta": 0.0009901414159685373, + "fcm_dpo/delta": 0.09159143269062042, + "fcm_dpo/margin": 314.45751953125, + "fcm_dpo/q_t": 0.43176624178886414, + "grad_norm": 28.201841354370117, + "learning_rate": 5.2839396041230415e-08, + "logits/chosen": -0.9241914749145508, + "logits/rejected": -0.9152238368988037, + "logps/chosen": -717.1509399414062, + "logps/ref_chosen": -68.25881958007812, + "logps/ref_rejected": -98.0971450805664, + "logps/rejected": -1061.44677734375, + "loss": 1.1743, + "margin_dpo/margin_mean": 314.45751953125, + "margin_dpo/margin_std": 584.146240234375, + "step": 553 + }, + { + "KL/chosen_KL_mean": -661.1310424804688, + "KL/mean": -876.3670654296875, + "KL/rejected_KL_mean": -1091.60302734375, + "KL/std": 559.095947265625, + "epoch": 0.8135095447870778, + "fcm_dpo/beta": 0.000999167561531067, + "fcm_dpo/delta": -0.03213735669851303, + "fcm_dpo/margin": 430.4720153808594, + "fcm_dpo/q_t": 0.4063616394996643, + "grad_norm": 72.81156921386719, + "learning_rate": 5.205293880283551e-08, + "logits/chosen": -0.9473394155502319, + "logits/rejected": -0.9234938621520996, + "logps/chosen": -729.0787353515625, + "logps/ref_chosen": -67.94767761230469, + "logps/ref_rejected": -89.78272247314453, + "logps/rejected": -1181.3857421875, + "loss": 1.1178, + "margin_dpo/margin_mean": 430.4720153808594, + "margin_dpo/margin_std": 707.259521484375, + "step": 554 + }, + { + "KL/chosen_KL_mean": -683.7077026367188, + "KL/mean": -935.7298583984375, + "KL/rejected_KL_mean": -1187.7520751953125, + "KL/std": 597.616943359375, + "epoch": 0.8149779735682819, + "fcm_dpo/beta": 0.0009781017433851957, + "fcm_dpo/delta": -0.09773121029138565, + "fcm_dpo/margin": 504.04437255859375, + "fcm_dpo/q_t": 0.3947869837284088, + "grad_norm": 50.65883255004883, + "learning_rate": 5.127169765359515e-08, + "logits/chosen": -1.0049800872802734, + "logits/rejected": -1.0619277954101562, + "logps/chosen": -737.0382080078125, + "logps/ref_chosen": -53.33049011230469, + "logps/ref_rejected": -108.47937774658203, + "logps/rejected": -1296.2314453125, + "loss": 1.078, + "margin_dpo/margin_mean": 504.04437255859375, + "margin_dpo/margin_std": 776.3997802734375, + "step": 555 + }, + { + "KL/chosen_KL_mean": -654.3289794921875, + "KL/mean": -803.46728515625, + "KL/rejected_KL_mean": -952.6055908203125, + "KL/std": 454.75347900390625, + "epoch": 0.8164464023494861, + "fcm_dpo/beta": 0.0009878533892333508, + "fcm_dpo/delta": 0.10852371156215668, + "fcm_dpo/margin": 298.27655029296875, + "fcm_dpo/q_t": 0.4318218529224396, + "grad_norm": 28.539920806884766, + "learning_rate": 5.049569317994012e-08, + "logits/chosen": -1.0015957355499268, + "logits/rejected": -0.997646689414978, + "logps/chosen": -712.9735107421875, + "logps/ref_chosen": -58.64447021484375, + "logps/ref_rejected": -101.34040832519531, + "logps/rejected": -1053.946044921875, + "loss": 1.1608, + "margin_dpo/margin_mean": 298.2765808105469, + "margin_dpo/margin_std": 454.01800537109375, + "step": 556 + }, + { + "KL/chosen_KL_mean": -716.552978515625, + "KL/mean": -943.0986328125, + "KL/rejected_KL_mean": -1169.6444091796875, + "KL/std": 638.390625, + "epoch": 0.8179148311306902, + "fcm_dpo/beta": 0.0009835660457611084, + "fcm_dpo/delta": -0.04802338778972626, + "fcm_dpo/margin": 453.0914306640625, + "fcm_dpo/q_t": 0.402864545583725, + "grad_norm": 74.13558959960938, + "learning_rate": 4.9724945830310144e-08, + "logits/chosen": -1.025956392288208, + "logits/rejected": -1.0636675357818604, + "logps/chosen": -784.3936157226562, + "logps/ref_chosen": -67.84066009521484, + "logps/ref_rejected": -109.93965911865234, + "logps/rejected": -1279.583984375, + "loss": 1.105, + "margin_dpo/margin_mean": 453.0914306640625, + "margin_dpo/margin_std": 724.352783203125, + "step": 557 + }, + { + "KL/chosen_KL_mean": -638.923095703125, + "KL/mean": -944.195068359375, + "KL/rejected_KL_mean": -1249.467041015625, + "KL/std": 590.7932739257812, + "epoch": 0.8193832599118943, + "fcm_dpo/beta": 0.000956161180511117, + "fcm_dpo/delta": -0.19506213068962097, + "fcm_dpo/margin": 610.5439453125, + "fcm_dpo/q_t": 0.36552464962005615, + "grad_norm": 37.18507385253906, + "learning_rate": 4.8959475914614554e-08, + "logits/chosen": -1.0881562232971191, + "logits/rejected": -1.1110167503356934, + "logps/chosen": -701.2913208007812, + "logps/ref_chosen": -62.36824035644531, + "logps/ref_rejected": -102.16102600097656, + "logps/rejected": -1351.628173828125, + "loss": 0.9725, + "margin_dpo/margin_mean": 610.5439453125, + "margin_dpo/margin_std": 645.1649169921875, + "step": 558 + }, + { + "KL/chosen_KL_mean": -737.7335205078125, + "KL/mean": -990.26171875, + "KL/rejected_KL_mean": -1242.789794921875, + "KL/std": 609.5598754882812, + "epoch": 0.8208516886930984, + "fcm_dpo/beta": 0.000936803175136447, + "fcm_dpo/delta": -0.07669977843761444, + "fcm_dpo/margin": 505.05615234375, + "fcm_dpo/q_t": 0.39399462938308716, + "grad_norm": 30.2044734954834, + "learning_rate": 4.8199303603697614e-08, + "logits/chosen": -1.1786550283432007, + "logits/rejected": -1.18735933303833, + "logps/chosen": -798.4859008789062, + "logps/ref_chosen": -60.752323150634766, + "logps/ref_rejected": -93.44229125976562, + "logps/rejected": -1336.2320556640625, + "loss": 1.0549, + "margin_dpo/margin_mean": 505.05621337890625, + "margin_dpo/margin_std": 663.0927734375, + "step": 559 + }, + { + "KL/chosen_KL_mean": -665.8540649414062, + "KL/mean": -852.7691650390625, + "KL/rejected_KL_mean": -1039.684326171875, + "KL/std": 539.5546875, + "epoch": 0.8223201174743024, + "fcm_dpo/beta": 0.0009346292354166508, + "fcm_dpo/delta": 0.0522555373609066, + "fcm_dpo/margin": 373.83026123046875, + "fcm_dpo/q_t": 0.4206470251083374, + "grad_norm": 34.21842575073242, + "learning_rate": 4.7444448928806615e-08, + "logits/chosen": -0.9070395231246948, + "logits/rejected": -0.8892009258270264, + "logps/chosen": -723.9578857421875, + "logps/ref_chosen": -58.10382080078125, + "logps/ref_rejected": -79.99122619628906, + "logps/rejected": -1119.675537109375, + "loss": 1.1402, + "margin_dpo/margin_mean": 373.83026123046875, + "margin_dpo/margin_std": 590.425537109375, + "step": 560 + }, + { + "KL/chosen_KL_mean": -797.4371948242188, + "KL/mean": -945.9874877929688, + "KL/rejected_KL_mean": -1094.537841796875, + "KL/std": 540.9091186523438, + "epoch": 0.8237885462555066, + "fcm_dpo/beta": 0.0009608013788238168, + "fcm_dpo/delta": 0.11739911884069443, + "fcm_dpo/margin": 297.1005554199219, + "fcm_dpo/q_t": 0.4335824251174927, + "grad_norm": 41.68199157714844, + "learning_rate": 4.669493178106432e-08, + "logits/chosen": -1.0954780578613281, + "logits/rejected": -1.1158504486083984, + "logps/chosen": -848.35009765625, + "logps/ref_chosen": -50.912879943847656, + "logps/ref_rejected": -99.06856536865234, + "logps/rejected": -1193.6063232421875, + "loss": 1.2181, + "margin_dpo/margin_mean": 297.10052490234375, + "margin_dpo/margin_std": 675.0682373046875, + "step": 561 + }, + { + "KL/chosen_KL_mean": -731.2474975585938, + "KL/mean": -947.933837890625, + "KL/rejected_KL_mean": -1164.6201171875, + "KL/std": 597.2823486328125, + "epoch": 0.8252569750367107, + "fcm_dpo/beta": 0.0009560026228427887, + "fcm_dpo/delta": -0.01568439230322838, + "fcm_dpo/margin": 433.3727722167969, + "fcm_dpo/q_t": 0.40745779871940613, + "grad_norm": 44.4998664855957, + "learning_rate": 4.5950771910944596e-08, + "logits/chosen": -1.0381966829299927, + "logits/rejected": -1.0505871772766113, + "logps/chosen": -790.7119140625, + "logps/ref_chosen": -59.46440124511719, + "logps/ref_rejected": -96.54266357421875, + "logps/rejected": -1261.162841796875, + "loss": 1.1008, + "margin_dpo/margin_mean": 433.3727722167969, + "margin_dpo/margin_std": 646.6458740234375, + "step": 562 + }, + { + "KL/chosen_KL_mean": -818.318603515625, + "KL/mean": -976.48193359375, + "KL/rejected_KL_mean": -1134.645263671875, + "KL/std": 644.12939453125, + "epoch": 0.8267254038179148, + "fcm_dpo/beta": 0.0009566263761371374, + "fcm_dpo/delta": -0.042735543102025986, + "fcm_dpo/margin": 316.32666015625, + "fcm_dpo/q_t": 0.42516082525253296, + "grad_norm": 57.121238708496094, + "learning_rate": 4.521198892775202e-08, + "logits/chosen": -1.0335191488265991, + "logits/rejected": -1.0376369953155518, + "logps/chosen": -878.9268188476562, + "logps/ref_chosen": -60.60819625854492, + "logps/ref_rejected": -94.56770324707031, + "logps/rejected": -1229.2130126953125, + "loss": 1.2364, + "margin_dpo/margin_mean": 316.32666015625, + "margin_dpo/margin_std": 755.5484619140625, + "step": 563 + }, + { + "KL/chosen_KL_mean": -720.1054077148438, + "KL/mean": -927.0765380859375, + "KL/rejected_KL_mean": -1134.0478515625, + "KL/std": 552.9932250976562, + "epoch": 0.8281938325991189, + "fcm_dpo/beta": 0.0009554863208904862, + "fcm_dpo/delta": 0.004538660869002342, + "fcm_dpo/margin": 413.9423828125, + "fcm_dpo/q_t": 0.40960174798965454, + "grad_norm": 40.964012145996094, + "learning_rate": 4.447860229910544e-08, + "logits/chosen": -1.0873092412948608, + "logits/rejected": -1.0802876949310303, + "logps/chosen": -794.373779296875, + "logps/ref_chosen": -74.26837921142578, + "logps/ref_rejected": -93.23818969726562, + "logps/rejected": -1227.285888671875, + "loss": 1.0915, + "margin_dpo/margin_mean": 413.9423828125, + "margin_dpo/margin_std": 534.1187744140625, + "step": 564 + }, + { + "KL/chosen_KL_mean": -773.20751953125, + "KL/mean": -995.1334228515625, + "KL/rejected_KL_mean": -1217.059326171875, + "KL/std": 649.26171875, + "epoch": 0.8296622613803231, + "fcm_dpo/beta": 0.0009499896550551057, + "fcm_dpo/delta": -0.022613905370235443, + "fcm_dpo/margin": 443.85186767578125, + "fcm_dpo/q_t": 0.4094652831554413, + "grad_norm": 60.46991729736328, + "learning_rate": 4.375063135042445e-08, + "logits/chosen": -1.0429582595825195, + "logits/rejected": -1.0467889308929443, + "logps/chosen": -842.2274169921875, + "logps/ref_chosen": -69.0199203491211, + "logps/ref_rejected": -85.7789306640625, + "logps/rejected": -1302.83837890625, + "loss": 1.1303, + "margin_dpo/margin_mean": 443.85186767578125, + "margin_dpo/margin_std": 780.3602294921875, + "step": 565 + }, + { + "KL/chosen_KL_mean": -731.8004760742188, + "KL/mean": -975.913818359375, + "KL/rejected_KL_mean": -1220.0272216796875, + "KL/std": 664.28466796875, + "epoch": 0.8311306901615272, + "fcm_dpo/beta": 0.000945397128816694, + "fcm_dpo/delta": -0.06484313309192657, + "fcm_dpo/margin": 488.22674560546875, + "fcm_dpo/q_t": 0.39865684509277344, + "grad_norm": 32.33478927612305, + "learning_rate": 4.3028095264420525e-08, + "logits/chosen": -1.0355273485183716, + "logits/rejected": -1.0623806715011597, + "logps/chosen": -798.3458251953125, + "logps/ref_chosen": -66.5453109741211, + "logps/ref_rejected": -103.86932373046875, + "logps/rejected": -1323.896484375, + "loss": 1.1021, + "margin_dpo/margin_mean": 488.22674560546875, + "margin_dpo/margin_std": 773.612060546875, + "step": 566 + }, + { + "KL/chosen_KL_mean": -693.953369140625, + "KL/mean": -878.732666015625, + "KL/rejected_KL_mean": -1063.511962890625, + "KL/std": 454.4079284667969, + "epoch": 0.8325991189427313, + "fcm_dpo/beta": 0.0009434693492949009, + "fcm_dpo/delta": 0.053218990564346313, + "fcm_dpo/margin": 369.55865478515625, + "fcm_dpo/q_t": 0.41841405630111694, + "grad_norm": 40.58536911010742, + "learning_rate": 4.231101308059165e-08, + "logits/chosen": -1.1621546745300293, + "logits/rejected": -1.1755425930023193, + "logps/chosen": -746.8116455078125, + "logps/ref_chosen": -52.85829544067383, + "logps/ref_rejected": -85.37095642089844, + "logps/rejected": -1148.8829345703125, + "loss": 1.1196, + "margin_dpo/margin_mean": 369.55865478515625, + "margin_dpo/margin_std": 497.5330505371094, + "step": 567 + }, + { + "KL/chosen_KL_mean": -696.86279296875, + "KL/mean": -949.0692749023438, + "KL/rejected_KL_mean": -1201.275634765625, + "KL/std": 546.216064453125, + "epoch": 0.8340675477239354, + "fcm_dpo/beta": 0.0009356088703498244, + "fcm_dpo/delta": -0.07556474953889847, + "fcm_dpo/margin": 504.4129333496094, + "fcm_dpo/q_t": 0.39082616567611694, + "grad_norm": 34.73697280883789, + "learning_rate": 4.1599403694720145e-08, + "logits/chosen": -1.0284827947616577, + "logits/rejected": -1.069136142730713, + "logps/chosen": -742.05517578125, + "logps/ref_chosen": -45.1923828125, + "logps/ref_rejected": -89.09236907958984, + "logps/rejected": -1290.3680419921875, + "loss": 1.0361, + "margin_dpo/margin_mean": 504.41290283203125, + "margin_dpo/margin_std": 577.991943359375, + "step": 568 + }, + { + "KL/chosen_KL_mean": -800.5419921875, + "KL/mean": -999.2967529296875, + "KL/rejected_KL_mean": -1198.051513671875, + "KL/std": 707.2173461914062, + "epoch": 0.8355359765051396, + "fcm_dpo/beta": 0.0009422843577340245, + "fcm_dpo/delta": 0.025416847318410873, + "fcm_dpo/margin": 397.50946044921875, + "fcm_dpo/q_t": 0.4149819612503052, + "grad_norm": 68.37850952148438, + "learning_rate": 4.089328585837512e-08, + "logits/chosen": -1.0683939456939697, + "logits/rejected": -1.0735970735549927, + "logps/chosen": -864.2625732421875, + "logps/ref_chosen": -63.72056198120117, + "logps/ref_rejected": -79.10325622558594, + "logps/rejected": -1277.15478515625, + "loss": 1.1639, + "margin_dpo/margin_mean": 397.50946044921875, + "margin_dpo/margin_std": 739.0003051757812, + "step": 569 + }, + { + "KL/chosen_KL_mean": -741.1410522460938, + "KL/mean": -932.583251953125, + "KL/rejected_KL_mean": -1124.025390625, + "KL/std": 565.6273803710938, + "epoch": 0.8370044052863436, + "fcm_dpo/beta": 0.0009413023362867534, + "fcm_dpo/delta": 0.04106989875435829, + "fcm_dpo/margin": 382.88421630859375, + "fcm_dpo/q_t": 0.4200833737850189, + "grad_norm": 34.74723815917969, + "learning_rate": 4.019267817841834e-08, + "logits/chosen": -1.177173137664795, + "logits/rejected": -1.1700718402862549, + "logps/chosen": -802.755615234375, + "logps/ref_chosen": -61.61454391479492, + "logps/ref_rejected": -82.14186096191406, + "logps/rejected": -1206.167236328125, + "loss": 1.1407, + "margin_dpo/margin_mean": 382.8842468261719, + "margin_dpo/margin_std": 631.297607421875, + "step": 570 + }, + { + "KL/chosen_KL_mean": -775.4749755859375, + "KL/mean": -999.5449829101562, + "KL/rejected_KL_mean": -1223.614990234375, + "KL/std": 576.8671875, + "epoch": 0.8384728340675477, + "fcm_dpo/beta": 0.00093979382654652, + "fcm_dpo/delta": -0.02226072922348976, + "fcm_dpo/margin": 448.14007568359375, + "fcm_dpo/q_t": 0.4062590003013611, + "grad_norm": 51.04140090942383, + "learning_rate": 3.9497599116513705e-08, + "logits/chosen": -1.042744755744934, + "logits/rejected": -1.0550953149795532, + "logps/chosen": -828.529052734375, + "logps/ref_chosen": -53.05406188964844, + "logps/ref_rejected": -91.33682250976562, + "logps/rejected": -1314.9517822265625, + "loss": 1.1144, + "margin_dpo/margin_mean": 448.14007568359375, + "margin_dpo/margin_std": 723.029052734375, + "step": 571 + }, + { + "KL/chosen_KL_mean": -804.140625, + "KL/mean": -1039.684814453125, + "KL/rejected_KL_mean": -1275.22900390625, + "KL/std": 669.639404296875, + "epoch": 0.8399412628487518, + "fcm_dpo/beta": 0.0009325648425146937, + "fcm_dpo/delta": -0.04140661656856537, + "fcm_dpo/margin": 471.0883483886719, + "fcm_dpo/q_t": 0.4063248038291931, + "grad_norm": 32.49717712402344, + "learning_rate": 3.880806698864086e-08, + "logits/chosen": -1.0944292545318604, + "logits/rejected": -1.1295243501663208, + "logps/chosen": -852.5999755859375, + "logps/ref_chosen": -48.45928955078125, + "logps/ref_rejected": -83.55703735351562, + "logps/rejected": -1358.7861328125, + "loss": 1.12, + "margin_dpo/margin_mean": 471.08837890625, + "margin_dpo/margin_std": 815.548828125, + "step": 572 + }, + { + "KL/chosen_KL_mean": -792.255615234375, + "KL/mean": -1000.1666259765625, + "KL/rejected_KL_mean": -1208.07763671875, + "KL/std": 596.44189453125, + "epoch": 0.8414096916299559, + "fcm_dpo/beta": 0.0009360272670164704, + "fcm_dpo/delta": 0.011164238676428795, + "fcm_dpo/margin": 415.822021484375, + "fcm_dpo/q_t": 0.41275107860565186, + "grad_norm": 29.790285110473633, + "learning_rate": 3.812409996461275e-08, + "logits/chosen": -1.1403576135635376, + "logits/rejected": -1.1524157524108887, + "logps/chosen": -843.8782348632812, + "logps/ref_chosen": -51.62262725830078, + "logps/ref_rejected": -85.32499694824219, + "logps/rejected": -1293.402587890625, + "loss": 1.1041, + "margin_dpo/margin_mean": 415.822021484375, + "margin_dpo/margin_std": 595.5224609375, + "step": 573 + }, + { + "KL/chosen_KL_mean": -698.723876953125, + "KL/mean": -926.019287109375, + "KL/rejected_KL_mean": -1153.314697265625, + "KL/std": 534.9053344726562, + "epoch": 0.8428781204111601, + "fcm_dpo/beta": 0.0009335580398328602, + "fcm_dpo/delta": -0.025494040921330452, + "fcm_dpo/margin": 454.5906982421875, + "fcm_dpo/q_t": 0.4033309519290924, + "grad_norm": 44.21940612792969, + "learning_rate": 3.74457160675965e-08, + "logits/chosen": -1.1207423210144043, + "logits/rejected": -1.147803783416748, + "logps/chosen": -749.768310546875, + "logps/ref_chosen": -51.04446029663086, + "logps/ref_rejected": -92.80640411376953, + "logps/rejected": -1246.12109375, + "loss": 1.0836, + "margin_dpo/margin_mean": 454.5906982421875, + "margin_dpo/margin_std": 616.3843994140625, + "step": 574 + }, + { + "KL/chosen_KL_mean": -788.0826416015625, + "KL/mean": -997.0467529296875, + "KL/rejected_KL_mean": -1206.0108642578125, + "KL/std": 558.839599609375, + "epoch": 0.8443465491923642, + "fcm_dpo/beta": 0.0009250047150999308, + "fcm_dpo/delta": 0.012776091694831848, + "fcm_dpo/margin": 417.9283447265625, + "fcm_dpo/q_t": 0.41284099221229553, + "grad_norm": 47.46909713745117, + "learning_rate": 3.677293317363864e-08, + "logits/chosen": -0.9957201480865479, + "logits/rejected": -1.003206491470337, + "logps/chosen": -859.8727416992188, + "logps/ref_chosen": -71.7901382446289, + "logps/ref_rejected": -95.38619995117188, + "logps/rejected": -1301.3970947265625, + "loss": 1.1451, + "margin_dpo/margin_mean": 417.9283447265625, + "margin_dpo/margin_std": 721.6036376953125, + "step": 575 + }, + { + "KL/chosen_KL_mean": -724.9219970703125, + "KL/mean": -881.224853515625, + "KL/rejected_KL_mean": -1037.5277099609375, + "KL/std": 497.8431396484375, + "epoch": 0.8458149779735683, + "fcm_dpo/beta": 0.0009450684301555157, + "fcm_dpo/delta": 0.10788638889789581, + "fcm_dpo/margin": 312.60577392578125, + "fcm_dpo/q_t": 0.4328186810016632, + "grad_norm": 32.42967224121094, + "learning_rate": 3.6105769011194224e-08, + "logits/chosen": -1.0785043239593506, + "logits/rejected": -1.1083261966705322, + "logps/chosen": -779.1849365234375, + "logps/ref_chosen": -54.262962341308594, + "logps/ref_rejected": -100.75428009033203, + "logps/rejected": -1138.281982421875, + "loss": 1.1847, + "margin_dpo/margin_mean": 312.60577392578125, + "margin_dpo/margin_std": 577.78369140625, + "step": 576 + }, + { + "KL/chosen_KL_mean": -675.4276123046875, + "KL/mean": -879.83935546875, + "KL/rejected_KL_mean": -1084.251220703125, + "KL/std": 572.3829345703125, + "epoch": 0.8472834067547724, + "fcm_dpo/beta": 0.0009535005083307624, + "fcm_dpo/delta": 0.01057706493884325, + "fcm_dpo/margin": 408.823486328125, + "fcm_dpo/q_t": 0.41230309009552, + "grad_norm": 30.890453338623047, + "learning_rate": 3.5444241160659304e-08, + "logits/chosen": -1.104528546333313, + "logits/rejected": -1.0938575267791748, + "logps/chosen": -737.3373413085938, + "logps/ref_chosen": -61.909706115722656, + "logps/ref_rejected": -84.07069396972656, + "logps/rejected": -1168.32177734375, + "loss": 1.1236, + "margin_dpo/margin_mean": 408.8235168457031, + "margin_dpo/margin_std": 617.4876098632812, + "step": 577 + }, + { + "KL/chosen_KL_mean": -653.1123046875, + "KL/mean": -878.5861206054688, + "KL/rejected_KL_mean": -1104.0599365234375, + "KL/std": 559.1902465820312, + "epoch": 0.8487518355359766, + "fcm_dpo/beta": 0.0009461954468861222, + "fcm_dpo/delta": -0.028605271130800247, + "fcm_dpo/margin": 450.94769287109375, + "fcm_dpo/q_t": 0.40265679359436035, + "grad_norm": 39.44662094116211, + "learning_rate": 3.478836705390808e-08, + "logits/chosen": -0.960713803768158, + "logits/rejected": -0.9943492412567139, + "logps/chosen": -702.3759765625, + "logps/ref_chosen": -49.26368713378906, + "logps/ref_rejected": -83.4362564086914, + "logps/rejected": -1187.4962158203125, + "loss": 1.0739, + "margin_dpo/margin_mean": 450.94769287109375, + "margin_dpo/margin_std": 569.8036499023438, + "step": 578 + }, + { + "KL/chosen_KL_mean": -740.7020263671875, + "KL/mean": -872.2453002929688, + "KL/rejected_KL_mean": -1003.78857421875, + "KL/std": 550.201416015625, + "epoch": 0.8502202643171806, + "fcm_dpo/beta": 0.0009595105657353997, + "fcm_dpo/delta": 0.050605472177267075, + "fcm_dpo/margin": 263.0865478515625, + "fcm_dpo/q_t": 0.44163453578948975, + "grad_norm": 62.6992301940918, + "learning_rate": 3.41381639738331e-08, + "logits/chosen": -0.9965687394142151, + "logits/rejected": -0.9950494766235352, + "logps/chosen": -799.587890625, + "logps/ref_chosen": -58.88581848144531, + "logps/ref_rejected": -94.78762817382812, + "logps/rejected": -1098.576171875, + "loss": 1.2315, + "margin_dpo/margin_mean": 263.0865478515625, + "margin_dpo/margin_std": 606.66259765625, + "step": 579 + }, + { + "KL/chosen_KL_mean": -536.5447998046875, + "KL/mean": -791.0418701171875, + "KL/rejected_KL_mean": -1045.5389404296875, + "KL/std": 616.0582275390625, + "epoch": 0.8516886930983847, + "fcm_dpo/beta": 0.0009441368165425956, + "fcm_dpo/delta": -0.08517200499773026, + "fcm_dpo/margin": 508.99420166015625, + "fcm_dpo/q_t": 0.3952370882034302, + "grad_norm": 31.18793487548828, + "learning_rate": 3.349364905389032e-08, + "logits/chosen": -0.8859065771102905, + "logits/rejected": -0.9189168214797974, + "logps/chosen": -585.2516479492188, + "logps/ref_chosen": -48.70683670043945, + "logps/ref_rejected": -81.7583999633789, + "logps/rejected": -1127.29736328125, + "loss": 1.0568, + "margin_dpo/margin_mean": 508.9941711425781, + "margin_dpo/margin_std": 707.2589111328125, + "step": 580 + }, + { + "KL/chosen_KL_mean": -736.6950073242188, + "KL/mean": -915.5865478515625, + "KL/rejected_KL_mean": -1094.477783203125, + "KL/std": 581.46533203125, + "epoch": 0.8531571218795888, + "fcm_dpo/beta": 0.0009529366507194936, + "fcm_dpo/delta": 0.06105329841375351, + "fcm_dpo/margin": 357.78289794921875, + "fcm_dpo/q_t": 0.4243428111076355, + "grad_norm": 41.60260009765625, + "learning_rate": 3.285483927764726e-08, + "logits/chosen": -1.1242549419403076, + "logits/rejected": -1.1350033283233643, + "logps/chosen": -798.9173583984375, + "logps/ref_chosen": -62.22235107421875, + "logps/ref_rejected": -91.73568725585938, + "logps/rejected": -1186.213623046875, + "loss": 1.1564, + "margin_dpo/margin_mean": 357.78289794921875, + "margin_dpo/margin_std": 635.5386962890625, + "step": 581 + }, + { + "KL/chosen_KL_mean": -648.818603515625, + "KL/mean": -849.9896240234375, + "KL/rejected_KL_mean": -1051.16064453125, + "KL/std": 508.7437744140625, + "epoch": 0.8546255506607929, + "fcm_dpo/beta": 0.0009470300283282995, + "fcm_dpo/delta": -0.08303224295377731, + "fcm_dpo/margin": 402.34197998046875, + "fcm_dpo/q_t": 0.41153034567832947, + "grad_norm": 39.270023345947266, + "learning_rate": 3.222175147833556e-08, + "logits/chosen": -1.0631489753723145, + "logits/rejected": -1.087165355682373, + "logps/chosen": -707.0472412109375, + "logps/ref_chosen": -58.228660583496094, + "logps/ref_rejected": -110.06959533691406, + "logps/rejected": -1161.230224609375, + "loss": 1.1143, + "margin_dpo/margin_mean": 402.34197998046875, + "margin_dpo/margin_std": 554.0859375, + "step": 582 + }, + { + "KL/chosen_KL_mean": -734.060546875, + "KL/mean": -855.7951049804688, + "KL/rejected_KL_mean": -977.5296630859375, + "KL/std": 547.7103271484375, + "epoch": 0.856093979441997, + "fcm_dpo/beta": 0.0009388748439960182, + "fcm_dpo/delta": -0.0015546621289104223, + "fcm_dpo/margin": 243.4691162109375, + "fcm_dpo/q_t": 0.44874462485313416, + "grad_norm": 69.85670471191406, + "learning_rate": 3.159440233840763e-08, + "logits/chosen": -1.029801845550537, + "logits/rejected": -1.0272910594940186, + "logps/chosen": -790.9234619140625, + "logps/ref_chosen": -56.86286163330078, + "logps/ref_rejected": -88.4039306640625, + "logps/rejected": -1065.93359375, + "loss": 1.2665, + "margin_dpo/margin_mean": 243.4691162109375, + "margin_dpo/margin_std": 655.7301025390625, + "step": 583 + }, + { + "KL/chosen_KL_mean": -614.206787109375, + "KL/mean": -863.93896484375, + "KL/rejected_KL_mean": -1113.671142578125, + "KL/std": 565.6908569335938, + "epoch": 0.8575624082232012, + "fcm_dpo/beta": 0.0009302167454734445, + "fcm_dpo/delta": -0.06771711260080338, + "fcm_dpo/margin": 499.46441650390625, + "fcm_dpo/q_t": 0.3950466513633728, + "grad_norm": 31.617774963378906, + "learning_rate": 3.0972808389096635e-08, + "logits/chosen": -1.0071210861206055, + "logits/rejected": -1.0183664560317993, + "logps/chosen": -671.107421875, + "logps/ref_chosen": -56.90068054199219, + "logps/ref_rejected": -97.63606262207031, + "logps/rejected": -1211.3072509765625, + "loss": 1.0467, + "margin_dpo/margin_mean": 499.46441650390625, + "margin_dpo/margin_std": 599.928466796875, + "step": 584 + }, + { + "KL/chosen_KL_mean": -719.5697631835938, + "KL/mean": -944.973388671875, + "KL/rejected_KL_mean": -1170.376953125, + "KL/std": 628.8306884765625, + "epoch": 0.8590308370044053, + "fcm_dpo/beta": 0.0009213717421516776, + "fcm_dpo/delta": -0.016240080818533897, + "fcm_dpo/margin": 450.80718994140625, + "fcm_dpo/q_t": 0.4065605700016022, + "grad_norm": 37.25502395629883, + "learning_rate": 3.035698600998121e-08, + "logits/chosen": -1.0849614143371582, + "logits/rejected": -1.1072694063186646, + "logps/chosen": -780.543701171875, + "logps/ref_chosen": -60.973968505859375, + "logps/ref_rejected": -84.16952514648438, + "logps/rejected": -1254.5465087890625, + "loss": 1.1214, + "margin_dpo/margin_mean": 450.8072509765625, + "margin_dpo/margin_std": 750.4278564453125, + "step": 585 + }, + { + "KL/chosen_KL_mean": -764.2457275390625, + "KL/mean": -916.5640869140625, + "KL/rejected_KL_mean": -1068.88232421875, + "KL/std": 550.64111328125, + "epoch": 0.8604992657856094, + "fcm_dpo/beta": 0.0009377297828905284, + "fcm_dpo/delta": 0.11785154044628143, + "fcm_dpo/margin": 304.63677978515625, + "fcm_dpo/q_t": 0.4342753291130066, + "grad_norm": 32.1750602722168, + "learning_rate": 2.974695142855388e-08, + "logits/chosen": -1.0714232921600342, + "logits/rejected": -1.0927150249481201, + "logps/chosen": -821.101318359375, + "logps/ref_chosen": -56.85559844970703, + "logps/ref_rejected": -91.80261993408203, + "logps/rejected": -1160.68505859375, + "loss": 1.1966, + "margin_dpo/margin_mean": 304.63677978515625, + "margin_dpo/margin_std": 600.1655883789062, + "step": 586 + }, + { + "KL/chosen_KL_mean": -531.717041015625, + "KL/mean": -734.9912109375, + "KL/rejected_KL_mean": -938.265380859375, + "KL/std": 562.0120849609375, + "epoch": 0.8619676945668135, + "fcm_dpo/beta": 0.0009496827842667699, + "fcm_dpo/delta": 0.01429927907884121, + "fcm_dpo/margin": 406.54833984375, + "fcm_dpo/q_t": 0.4112315773963928, + "grad_norm": 39.31088638305664, + "learning_rate": 2.9142720719793122e-08, + "logits/chosen": -1.075244426727295, + "logits/rejected": -1.0990477800369263, + "logps/chosen": -576.4086303710938, + "logps/ref_chosen": -44.69159698486328, + "logps/ref_rejected": -82.62385559082031, + "logps/rejected": -1020.8892211914062, + "loss": 1.1065, + "margin_dpo/margin_mean": 406.54833984375, + "margin_dpo/margin_std": 574.6107177734375, + "step": 587 + }, + { + "KL/chosen_KL_mean": -691.1477661132812, + "KL/mean": -867.112060546875, + "KL/rejected_KL_mean": -1043.0762939453125, + "KL/std": 485.6700134277344, + "epoch": 0.8634361233480177, + "fcm_dpo/beta": 0.0009533903794363141, + "fcm_dpo/delta": 0.06662734597921371, + "fcm_dpo/margin": 351.92852783203125, + "fcm_dpo/q_t": 0.4219910204410553, + "grad_norm": 29.434553146362305, + "learning_rate": 2.8544309805740018e-08, + "logits/chosen": -1.0492743253707886, + "logits/rejected": -1.0743939876556396, + "logps/chosen": -741.4427490234375, + "logps/ref_chosen": -50.29494857788086, + "logps/ref_rejected": -107.36988067626953, + "logps/rejected": -1150.4461669921875, + "loss": 1.1371, + "margin_dpo/margin_mean": 351.92852783203125, + "margin_dpo/margin_std": 510.2389831542969, + "step": 588 + }, + { + "KL/chosen_KL_mean": -680.4524536132812, + "KL/mean": -915.0347290039062, + "KL/rejected_KL_mean": -1149.616943359375, + "KL/std": 550.6396484375, + "epoch": 0.8649045521292217, + "fcm_dpo/beta": 0.0009546533692628145, + "fcm_dpo/delta": -0.05010441318154335, + "fcm_dpo/margin": 469.16448974609375, + "fcm_dpo/q_t": 0.3972678780555725, + "grad_norm": 34.32419204711914, + "learning_rate": 2.7951734455078786e-08, + "logits/chosen": -1.0105873346328735, + "logits/rejected": -1.0250484943389893, + "logps/chosen": -740.38232421875, + "logps/ref_chosen": -59.929908752441406, + "logps/ref_rejected": -111.65534973144531, + "logps/rejected": -1261.2723388671875, + "loss": 1.0566, + "margin_dpo/margin_mean": 469.16448974609375, + "margin_dpo/margin_std": 573.5706787109375, + "step": 589 + }, + { + "KL/chosen_KL_mean": -611.4031982421875, + "KL/mean": -840.3101196289062, + "KL/rejected_KL_mean": -1069.217041015625, + "KL/std": 545.7060546875, + "epoch": 0.8663729809104258, + "fcm_dpo/beta": 0.0009457060368731618, + "fcm_dpo/delta": -0.034474555402994156, + "fcm_dpo/margin": 457.81378173828125, + "fcm_dpo/q_t": 0.4021071493625641, + "grad_norm": 26.10135269165039, + "learning_rate": 2.736501028272095e-08, + "logits/chosen": -0.9935860633850098, + "logits/rejected": -1.0191072225570679, + "logps/chosen": -667.2130126953125, + "logps/ref_chosen": -55.80979537963867, + "logps/ref_rejected": -106.06282043457031, + "logps/rejected": -1175.2799072265625, + "loss": 1.0732, + "margin_dpo/margin_mean": 457.81378173828125, + "margin_dpo/margin_std": 602.8309936523438, + "step": 590 + }, + { + "KL/chosen_KL_mean": -662.7632446289062, + "KL/mean": -870.50341796875, + "KL/rejected_KL_mean": -1078.24365234375, + "KL/std": 519.09814453125, + "epoch": 0.8678414096916299, + "fcm_dpo/beta": 0.0009449812932871282, + "fcm_dpo/delta": 0.007675642147660255, + "fcm_dpo/margin": 415.48028564453125, + "fcm_dpo/q_t": 0.4091309905052185, + "grad_norm": 33.07461166381836, + "learning_rate": 2.678415274939408e-08, + "logits/chosen": -1.0695421695709229, + "logits/rejected": -1.0605621337890625, + "logps/chosen": -719.00390625, + "logps/ref_chosen": -56.24061965942383, + "logps/ref_rejected": -83.78629302978516, + "logps/rejected": -1162.02978515625, + "loss": 1.1031, + "margin_dpo/margin_mean": 415.48028564453125, + "margin_dpo/margin_std": 583.4301147460938, + "step": 591 + }, + { + "KL/chosen_KL_mean": -719.1552734375, + "KL/mean": -909.2796020507812, + "KL/rejected_KL_mean": -1099.40380859375, + "KL/std": 545.5037841796875, + "epoch": 0.869309838472834, + "fcm_dpo/beta": 0.0009508398361504078, + "fcm_dpo/delta": 0.039905961602926254, + "fcm_dpo/margin": 380.2485656738281, + "fcm_dpo/q_t": 0.4189043939113617, + "grad_norm": 33.25096893310547, + "learning_rate": 2.6209177161234442e-08, + "logits/chosen": -1.052534818649292, + "logits/rejected": -1.05497407913208, + "logps/chosen": -767.0955810546875, + "logps/ref_chosen": -47.94025421142578, + "logps/ref_rejected": -75.73287963867188, + "logps/rejected": -1175.13671875, + "loss": 1.1759, + "margin_dpo/margin_mean": 380.24853515625, + "margin_dpo/margin_std": 739.9800415039062, + "step": 592 + }, + { + "KL/chosen_KL_mean": -685.7623901367188, + "KL/mean": -834.489990234375, + "KL/rejected_KL_mean": -983.2177124023438, + "KL/std": 593.387939453125, + "epoch": 0.8707782672540382, + "fcm_dpo/beta": 0.0009585937950760126, + "fcm_dpo/delta": 0.026102518662810326, + "fcm_dpo/margin": 297.45526123046875, + "fcm_dpo/q_t": 0.43642657995224, + "grad_norm": 60.29446029663086, + "learning_rate": 2.564009866938349e-08, + "logits/chosen": -0.925950825214386, + "logits/rejected": -0.916424036026001, + "logps/chosen": -734.453125, + "logps/ref_chosen": -48.690757751464844, + "logps/ref_rejected": -60.90800094604492, + "logps/rejected": -1044.125732421875, + "loss": 1.2163, + "margin_dpo/margin_mean": 297.45526123046875, + "margin_dpo/margin_std": 659.6752319335938, + "step": 593 + }, + { + "KL/chosen_KL_mean": -658.8277587890625, + "KL/mean": -844.7755737304688, + "KL/rejected_KL_mean": -1030.723388671875, + "KL/std": 573.47119140625, + "epoch": 0.8722466960352423, + "fcm_dpo/beta": 0.0009541836334392428, + "fcm_dpo/delta": -0.046220000833272934, + "fcm_dpo/margin": 371.89556884765625, + "fcm_dpo/q_t": 0.41900911927223206, + "grad_norm": 36.9598388671875, + "learning_rate": 2.5076932269588708e-08, + "logits/chosen": -1.0531035661697388, + "logits/rejected": -1.044195294380188, + "logps/chosen": -713.7626953125, + "logps/ref_chosen": -54.93488693237305, + "logps/ref_rejected": -86.09967803955078, + "logps/rejected": -1116.822998046875, + "loss": 1.1458, + "margin_dpo/margin_mean": 371.8955993652344, + "margin_dpo/margin_std": 604.8126831054688, + "step": 594 + }, + { + "KL/chosen_KL_mean": -617.7359619140625, + "KL/mean": -833.1766357421875, + "KL/rejected_KL_mean": -1048.6171875, + "KL/std": 538.6633911132812, + "epoch": 0.8737151248164464, + "fcm_dpo/beta": 0.0009470278164371848, + "fcm_dpo/delta": -0.00846764538437128, + "fcm_dpo/margin": 430.8812255859375, + "fcm_dpo/q_t": 0.4093731641769409, + "grad_norm": 39.55894470214844, + "learning_rate": 2.451969280180849e-08, + "logits/chosen": -1.0494012832641602, + "logits/rejected": -1.0700163841247559, + "logps/chosen": -667.1563720703125, + "logps/ref_chosen": -49.4204216003418, + "logps/ref_rejected": -80.62731170654297, + "logps/rejected": -1129.24462890625, + "loss": 1.0908, + "margin_dpo/margin_mean": 430.8812255859375, + "margin_dpo/margin_std": 596.7766723632812, + "step": 595 + }, + { + "KL/chosen_KL_mean": -716.04296875, + "KL/mean": -864.3089599609375, + "KL/rejected_KL_mean": -1012.574951171875, + "KL/std": 536.0817260742188, + "epoch": 0.8751835535976505, + "fcm_dpo/beta": 0.0009654526365920901, + "fcm_dpo/delta": 0.11698772758245468, + "fcm_dpo/margin": 296.531982421875, + "fcm_dpo/q_t": 0.43718546628952026, + "grad_norm": 68.12952423095703, + "learning_rate": 2.396839494982103e-08, + "logits/chosen": -1.018229603767395, + "logits/rejected": -0.9841310381889343, + "logps/chosen": -775.834716796875, + "logps/ref_chosen": -59.791683197021484, + "logps/ref_rejected": -80.09111785888672, + "logps/rejected": -1092.666015625, + "loss": 1.2079, + "margin_dpo/margin_mean": 296.531982421875, + "margin_dpo/margin_std": 639.1839599609375, + "step": 596 + }, + { + "KL/chosen_KL_mean": -683.271728515625, + "KL/mean": -945.9227905273438, + "KL/rejected_KL_mean": -1208.5738525390625, + "KL/std": 629.5780639648438, + "epoch": 0.8766519823788547, + "fcm_dpo/beta": 0.0009458234999328852, + "fcm_dpo/delta": -0.10513734817504883, + "fcm_dpo/margin": 525.3020629882812, + "fcm_dpo/q_t": 0.3902336359024048, + "grad_norm": 26.535120010375977, + "learning_rate": 2.3423053240837514e-08, + "logits/chosen": -0.9855027198791504, + "logits/rejected": -1.036144733428955, + "logps/chosen": -740.5325317382812, + "logps/ref_chosen": -57.26078796386719, + "logps/ref_rejected": -100.6937255859375, + "logps/rejected": -1309.267578125, + "loss": 1.0533, + "margin_dpo/margin_mean": 525.3020629882812, + "margin_dpo/margin_std": 688.6737060546875, + "step": 597 + }, + { + "KL/chosen_KL_mean": -684.9342041015625, + "KL/mean": -889.61328125, + "KL/rejected_KL_mean": -1094.292236328125, + "KL/std": 542.2847900390625, + "epoch": 0.8781204111600588, + "fcm_dpo/beta": 0.0009448026539757848, + "fcm_dpo/delta": 0.012614801526069641, + "fcm_dpo/margin": 409.35821533203125, + "fcm_dpo/q_t": 0.41108059883117676, + "grad_norm": 40.530555725097656, + "learning_rate": 2.2883682045119062e-08, + "logits/chosen": -1.0675361156463623, + "logits/rejected": -1.0781702995300293, + "logps/chosen": -737.45263671875, + "logps/ref_chosen": -52.51850509643555, + "logps/ref_rejected": -89.44385528564453, + "logps/rejected": -1183.7362060546875, + "loss": 1.1156, + "margin_dpo/margin_mean": 409.3581848144531, + "margin_dpo/margin_std": 592.7867431640625, + "step": 598 + }, + { + "KL/chosen_KL_mean": -699.3439331054688, + "KL/mean": -867.4818725585938, + "KL/rejected_KL_mean": -1035.619873046875, + "KL/std": 521.607421875, + "epoch": 0.8795888399412628, + "fcm_dpo/beta": 0.0009433372761122882, + "fcm_dpo/delta": -0.04999526962637901, + "fcm_dpo/margin": 336.27593994140625, + "fcm_dpo/q_t": 0.4230959117412567, + "grad_norm": 32.54823303222656, + "learning_rate": 2.2350295575598367e-08, + "logits/chosen": -1.0585663318634033, + "logits/rejected": -1.0668901205062866, + "logps/chosen": -749.1466064453125, + "logps/ref_chosen": -49.802677154541016, + "logps/ref_rejected": -82.978515625, + "logps/rejected": -1118.598388671875, + "loss": 1.1528, + "margin_dpo/margin_mean": 336.27593994140625, + "margin_dpo/margin_std": 495.86431884765625, + "step": 599 + }, + { + "KL/chosen_KL_mean": -743.595947265625, + "KL/mean": -904.8917846679688, + "KL/rejected_KL_mean": -1066.1876220703125, + "KL/std": 529.2329711914062, + "epoch": 0.8810572687224669, + "fcm_dpo/beta": 0.0009553448762744665, + "fcm_dpo/delta": 0.0948304608464241, + "fcm_dpo/margin": 322.5916748046875, + "fcm_dpo/q_t": 0.4312170147895813, + "grad_norm": 32.78029251098633, + "learning_rate": 2.1822907887504932e-08, + "logits/chosen": -1.1024036407470703, + "logits/rejected": -1.0984766483306885, + "logps/chosen": -810.03076171875, + "logps/ref_chosen": -66.43487548828125, + "logps/ref_rejected": -85.45649719238281, + "logps/rejected": -1151.64404296875, + "loss": 1.1881, + "margin_dpo/margin_mean": 322.5916748046875, + "margin_dpo/margin_std": 638.26123046875, + "step": 600 + }, + { + "KL/chosen_KL_mean": -767.73779296875, + "KL/mean": -976.8656005859375, + "KL/rejected_KL_mean": -1185.993408203125, + "KL/std": 572.8872680664062, + "epoch": 0.882525697503671, + "fcm_dpo/beta": 0.0009599350159987807, + "fcm_dpo/delta": -0.0015968242660164833, + "fcm_dpo/margin": 418.25567626953125, + "fcm_dpo/q_t": 0.40683579444885254, + "grad_norm": 36.36787796020508, + "learning_rate": 2.1301532877994742e-08, + "logits/chosen": -1.050248622894287, + "logits/rejected": -1.0717060565948486, + "logps/chosen": -826.8714599609375, + "logps/ref_chosen": -59.13361358642578, + "logps/ref_rejected": -94.69093322753906, + "logps/rejected": -1280.684326171875, + "loss": 1.0942, + "margin_dpo/margin_mean": 418.2556457519531, + "margin_dpo/margin_std": 568.2978515625, + "step": 601 + }, + { + "KL/chosen_KL_mean": -537.829345703125, + "KL/mean": -798.304931640625, + "KL/rejected_KL_mean": -1058.780517578125, + "KL/std": 537.489990234375, + "epoch": 0.8839941262848752, + "fcm_dpo/beta": 0.0009524415945634246, + "fcm_dpo/delta": -0.10132233053445816, + "fcm_dpo/margin": 520.951171875, + "fcm_dpo/q_t": 0.38715463876724243, + "grad_norm": 62.27396774291992, + "learning_rate": 2.0786184285784298e-08, + "logits/chosen": -1.0703651905059814, + "logits/rejected": -1.1087815761566162, + "logps/chosen": -586.4228515625, + "logps/ref_chosen": -48.59352111816406, + "logps/ref_rejected": -87.6685562133789, + "logps/rejected": -1146.4490966796875, + "loss": 1.0206, + "margin_dpo/margin_mean": 520.9512329101562, + "margin_dpo/margin_std": 575.708740234375, + "step": 602 + }, + { + "KL/chosen_KL_mean": -672.793701171875, + "KL/mean": -911.109619140625, + "KL/rejected_KL_mean": -1149.425537109375, + "KL/std": 605.7391967773438, + "epoch": 0.8854625550660793, + "fcm_dpo/beta": 0.0009342863922938704, + "fcm_dpo/delta": -0.04748653993010521, + "fcm_dpo/margin": 476.6319580078125, + "fcm_dpo/q_t": 0.4025436341762543, + "grad_norm": 34.53113555908203, + "learning_rate": 2.0276875690788204e-08, + "logits/chosen": -1.0783579349517822, + "logits/rejected": -1.0710588693618774, + "logps/chosen": -743.2083129882812, + "logps/ref_chosen": -70.41461944580078, + "logps/ref_rejected": -100.32559967041016, + "logps/rejected": -1249.751220703125, + "loss": 1.0872, + "margin_dpo/margin_mean": 476.6319580078125, + "margin_dpo/margin_std": 712.30078125, + "step": 603 + }, + { + "KL/chosen_KL_mean": -660.3494873046875, + "KL/mean": -920.5330810546875, + "KL/rejected_KL_mean": -1180.716552734375, + "KL/std": 584.669677734375, + "epoch": 0.8869309838472834, + "fcm_dpo/beta": 0.0009213722078129649, + "fcm_dpo/delta": -0.08345615863800049, + "fcm_dpo/margin": 520.3671264648438, + "fcm_dpo/q_t": 0.39393433928489685, + "grad_norm": 40.38688278198242, + "learning_rate": 1.977362051376158e-08, + "logits/chosen": -1.0343176126480103, + "logits/rejected": -1.0740426778793335, + "logps/chosen": -706.8075561523438, + "logps/ref_chosen": -46.45808029174805, + "logps/ref_rejected": -91.8544921875, + "logps/rejected": -1272.571044921875, + "loss": 1.0614, + "margin_dpo/margin_mean": 520.3671875, + "margin_dpo/margin_std": 716.9524536132812, + "step": 604 + }, + { + "KL/chosen_KL_mean": -681.8074951171875, + "KL/mean": -865.7674560546875, + "KL/rejected_KL_mean": -1049.7274169921875, + "KL/std": 528.2335205078125, + "epoch": 0.8883994126284875, + "fcm_dpo/beta": 0.0009236353216692805, + "fcm_dpo/delta": 0.06231696531176567, + "fcm_dpo/margin": 367.9199523925781, + "fcm_dpo/q_t": 0.4245484471321106, + "grad_norm": 31.172998428344727, + "learning_rate": 1.9276432015946446e-08, + "logits/chosen": -1.030253291130066, + "logits/rejected": -1.046684980392456, + "logps/chosen": -748.0568237304688, + "logps/ref_chosen": -66.24933624267578, + "logps/ref_rejected": -102.30496978759766, + "logps/rejected": -1152.032470703125, + "loss": 1.1514, + "margin_dpo/margin_mean": 367.919921875, + "margin_dpo/margin_std": 631.5323486328125, + "step": 605 + }, + { + "KL/chosen_KL_mean": -690.3057861328125, + "KL/mean": -892.7607421875, + "KL/rejected_KL_mean": -1095.2156982421875, + "KL/std": 546.4476928710938, + "epoch": 0.8898678414096917, + "fcm_dpo/beta": 0.0009358040988445282, + "fcm_dpo/delta": 0.021479565650224686, + "fcm_dpo/margin": 404.9098205566406, + "fcm_dpo/q_t": 0.41396206617355347, + "grad_norm": 34.67936706542969, + "learning_rate": 1.8785323298722093e-08, + "logits/chosen": -1.0415606498718262, + "logits/rejected": -1.0564000606536865, + "logps/chosen": -745.1249389648438, + "logps/ref_chosen": -54.819122314453125, + "logps/ref_rejected": -98.37146759033203, + "logps/rejected": -1193.587158203125, + "loss": 1.1119, + "margin_dpo/margin_mean": 404.90985107421875, + "margin_dpo/margin_std": 580.0758666992188, + "step": 606 + }, + { + "KL/chosen_KL_mean": -705.6632080078125, + "KL/mean": -875.1304931640625, + "KL/rejected_KL_mean": -1044.597900390625, + "KL/std": 545.179931640625, + "epoch": 0.8913362701908958, + "fcm_dpo/beta": 0.0009470410877838731, + "fcm_dpo/delta": 0.08123958110809326, + "fcm_dpo/margin": 338.9346923828125, + "fcm_dpo/q_t": 0.4270426332950592, + "grad_norm": 25.185142517089844, + "learning_rate": 1.8300307303259904e-08, + "logits/chosen": -1.0110514163970947, + "logits/rejected": -0.9996987581253052, + "logps/chosen": -763.7471923828125, + "logps/ref_chosen": -58.08403778076172, + "logps/ref_rejected": -79.777099609375, + "logps/rejected": -1124.375, + "loss": 1.1634, + "margin_dpo/margin_mean": 338.9346923828125, + "margin_dpo/margin_std": 585.0699462890625, + "step": 607 + }, + { + "KL/chosen_KL_mean": -620.40673828125, + "KL/mean": -821.8770751953125, + "KL/rejected_KL_mean": -1023.3474731445312, + "KL/std": 486.0280456542969, + "epoch": 0.8928046989720999, + "fcm_dpo/beta": 0.0009526251233182847, + "fcm_dpo/delta": 0.016666967421770096, + "fcm_dpo/margin": 402.94073486328125, + "fcm_dpo/q_t": 0.410768061876297, + "grad_norm": 29.697845458984375, + "learning_rate": 1.7821396810182437e-08, + "logits/chosen": -1.0471224784851074, + "logits/rejected": -1.0614254474639893, + "logps/chosen": -677.8575439453125, + "logps/ref_chosen": -57.450836181640625, + "logps/ref_rejected": -94.77339172363281, + "logps/rejected": -1118.120849609375, + "loss": 1.0917, + "margin_dpo/margin_mean": 402.94073486328125, + "margin_dpo/margin_std": 497.81024169921875, + "step": 608 + }, + { + "KL/chosen_KL_mean": -658.7738037109375, + "KL/mean": -919.4385986328125, + "KL/rejected_KL_mean": -1180.103271484375, + "KL/std": 681.7255249023438, + "epoch": 0.8942731277533039, + "fcm_dpo/beta": 0.000938827870413661, + "fcm_dpo/delta": -0.09398971498012543, + "fcm_dpo/margin": 521.3295288085938, + "fcm_dpo/q_t": 0.39671069383621216, + "grad_norm": 34.812618255615234, + "learning_rate": 1.7348604439226617e-08, + "logits/chosen": -1.0940017700195312, + "logits/rejected": -1.117903709411621, + "logps/chosen": -717.5792236328125, + "logps/ref_chosen": -58.805355072021484, + "logps/ref_rejected": -88.81600952148438, + "logps/rejected": -1268.91943359375, + "loss": 1.0694, + "margin_dpo/margin_mean": 521.32958984375, + "margin_dpo/margin_std": 807.185302734375, + "step": 609 + }, + { + "KL/chosen_KL_mean": -647.0574340820312, + "KL/mean": -813.5626220703125, + "KL/rejected_KL_mean": -980.0677490234375, + "KL/std": 518.0669555664062, + "epoch": 0.895741556534508, + "fcm_dpo/beta": 0.0009477235144004226, + "fcm_dpo/delta": 0.08690465986728668, + "fcm_dpo/margin": 333.0103759765625, + "fcm_dpo/q_t": 0.4269651770591736, + "grad_norm": 41.71244812011719, + "learning_rate": 1.6881942648911074e-08, + "logits/chosen": -0.9856526851654053, + "logits/rejected": -0.9613279104232788, + "logps/chosen": -712.75244140625, + "logps/ref_chosen": -65.69503784179688, + "logps/ref_rejected": -83.40538787841797, + "logps/rejected": -1063.47314453125, + "loss": 1.1704, + "margin_dpo/margin_mean": 333.0104064941406, + "margin_dpo/margin_std": 598.3924560546875, + "step": 610 + }, + { + "KL/chosen_KL_mean": -674.8112182617188, + "KL/mean": -951.8814697265625, + "KL/rejected_KL_mean": -1228.951904296875, + "KL/std": 677.4830932617188, + "epoch": 0.8972099853157122, + "fcm_dpo/beta": 0.000932648777961731, + "fcm_dpo/delta": -0.12321210652589798, + "fcm_dpo/margin": 554.140625, + "fcm_dpo/q_t": 0.3880399465560913, + "grad_norm": 28.344263076782227, + "learning_rate": 1.6421423736208e-08, + "logits/chosen": -1.058631181716919, + "logits/rejected": -1.1068617105484009, + "logps/chosen": -727.41064453125, + "logps/ref_chosen": -52.59946823120117, + "logps/ref_rejected": -86.33099365234375, + "logps/rejected": -1315.2828369140625, + "loss": 1.0449, + "margin_dpo/margin_mean": 554.140625, + "margin_dpo/margin_std": 762.7548828125, + "step": 611 + }, + { + "KL/chosen_KL_mean": -731.4866943359375, + "KL/mean": -947.14697265625, + "KL/rejected_KL_mean": -1162.8072509765625, + "KL/std": 552.690185546875, + "epoch": 0.8986784140969163, + "fcm_dpo/beta": 0.0009295167401432991, + "fcm_dpo/delta": -0.001130029559135437, + "fcm_dpo/margin": 431.320556640625, + "fcm_dpo/q_t": 0.40825164318084717, + "grad_norm": 29.17259979248047, + "learning_rate": 1.5967059836219042e-08, + "logits/chosen": -1.0582959651947021, + "logits/rejected": -1.0618293285369873, + "logps/chosen": -790.8104248046875, + "logps/ref_chosen": -59.32372283935547, + "logps/ref_rejected": -88.31239318847656, + "logps/rejected": -1251.11962890625, + "loss": 1.0948, + "margin_dpo/margin_mean": 431.320556640625, + "margin_dpo/margin_std": 588.9520263671875, + "step": 612 + }, + { + "KL/chosen_KL_mean": -643.850830078125, + "KL/mean": -893.5146484375, + "KL/rejected_KL_mean": -1143.178466796875, + "KL/std": 606.8695068359375, + "epoch": 0.9001468428781204, + "fcm_dpo/beta": 0.0009158622706308961, + "fcm_dpo/delta": -0.0603950060904026, + "fcm_dpo/margin": 499.3277282714844, + "fcm_dpo/q_t": 0.39547261595726013, + "grad_norm": 36.79396057128906, + "learning_rate": 1.551886292185553e-08, + "logits/chosen": -1.0371217727661133, + "logits/rejected": -1.0906472206115723, + "logps/chosen": -703.5807495117188, + "logps/ref_chosen": -59.72996520996094, + "logps/ref_rejected": -105.10752868652344, + "logps/rejected": -1248.2861328125, + "loss": 1.0542, + "margin_dpo/margin_mean": 499.3277282714844, + "margin_dpo/margin_std": 621.1013793945312, + "step": 613 + }, + { + "KL/chosen_KL_mean": -698.552978515625, + "KL/mean": -949.2009887695312, + "KL/rejected_KL_mean": -1199.84912109375, + "KL/std": 597.0087890625, + "epoch": 0.9016152716593245, + "fcm_dpo/beta": 0.0009089302038773894, + "fcm_dpo/delta": -0.058258313685655594, + "fcm_dpo/margin": 501.296142578125, + "fcm_dpo/q_t": 0.398156076669693, + "grad_norm": 42.726219177246094, + "learning_rate": 1.507684480352292e-08, + "logits/chosen": -1.0097222328186035, + "logits/rejected": -1.0844841003417969, + "logps/chosen": -751.491943359375, + "logps/ref_chosen": -52.93898010253906, + "logps/ref_rejected": -104.67938232421875, + "logps/rejected": -1304.5284423828125, + "loss": 1.0711, + "margin_dpo/margin_mean": 501.296142578125, + "margin_dpo/margin_std": 695.1583251953125, + "step": 614 + }, + { + "KL/chosen_KL_mean": -673.9354248046875, + "KL/mean": -883.3118896484375, + "KL/rejected_KL_mean": -1092.688232421875, + "KL/std": 623.587158203125, + "epoch": 0.9030837004405287, + "fcm_dpo/beta": 0.0009105931967496872, + "fcm_dpo/delta": 0.019142257049679756, + "fcm_dpo/margin": 418.7528076171875, + "fcm_dpo/q_t": 0.41320300102233887, + "grad_norm": 30.516206741333008, + "learning_rate": 1.4641017128809801e-08, + "logits/chosen": -1.0232172012329102, + "logits/rejected": -1.0445995330810547, + "logps/chosen": -739.7527465820312, + "logps/ref_chosen": -65.81727600097656, + "logps/ref_rejected": -95.17749786376953, + "logps/rejected": -1187.86572265625, + "loss": 1.1256, + "margin_dpo/margin_mean": 418.7528076171875, + "margin_dpo/margin_std": 670.531982421875, + "step": 615 + }, + { + "KL/chosen_KL_mean": -789.86279296875, + "KL/mean": -958.6298828125, + "KL/rejected_KL_mean": -1127.3968505859375, + "KL/std": 513.2537841796875, + "epoch": 0.9045521292217328, + "fcm_dpo/beta": 0.0009216421167366207, + "fcm_dpo/delta": 0.0916953831911087, + "fcm_dpo/margin": 337.53411865234375, + "fcm_dpo/q_t": 0.4289320111274719, + "grad_norm": 29.76529884338379, + "learning_rate": 1.4211391382180637e-08, + "logits/chosen": -1.0982820987701416, + "logits/rejected": -1.080725073814392, + "logps/chosen": -854.9956665039062, + "logps/ref_chosen": -65.13285827636719, + "logps/ref_rejected": -74.70050048828125, + "logps/rejected": -1202.097412109375, + "loss": 1.1665, + "margin_dpo/margin_mean": 337.53411865234375, + "margin_dpo/margin_std": 574.6236572265625, + "step": 616 + }, + { + "KL/chosen_KL_mean": -749.3828125, + "KL/mean": -869.354248046875, + "KL/rejected_KL_mean": -989.32568359375, + "KL/std": 496.443359375, + "epoch": 0.9060205580029369, + "fcm_dpo/beta": 0.0009326934814453125, + "fcm_dpo/delta": 0.079354427754879, + "fcm_dpo/margin": 239.94287109375, + "fcm_dpo/q_t": 0.4489472508430481, + "grad_norm": 61.406524658203125, + "learning_rate": 1.378797888467345e-08, + "logits/chosen": -1.001933217048645, + "logits/rejected": -0.9698858261108398, + "logps/chosen": -812.3883056640625, + "logps/ref_chosen": -63.005550384521484, + "logps/ref_rejected": -64.234130859375, + "logps/rejected": -1053.559814453125, + "loss": 1.2466, + "margin_dpo/margin_mean": 239.94284057617188, + "margin_dpo/margin_std": 587.9033813476562, + "step": 617 + }, + { + "KL/chosen_KL_mean": -782.6476440429688, + "KL/mean": -1043.6942138671875, + "KL/rejected_KL_mean": -1304.74072265625, + "KL/std": 657.9475708007812, + "epoch": 0.9074889867841409, + "fcm_dpo/beta": 0.0009334392379969358, + "fcm_dpo/delta": -0.0920899510383606, + "fcm_dpo/margin": 522.0931396484375, + "fcm_dpo/q_t": 0.39517101645469666, + "grad_norm": 39.66215896606445, + "learning_rate": 1.3370790793601371e-08, + "logits/chosen": -1.0424597263336182, + "logits/rejected": -1.0812008380889893, + "logps/chosen": -849.7490234375, + "logps/ref_chosen": -67.10134887695312, + "logps/ref_rejected": -92.15340423583984, + "logps/rejected": -1396.8941650390625, + "loss": 1.0962, + "margin_dpo/margin_mean": 522.0931396484375, + "margin_dpo/margin_std": 848.4417114257812, + "step": 618 + }, + { + "KL/chosen_KL_mean": -782.80810546875, + "KL/mean": -982.2987060546875, + "KL/rejected_KL_mean": -1181.789306640625, + "KL/std": 620.9131469726562, + "epoch": 0.908957415565345, + "fcm_dpo/beta": 0.000927778659388423, + "fcm_dpo/delta": 0.030953753739595413, + "fcm_dpo/margin": 398.981201171875, + "fcm_dpo/q_t": 0.42306482791900635, + "grad_norm": 59.24640655517578, + "learning_rate": 1.2959838102258535e-08, + "logits/chosen": -1.0202059745788574, + "logits/rejected": -1.0303071737289429, + "logps/chosen": -838.7863159179688, + "logps/ref_chosen": -55.978233337402344, + "logps/ref_rejected": -93.1854019165039, + "logps/rejected": -1274.974609375, + "loss": 1.1805, + "margin_dpo/margin_mean": 398.981201171875, + "margin_dpo/margin_std": 824.7608642578125, + "step": 619 + }, + { + "KL/chosen_KL_mean": -691.927490234375, + "KL/mean": -884.0120239257812, + "KL/rejected_KL_mean": -1076.0965576171875, + "KL/std": 550.659912109375, + "epoch": 0.9104258443465492, + "fcm_dpo/beta": 0.0009312491165474057, + "fcm_dpo/delta": 0.04368671402335167, + "fcm_dpo/margin": 384.1690673828125, + "fcm_dpo/q_t": 0.4192659258842468, + "grad_norm": 35.7849235534668, + "learning_rate": 1.2555131639630567e-08, + "logits/chosen": -1.0948054790496826, + "logits/rejected": -1.1047601699829102, + "logps/chosen": -751.7249755859375, + "logps/ref_chosen": -59.79750061035156, + "logps/ref_rejected": -78.41075134277344, + "logps/rejected": -1154.50732421875, + "loss": 1.1388, + "margin_dpo/margin_mean": 384.1690979003906, + "margin_dpo/margin_std": 621.861572265625, + "step": 620 + }, + { + "KL/chosen_KL_mean": -702.0250244140625, + "KL/mean": -988.3785400390625, + "KL/rejected_KL_mean": -1274.73193359375, + "KL/std": 669.7676391601562, + "epoch": 0.9118942731277533, + "fcm_dpo/beta": 0.0009251298615708947, + "fcm_dpo/delta": -0.1372598260641098, + "fcm_dpo/margin": 572.7069091796875, + "fcm_dpo/q_t": 0.3810199499130249, + "grad_norm": 43.405094146728516, + "learning_rate": 1.2156682070109086e-08, + "logits/chosen": -1.1240839958190918, + "logits/rejected": -1.1762495040893555, + "logps/chosen": -755.9588012695312, + "logps/ref_chosen": -53.93375778198242, + "logps/ref_rejected": -88.36951446533203, + "logps/rejected": -1363.1015625, + "loss": 1.0337, + "margin_dpo/margin_mean": 572.7069091796875, + "margin_dpo/margin_std": 737.669189453125, + "step": 621 + }, + { + "KL/chosen_KL_mean": -679.5125732421875, + "KL/mean": -879.1805419921875, + "KL/rejected_KL_mean": -1078.8485107421875, + "KL/std": 515.3754272460938, + "epoch": 0.9133627019089574, + "fcm_dpo/beta": 0.0009116814471781254, + "fcm_dpo/delta": 0.03691772744059563, + "fcm_dpo/margin": 399.3359680175781, + "fcm_dpo/q_t": 0.4187527894973755, + "grad_norm": 34.01809310913086, + "learning_rate": 1.1764499893210878e-08, + "logits/chosen": -0.9621305465698242, + "logits/rejected": -0.9480363726615906, + "logps/chosen": -739.79833984375, + "logps/ref_chosen": -60.28582000732422, + "logps/ref_rejected": -85.51873779296875, + "logps/rejected": -1164.3673095703125, + "loss": 1.1357, + "margin_dpo/margin_mean": 399.3359680175781, + "margin_dpo/margin_std": 653.330322265625, + "step": 622 + }, + { + "KL/chosen_KL_mean": -744.8868408203125, + "KL/mean": -900.721435546875, + "KL/rejected_KL_mean": -1056.55615234375, + "KL/std": 522.1895141601562, + "epoch": 0.9148311306901615, + "fcm_dpo/beta": 0.0009337057126685977, + "fcm_dpo/delta": 0.11226323246955872, + "fcm_dpo/margin": 311.66925048828125, + "fcm_dpo/q_t": 0.435872882604599, + "grad_norm": 34.93113327026367, + "learning_rate": 1.1378595443300998e-08, + "logits/chosen": -1.1241803169250488, + "logits/rejected": -1.125817060470581, + "logps/chosen": -809.0438232421875, + "logps/ref_chosen": -64.1569595336914, + "logps/ref_rejected": -85.08304595947266, + "logps/rejected": -1141.63916015625, + "loss": 1.1971, + "margin_dpo/margin_mean": 311.66925048828125, + "margin_dpo/margin_std": 637.570556640625, + "step": 623 + }, + { + "KL/chosen_KL_mean": -713.7887573242188, + "KL/mean": -955.57958984375, + "KL/rejected_KL_mean": -1197.370361328125, + "KL/std": 544.0978393554688, + "epoch": 0.9162995594713657, + "fcm_dpo/beta": 0.0009321460966020823, + "fcm_dpo/delta": -0.05314317345619202, + "fcm_dpo/margin": 483.5816650390625, + "fcm_dpo/q_t": 0.3945468068122864, + "grad_norm": 43.926517486572266, + "learning_rate": 1.0998978889320582e-08, + "logits/chosen": -1.0758100748062134, + "logits/rejected": -1.0797600746154785, + "logps/chosen": -785.7073974609375, + "logps/ref_chosen": -71.91862487792969, + "logps/ref_rejected": -97.13203430175781, + "logps/rejected": -1294.50244140625, + "loss": 1.0544, + "margin_dpo/margin_mean": 483.5816345214844, + "margin_dpo/margin_std": 579.5042114257812, + "step": 624 + }, + { + "KL/chosen_KL_mean": -692.0985107421875, + "KL/mean": -961.903076171875, + "KL/rejected_KL_mean": -1231.707763671875, + "KL/std": 610.5233764648438, + "epoch": 0.9177679882525698, + "fcm_dpo/beta": 0.0009185270173475146, + "fcm_dpo/delta": -0.10053034871816635, + "fcm_dpo/margin": 539.6091918945312, + "fcm_dpo/q_t": 0.386716365814209, + "grad_norm": 63.644493103027344, + "learning_rate": 1.0625660234518913e-08, + "logits/chosen": -1.0029915571212769, + "logits/rejected": -1.0321152210235596, + "logps/chosen": -750.4405517578125, + "logps/ref_chosen": -58.342071533203125, + "logps/ref_rejected": -86.09038543701172, + "logps/rejected": -1317.798095703125, + "loss": 1.0132, + "margin_dpo/margin_mean": 539.6091918945312, + "margin_dpo/margin_std": 572.4862670898438, + "step": 625 + }, + { + "KL/chosen_KL_mean": -853.5245971679688, + "KL/mean": -1002.6884765625, + "KL/rejected_KL_mean": -1151.852294921875, + "KL/std": 662.7627563476562, + "epoch": 0.9192364170337739, + "fcm_dpo/beta": 0.0009280656231567264, + "fcm_dpo/delta": 0.1266339272260666, + "fcm_dpo/margin": 298.32769775390625, + "fcm_dpo/q_t": 0.4352928698062897, + "grad_norm": 35.488224029541016, + "learning_rate": 1.0258649316189721e-08, + "logits/chosen": -0.9830967783927917, + "logits/rejected": -0.974500298500061, + "logps/chosen": -928.63720703125, + "logps/ref_chosen": -75.11260986328125, + "logps/ref_rejected": -99.188720703125, + "logps/rejected": -1251.041015625, + "loss": 1.2186, + "margin_dpo/margin_mean": 298.3277282714844, + "margin_dpo/margin_std": 647.6847534179688, + "step": 626 + }, + { + "KL/chosen_KL_mean": -586.8433227539062, + "KL/mean": -882.2752685546875, + "KL/rejected_KL_mean": -1177.707275390625, + "KL/std": 712.061767578125, + "epoch": 0.920704845814978, + "fcm_dpo/beta": 0.0009197980398312211, + "fcm_dpo/delta": -0.15168313682079315, + "fcm_dpo/margin": 590.864013671875, + "fcm_dpo/q_t": 0.3880508542060852, + "grad_norm": 31.904329299926758, + "learning_rate": 9.897955805412e-09, + "logits/chosen": -0.9286566972732544, + "logits/rejected": -1.0041477680206299, + "logps/chosen": -634.5864868164062, + "logps/ref_chosen": -47.74314880371094, + "logps/ref_rejected": -106.75448608398438, + "logps/rejected": -1284.4617919921875, + "loss": 1.0393, + "margin_dpo/margin_mean": 590.864013671875, + "margin_dpo/margin_std": 831.529296875, + "step": 627 + }, + { + "KL/chosen_KL_mean": -786.1205444335938, + "KL/mean": -1010.9585571289062, + "KL/rejected_KL_mean": -1235.796630859375, + "KL/std": 596.643798828125, + "epoch": 0.922173274596182, + "fcm_dpo/beta": 0.0009038818534463644, + "fcm_dpo/delta": -0.006778441369533539, + "fcm_dpo/margin": 449.6760559082031, + "fcm_dpo/q_t": 0.4091545343399048, + "grad_norm": 33.698768615722656, + "learning_rate": 9.543589206795238e-09, + "logits/chosen": -1.0994905233383179, + "logits/rejected": -1.1181318759918213, + "logps/chosen": -846.303466796875, + "logps/ref_chosen": -60.182945251464844, + "logps/ref_rejected": -101.55467224121094, + "logps/rejected": -1337.351318359375, + "loss": 1.1095, + "margin_dpo/margin_mean": 449.6760559082031, + "margin_dpo/margin_std": 686.2415161132812, + "step": 628 + }, + { + "KL/chosen_KL_mean": -741.7413330078125, + "KL/mean": -951.3485107421875, + "KL/rejected_KL_mean": -1160.955810546875, + "KL/std": 565.2870483398438, + "epoch": 0.9236417033773862, + "fcm_dpo/beta": 0.0009075739653781056, + "fcm_dpo/delta": 0.020304802805185318, + "fcm_dpo/margin": 419.21453857421875, + "fcm_dpo/q_t": 0.4112434983253479, + "grad_norm": 35.47370910644531, + "learning_rate": 9.19555885822887e-09, + "logits/chosen": -1.0898232460021973, + "logits/rejected": -1.107914686203003, + "logps/chosen": -805.954833984375, + "logps/ref_chosen": -64.21354675292969, + "logps/ref_rejected": -91.65367126464844, + "logps/rejected": -1252.609375, + "loss": 1.1025, + "margin_dpo/margin_mean": 419.21453857421875, + "margin_dpo/margin_std": 558.5067138671875, + "step": 629 + }, + { + "KL/chosen_KL_mean": -688.7796020507812, + "KL/mean": -806.9313354492188, + "KL/rejected_KL_mean": -925.0830078125, + "KL/std": 583.607421875, + "epoch": 0.9251101321585903, + "fcm_dpo/beta": 0.0009130248799920082, + "fcm_dpo/delta": 0.04915432631969452, + "fcm_dpo/margin": 236.30340576171875, + "fcm_dpo/q_t": 0.45570600032806396, + "grad_norm": 55.93423080444336, + "learning_rate": 8.85387393063622e-09, + "logits/chosen": -1.0273975133895874, + "logits/rejected": -1.0003504753112793, + "logps/chosen": -748.0706176757812, + "logps/ref_chosen": -59.29100036621094, + "logps/ref_rejected": -83.59829711914062, + "logps/rejected": -1008.6813354492188, + "loss": 1.2755, + "margin_dpo/margin_mean": 236.30337524414062, + "margin_dpo/margin_std": 700.1854248046875, + "step": 630 + }, + { + "KL/chosen_KL_mean": -818.428955078125, + "KL/mean": -1009.658935546875, + "KL/rejected_KL_mean": -1200.888916015625, + "KL/std": 587.8419189453125, + "epoch": 0.9265785609397944, + "fcm_dpo/beta": 0.0009250535513274372, + "fcm_dpo/delta": 0.04782557487487793, + "fcm_dpo/margin": 382.4600830078125, + "fcm_dpo/q_t": 0.41905054450035095, + "grad_norm": 36.24885559082031, + "learning_rate": 8.518543427732949e-09, + "logits/chosen": -1.1538431644439697, + "logits/rejected": -1.1630300283432007, + "logps/chosen": -877.882568359375, + "logps/ref_chosen": -59.45360565185547, + "logps/ref_rejected": -80.95156860351562, + "logps/rejected": -1281.840576171875, + "loss": 1.1544, + "margin_dpo/margin_mean": 382.4600830078125, + "margin_dpo/margin_std": 670.58447265625, + "step": 631 + }, + { + "KL/chosen_KL_mean": -706.385986328125, + "KL/mean": -901.2720947265625, + "KL/rejected_KL_mean": -1096.158203125, + "KL/std": 529.7587280273438, + "epoch": 0.9280469897209985, + "fcm_dpo/beta": 0.0009276444325223565, + "fcm_dpo/delta": 0.039635516703128815, + "fcm_dpo/margin": 389.7721252441406, + "fcm_dpo/q_t": 0.4175952672958374, + "grad_norm": 47.56220245361328, + "learning_rate": 8.189576185789637e-09, + "logits/chosen": -1.1132001876831055, + "logits/rejected": -1.1175191402435303, + "logps/chosen": -767.737548828125, + "logps/ref_chosen": -61.35155487060547, + "logps/ref_rejected": -86.16017150878906, + "logps/rejected": -1182.318359375, + "loss": 1.1434, + "margin_dpo/margin_mean": 389.7721252441406, + "margin_dpo/margin_std": 647.5654296875, + "step": 632 + }, + { + "KL/chosen_KL_mean": -791.9481201171875, + "KL/mean": -929.552978515625, + "KL/rejected_KL_mean": -1067.157958984375, + "KL/std": 522.26171875, + "epoch": 0.9295154185022027, + "fcm_dpo/beta": 0.0009377728565596044, + "fcm_dpo/delta": 0.043396495282649994, + "fcm_dpo/margin": 275.209716796875, + "fcm_dpo/q_t": 0.4401985704898834, + "grad_norm": 48.036155700683594, + "learning_rate": 7.866980873399015e-09, + "logits/chosen": -1.1201856136322021, + "logits/rejected": -1.129399061203003, + "logps/chosen": -849.226318359375, + "logps/ref_chosen": -57.27816390991211, + "logps/ref_rejected": -91.58395385742188, + "logps/rejected": -1158.741943359375, + "loss": 1.2186, + "margin_dpo/margin_mean": 275.209716796875, + "margin_dpo/margin_std": 591.9662475585938, + "step": 633 + }, + { + "KL/chosen_KL_mean": -896.2001342773438, + "KL/mean": -1023.7356567382812, + "KL/rejected_KL_mean": -1151.271240234375, + "KL/std": 637.21044921875, + "epoch": 0.9309838472834068, + "fcm_dpo/beta": 0.0009550247923471034, + "fcm_dpo/delta": 0.06949655711650848, + "fcm_dpo/margin": 255.07102966308594, + "fcm_dpo/q_t": 0.44587743282318115, + "grad_norm": 40.304161071777344, + "learning_rate": 7.550765991247654e-09, + "logits/chosen": -1.0025546550750732, + "logits/rejected": -0.9946834444999695, + "logps/chosen": -962.819091796875, + "logps/ref_chosen": -66.61896514892578, + "logps/ref_rejected": -107.12564849853516, + "logps/rejected": -1258.3968505859375, + "loss": 1.2358, + "margin_dpo/margin_mean": 255.071044921875, + "margin_dpo/margin_std": 605.7219848632812, + "step": 634 + }, + { + "KL/chosen_KL_mean": -769.7693481445312, + "KL/mean": -949.5761108398438, + "KL/rejected_KL_mean": -1129.3828125, + "KL/std": 656.5932006835938, + "epoch": 0.9324522760646109, + "fcm_dpo/beta": 0.0009627408580854535, + "fcm_dpo/delta": 0.0557277575135231, + "fcm_dpo/margin": 359.6134948730469, + "fcm_dpo/q_t": 0.42400288581848145, + "grad_norm": 48.429481506347656, + "learning_rate": 7.240939871891699e-09, + "logits/chosen": -1.0978808403015137, + "logits/rejected": -1.0793735980987549, + "logps/chosen": -843.724853515625, + "logps/ref_chosen": -73.95551300048828, + "logps/ref_rejected": -82.50045776367188, + "logps/rejected": -1211.88330078125, + "loss": 1.1613, + "margin_dpo/margin_mean": 359.61346435546875, + "margin_dpo/margin_std": 665.019287109375, + "step": 635 + }, + { + "KL/chosen_KL_mean": -728.061279296875, + "KL/mean": -942.2210693359375, + "KL/rejected_KL_mean": -1156.380859375, + "KL/std": 644.4195556640625, + "epoch": 0.933920704845815, + "fcm_dpo/beta": 0.0009705645497888327, + "fcm_dpo/delta": -0.017428025603294373, + "fcm_dpo/margin": 428.31964111328125, + "fcm_dpo/q_t": 0.4088486135005951, + "grad_norm": 26.07723617553711, + "learning_rate": 6.937510679537628e-09, + "logits/chosen": -1.0272531509399414, + "logits/rejected": -1.0296359062194824, + "logps/chosen": -787.690185546875, + "logps/ref_chosen": -59.628910064697266, + "logps/ref_rejected": -81.97883605957031, + "logps/rejected": -1238.3597412109375, + "loss": 1.1025, + "margin_dpo/margin_mean": 428.3197021484375, + "margin_dpo/margin_std": 656.77587890625, + "step": 636 + }, + { + "KL/chosen_KL_mean": -722.7738647460938, + "KL/mean": -958.4360961914062, + "KL/rejected_KL_mean": -1194.098388671875, + "KL/std": 627.090576171875, + "epoch": 0.9353891336270191, + "fcm_dpo/beta": 0.0009508421644568443, + "fcm_dpo/delta": -0.051047492772340775, + "fcm_dpo/margin": 471.324462890625, + "fcm_dpo/q_t": 0.400098979473114, + "grad_norm": 28.57129669189453, + "learning_rate": 6.640486409826785e-09, + "logits/chosen": -1.144999384880066, + "logits/rejected": -1.1962953805923462, + "logps/chosen": -772.426513671875, + "logps/ref_chosen": -49.652687072753906, + "logps/ref_rejected": -98.40513610839844, + "logps/rejected": -1292.50341796875, + "loss": 1.0731, + "margin_dpo/margin_mean": 471.32452392578125, + "margin_dpo/margin_std": 648.1543579101562, + "step": 637 + }, + { + "KL/chosen_KL_mean": -715.32373046875, + "KL/mean": -905.3469848632812, + "KL/rejected_KL_mean": -1095.3702392578125, + "KL/std": 610.3677368164062, + "epoch": 0.9368575624082232, + "fcm_dpo/beta": 0.0009441774454899132, + "fcm_dpo/delta": -0.08913271129131317, + "fcm_dpo/margin": 380.04638671875, + "fcm_dpo/q_t": 0.41255509853363037, + "grad_norm": 32.758209228515625, + "learning_rate": 6.349874889624962e-09, + "logits/chosen": -0.9814478158950806, + "logits/rejected": -0.9641016721725464, + "logps/chosen": -773.4804077148438, + "logps/ref_chosen": -58.156639099121094, + "logps/ref_rejected": -79.3014907836914, + "logps/rejected": -1174.671630859375, + "loss": 1.1707, + "margin_dpo/margin_mean": 380.0464172363281, + "margin_dpo/margin_std": 710.3338623046875, + "step": 638 + }, + { + "KL/chosen_KL_mean": -967.8865966796875, + "KL/mean": -1046.7158203125, + "KL/rejected_KL_mean": -1125.5450439453125, + "KL/std": 574.20703125, + "epoch": 0.9383259911894273, + "fcm_dpo/beta": 0.0009357619564980268, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 157.65838623046875, + "fcm_dpo/q_t": 0.4668254852294922, + "grad_norm": 124.77977752685547, + "learning_rate": 6.065683776815933e-09, + "logits/chosen": -1.0108537673950195, + "logits/rejected": -0.9488674402236938, + "logps/chosen": -1040.209716796875, + "logps/ref_chosen": -72.32319641113281, + "logps/ref_rejected": -74.2749252319336, + "logps/rejected": -1199.8199462890625, + "loss": 1.3504, + "margin_dpo/margin_mean": 157.65838623046875, + "margin_dpo/margin_std": 713.7766723632812, + "step": 639 + }, + { + "KL/chosen_KL_mean": -752.870361328125, + "KL/mean": -1040.066650390625, + "KL/rejected_KL_mean": -1327.262939453125, + "KL/std": 677.1709594726562, + "epoch": 0.9397944199706314, + "fcm_dpo/beta": 0.0009211286087520421, + "fcm_dpo/delta": -0.13610000908374786, + "fcm_dpo/margin": 574.392578125, + "fcm_dpo/q_t": 0.3863770365715027, + "grad_norm": 41.8037223815918, + "learning_rate": 5.7879205600998296e-09, + "logits/chosen": -0.9777020215988159, + "logits/rejected": -1.0074682235717773, + "logps/chosen": -809.0047607421875, + "logps/ref_chosen": -56.13436508178711, + "logps/ref_rejected": -108.60014343261719, + "logps/rejected": -1435.863037109375, + "loss": 1.0416, + "margin_dpo/margin_mean": 574.392578125, + "margin_dpo/margin_std": 796.70458984375, + "step": 640 + }, + { + "KL/chosen_KL_mean": -851.2357177734375, + "KL/mean": -1040.4761962890625, + "KL/rejected_KL_mean": -1229.7166748046875, + "KL/std": 575.572509765625, + "epoch": 0.9412628487518355, + "fcm_dpo/beta": 0.0009160140762105584, + "fcm_dpo/delta": 0.0552375465631485, + "fcm_dpo/margin": 378.48095703125, + "fcm_dpo/q_t": 0.4241793751716614, + "grad_norm": 35.59013366699219, + "learning_rate": 5.516592558795746e-09, + "logits/chosen": -1.040936827659607, + "logits/rejected": -1.0504437685012817, + "logps/chosen": -916.2326049804688, + "logps/ref_chosen": -64.99689483642578, + "logps/ref_rejected": -86.99232482910156, + "logps/rejected": -1316.708984375, + "loss": 1.1765, + "margin_dpo/margin_mean": 378.48095703125, + "margin_dpo/margin_std": 753.5254516601562, + "step": 641 + }, + { + "KL/chosen_KL_mean": -817.596435546875, + "KL/mean": -1044.927001953125, + "KL/rejected_KL_mean": -1272.257568359375, + "KL/std": 752.8553466796875, + "epoch": 0.9427312775330396, + "fcm_dpo/beta": 0.0009222212247550488, + "fcm_dpo/delta": -0.02048617973923683, + "fcm_dpo/margin": 454.66107177734375, + "fcm_dpo/q_t": 0.4156268537044525, + "grad_norm": 43.08086013793945, + "learning_rate": 5.251706922648868e-09, + "logits/chosen": -0.9874919652938843, + "logits/rejected": -1.022093653678894, + "logps/chosen": -883.28564453125, + "logps/ref_chosen": -65.68924713134766, + "logps/ref_rejected": -110.24205017089844, + "logps/rejected": -1382.4996337890625, + "loss": 1.1533, + "margin_dpo/margin_mean": 454.6611328125, + "margin_dpo/margin_std": 916.9835205078125, + "step": 642 + }, + { + "KL/chosen_KL_mean": -737.8060302734375, + "KL/mean": -912.438720703125, + "KL/rejected_KL_mean": -1087.071533203125, + "KL/std": 547.3657836914062, + "epoch": 0.9441997063142438, + "fcm_dpo/beta": 0.0009113398264162242, + "fcm_dpo/delta": -0.030877836048603058, + "fcm_dpo/margin": 349.2655334472656, + "fcm_dpo/q_t": 0.42619654536247253, + "grad_norm": 44.11703872680664, + "learning_rate": 4.993270631642038e-09, + "logits/chosen": -1.1141959428787231, + "logits/rejected": -1.1129988431930542, + "logps/chosen": -789.7559814453125, + "logps/ref_chosen": -51.94999694824219, + "logps/ref_rejected": -87.46833801269531, + "logps/rejected": -1174.539794921875, + "loss": 1.1568, + "margin_dpo/margin_mean": 349.2655029296875, + "margin_dpo/margin_std": 548.5968017578125, + "step": 643 + }, + { + "KL/chosen_KL_mean": -727.4107666015625, + "KL/mean": -910.2191162109375, + "KL/rejected_KL_mean": -1093.02734375, + "KL/std": 654.0157470703125, + "epoch": 0.9456681350954479, + "fcm_dpo/beta": 0.0009200773201882839, + "fcm_dpo/delta": 0.06583556532859802, + "fcm_dpo/margin": 365.61663818359375, + "fcm_dpo/q_t": 0.42611658573150635, + "grad_norm": 56.25822067260742, + "learning_rate": 4.741290495811873e-09, + "logits/chosen": -1.02555513381958, + "logits/rejected": -1.0345053672790527, + "logps/chosen": -786.428466796875, + "logps/ref_chosen": -59.017662048339844, + "logps/ref_rejected": -87.13668823242188, + "logps/rejected": -1180.1640625, + "loss": 1.1889, + "margin_dpo/margin_mean": 365.61663818359375, + "margin_dpo/margin_std": 751.4439697265625, + "step": 644 + }, + { + "KL/chosen_KL_mean": -752.749267578125, + "KL/mean": -840.9324951171875, + "KL/rejected_KL_mean": -929.1156005859375, + "KL/std": 507.17401123046875, + "epoch": 0.947136563876652, + "fcm_dpo/beta": 0.0009371960768476129, + "fcm_dpo/delta": 0.0740480124950409, + "fcm_dpo/margin": 176.36630249023438, + "fcm_dpo/q_t": 0.4641192555427551, + "grad_norm": 108.85242462158203, + "learning_rate": 4.495773155069299e-09, + "logits/chosen": -0.961013913154602, + "logits/rejected": -0.9447523355484009, + "logps/chosen": -808.6253051757812, + "logps/ref_chosen": -55.87602233886719, + "logps/ref_rejected": -97.78080749511719, + "logps/rejected": -1026.896484375, + "loss": 1.3301, + "margin_dpo/margin_mean": 176.3662872314453, + "margin_dpo/margin_std": 675.3572998046875, + "step": 645 + }, + { + "KL/chosen_KL_mean": -723.980712890625, + "KL/mean": -878.9730224609375, + "KL/rejected_KL_mean": -1033.9652099609375, + "KL/std": 492.32672119140625, + "epoch": 0.9486049926578561, + "fcm_dpo/beta": 0.0009495633421465755, + "fcm_dpo/delta": 0.10899513214826584, + "fcm_dpo/margin": 309.98443603515625, + "fcm_dpo/q_t": 0.4337136745452881, + "grad_norm": 58.64928436279297, + "learning_rate": 4.256725079024553e-09, + "logits/chosen": -1.0737169981002808, + "logits/rejected": -1.0561877489089966, + "logps/chosen": -785.2565307617188, + "logps/ref_chosen": -61.275787353515625, + "logps/ref_rejected": -77.50580596923828, + "logps/rejected": -1111.470947265625, + "loss": 1.1901, + "margin_dpo/margin_mean": 309.9844665527344, + "margin_dpo/margin_std": 590.6322021484375, + "step": 646 + }, + { + "KL/chosen_KL_mean": -644.8477172851562, + "KL/mean": -838.4265747070312, + "KL/rejected_KL_mean": -1032.00537109375, + "KL/std": 562.4344482421875, + "epoch": 0.9500734214390602, + "fcm_dpo/beta": 0.0009617937030270696, + "fcm_dpo/delta": 0.028707262128591537, + "fcm_dpo/margin": 387.1577453613281, + "fcm_dpo/q_t": 0.41403427720069885, + "grad_norm": 30.214832305908203, + "learning_rate": 4.024152566816791e-09, + "logits/chosen": -0.966199517250061, + "logits/rejected": -0.9970808029174805, + "logps/chosen": -699.7001342773438, + "logps/ref_chosen": -54.8524169921875, + "logps/ref_rejected": -93.5194091796875, + "logps/rejected": -1125.52490234375, + "loss": 1.1156, + "margin_dpo/margin_mean": 387.1577453613281, + "margin_dpo/margin_std": 551.2623291015625, + "step": 647 + }, + { + "KL/chosen_KL_mean": -684.664306640625, + "KL/mean": -976.056884765625, + "KL/rejected_KL_mean": -1267.4493408203125, + "KL/std": 685.91015625, + "epoch": 0.9515418502202643, + "fcm_dpo/beta": 0.0009418315021321177, + "fcm_dpo/delta": -0.15742585062980652, + "fcm_dpo/margin": 582.7850952148438, + "fcm_dpo/q_t": 0.38359707593917847, + "grad_norm": 31.0360164642334, + "learning_rate": 3.798061746947995e-09, + "logits/chosen": -1.1210038661956787, + "logits/rejected": -1.1833868026733398, + "logps/chosen": -738.8358154296875, + "logps/ref_chosen": -54.17146682739258, + "logps/ref_rejected": -98.7127914428711, + "logps/rejected": -1366.162109375, + "loss": 1.0315, + "margin_dpo/margin_mean": 582.7850952148438, + "margin_dpo/margin_std": 816.8055419921875, + "step": 648 + }, + { + "KL/chosen_KL_mean": -711.638916015625, + "KL/mean": -853.6250610351562, + "KL/rejected_KL_mean": -995.6112060546875, + "KL/std": 527.212890625, + "epoch": 0.9530102790014684, + "fcm_dpo/beta": 0.0009543564519844949, + "fcm_dpo/delta": 0.13216045498847961, + "fcm_dpo/margin": 283.9723205566406, + "fcm_dpo/q_t": 0.4425292909145355, + "grad_norm": 28.521751403808594, + "learning_rate": 3.5784585771215235e-09, + "logits/chosen": -1.1217185258865356, + "logits/rejected": -1.1153336763381958, + "logps/chosen": -774.1192626953125, + "logps/ref_chosen": -62.480350494384766, + "logps/ref_rejected": -80.07717895507812, + "logps/rejected": -1075.6884765625, + "loss": 1.2295, + "margin_dpo/margin_mean": 283.9723205566406, + "margin_dpo/margin_std": 654.6543579101562, + "step": 649 + }, + { + "KL/chosen_KL_mean": -754.6192016601562, + "KL/mean": -973.61767578125, + "KL/rejected_KL_mean": -1192.6162109375, + "KL/std": 647.8673095703125, + "epoch": 0.9544787077826725, + "fcm_dpo/beta": 0.000953975017182529, + "fcm_dpo/delta": -0.018730733543634415, + "fcm_dpo/margin": 437.99713134765625, + "fcm_dpo/q_t": 0.4086850881576538, + "grad_norm": 33.09385681152344, + "learning_rate": 3.3653488440851253e-09, + "logits/chosen": -1.028601884841919, + "logits/rejected": -1.0516587495803833, + "logps/chosen": -810.7120361328125, + "logps/ref_chosen": -56.09281921386719, + "logps/ref_rejected": -98.26483917236328, + "logps/rejected": -1290.881103515625, + "loss": 1.1243, + "margin_dpo/margin_mean": 437.99713134765625, + "margin_dpo/margin_std": 743.27490234375, + "step": 650 + }, + { + "KL/chosen_KL_mean": -512.770263671875, + "KL/mean": -790.38525390625, + "KL/rejected_KL_mean": -1068.000244140625, + "KL/std": 609.9718017578125, + "epoch": 0.9559471365638766, + "fcm_dpo/beta": 0.0009362648124806583, + "fcm_dpo/delta": -0.12655048072338104, + "fcm_dpo/margin": 555.2301025390625, + "fcm_dpo/q_t": 0.38282567262649536, + "grad_norm": 40.27021408081055, + "learning_rate": 3.158738163478475e-09, + "logits/chosen": -1.0640699863433838, + "logits/rejected": -1.1213992834091187, + "logps/chosen": -556.1956787109375, + "logps/ref_chosen": -43.42544937133789, + "logps/ref_rejected": -99.95791625976562, + "logps/rejected": -1167.958251953125, + "loss": 1.0095, + "margin_dpo/margin_mean": 555.2301025390625, + "margin_dpo/margin_std": 632.1912841796875, + "step": 651 + }, + { + "KL/chosen_KL_mean": -651.0936279296875, + "KL/mean": -865.922119140625, + "KL/rejected_KL_mean": -1080.7506103515625, + "KL/std": 616.6910400390625, + "epoch": 0.9574155653450808, + "fcm_dpo/beta": 0.000932047376409173, + "fcm_dpo/delta": -0.0005056131631135941, + "fcm_dpo/margin": 429.6570129394531, + "fcm_dpo/q_t": 0.411517471075058, + "grad_norm": 32.868282318115234, + "learning_rate": 2.9586319796851555e-09, + "logits/chosen": -1.0609800815582275, + "logits/rejected": -1.087823748588562, + "logps/chosen": -713.67041015625, + "logps/ref_chosen": -62.57680892944336, + "logps/ref_rejected": -111.76779174804688, + "logps/rejected": -1192.5184326171875, + "loss": 1.1248, + "margin_dpo/margin_mean": 429.6570129394531, + "margin_dpo/margin_std": 717.76123046875, + "step": 652 + }, + { + "KL/chosen_KL_mean": -803.664794921875, + "KL/mean": -996.8284912109375, + "KL/rejected_KL_mean": -1189.9921875, + "KL/std": 658.72021484375, + "epoch": 0.9588839941262849, + "fcm_dpo/beta": 0.0009377988171763718, + "fcm_dpo/delta": 0.03901583328843117, + "fcm_dpo/margin": 386.3274841308594, + "fcm_dpo/q_t": 0.4204859137535095, + "grad_norm": 35.157344818115234, + "learning_rate": 2.7650355656892166e-09, + "logits/chosen": -1.138892412185669, + "logits/rejected": -1.1634893417358398, + "logps/chosen": -864.7777709960938, + "logps/ref_chosen": -61.11295700073242, + "logps/ref_rejected": -103.24960327148438, + "logps/rejected": -1293.241943359375, + "loss": 1.153, + "margin_dpo/margin_mean": 386.3274841308594, + "margin_dpo/margin_std": 692.9034423828125, + "step": 653 + }, + { + "KL/chosen_KL_mean": -724.307373046875, + "KL/mean": -912.4171142578125, + "KL/rejected_KL_mean": -1100.52685546875, + "KL/std": 534.213134765625, + "epoch": 0.960352422907489, + "fcm_dpo/beta": 0.0009442999726161361, + "fcm_dpo/delta": 0.04640195518732071, + "fcm_dpo/margin": 376.21954345703125, + "fcm_dpo/q_t": 0.4215119779109955, + "grad_norm": 39.73265075683594, + "learning_rate": 2.577954022936174e-09, + "logits/chosen": -1.0920642614364624, + "logits/rejected": -1.1086204051971436, + "logps/chosen": -786.0355224609375, + "logps/ref_chosen": -61.7281379699707, + "logps/ref_rejected": -98.7738037109375, + "logps/rejected": -1199.30078125, + "loss": 1.1429, + "margin_dpo/margin_mean": 376.21954345703125, + "margin_dpo/margin_std": 627.8160400390625, + "step": 654 + }, + { + "KL/chosen_KL_mean": -665.0706787109375, + "KL/mean": -861.1571044921875, + "KL/rejected_KL_mean": -1057.24365234375, + "KL/std": 536.1744384765625, + "epoch": 0.9618208516886931, + "fcm_dpo/beta": 0.0009508723160251975, + "fcm_dpo/delta": 0.028146151453256607, + "fcm_dpo/margin": 392.17303466796875, + "fcm_dpo/q_t": 0.416969895362854, + "grad_norm": 28.9021053314209, + "learning_rate": 2.397392281198729e-09, + "logits/chosen": -1.0546410083770752, + "logits/rejected": -1.0955651998519897, + "logps/chosen": -714.6474609375, + "logps/ref_chosen": -49.576812744140625, + "logps/ref_rejected": -98.29183197021484, + "logps/rejected": -1155.535400390625, + "loss": 1.1326, + "margin_dpo/margin_mean": 392.17303466796875, + "margin_dpo/margin_std": 639.540771484375, + "step": 655 + }, + { + "KL/chosen_KL_mean": -728.656005859375, + "KL/mean": -1060.576904296875, + "KL/rejected_KL_mean": -1392.497802734375, + "KL/std": 723.1806030273438, + "epoch": 0.9632892804698973, + "fcm_dpo/beta": 0.0009240615181624889, + "fcm_dpo/delta": -0.2268456667661667, + "fcm_dpo/margin": 663.841796875, + "fcm_dpo/q_t": 0.3659891188144684, + "grad_norm": 87.20188903808594, + "learning_rate": 2.223355098446622e-09, + "logits/chosen": -0.9737097024917603, + "logits/rejected": -1.0463311672210693, + "logps/chosen": -781.2054443359375, + "logps/ref_chosen": -52.54943084716797, + "logps/ref_rejected": -113.67464447021484, + "logps/rejected": -1506.1724853515625, + "loss": 0.9623, + "margin_dpo/margin_mean": 663.841796875, + "margin_dpo/margin_std": 737.1829833984375, + "step": 656 + }, + { + "KL/chosen_KL_mean": -679.6723022460938, + "KL/mean": -946.5911865234375, + "KL/rejected_KL_mean": -1213.510009765625, + "KL/std": 687.6986083984375, + "epoch": 0.9647577092511013, + "fcm_dpo/beta": 0.0008973278454504907, + "fcm_dpo/delta": -0.08321470022201538, + "fcm_dpo/margin": 533.8377075195312, + "fcm_dpo/q_t": 0.39242735505104065, + "grad_norm": 39.09135437011719, + "learning_rate": 2.055847060721566e-09, + "logits/chosen": -1.1246776580810547, + "logits/rejected": -1.1695971488952637, + "logps/chosen": -726.372802734375, + "logps/ref_chosen": -46.700538635253906, + "logps/ref_rejected": -97.91487121582031, + "logps/rejected": -1311.4248046875, + "loss": 1.0554, + "margin_dpo/margin_mean": 533.8377075195312, + "margin_dpo/margin_std": 719.7000122070312, + "step": 657 + }, + { + "KL/chosen_KL_mean": -739.4154663085938, + "KL/mean": -951.917724609375, + "KL/rejected_KL_mean": -1164.420166015625, + "KL/std": 541.0897216796875, + "epoch": 0.9662261380323054, + "fcm_dpo/beta": 0.0008927997550927103, + "fcm_dpo/delta": 0.020825423300266266, + "fcm_dpo/margin": 425.004638671875, + "fcm_dpo/q_t": 0.4129961133003235, + "grad_norm": 36.563560485839844, + "learning_rate": 1.8948725820160662e-09, + "logits/chosen": -1.074481725692749, + "logits/rejected": -1.1052826642990112, + "logps/chosen": -800.3736572265625, + "logps/ref_chosen": -60.95820999145508, + "logps/ref_rejected": -95.93949127197266, + "logps/rejected": -1260.359619140625, + "loss": 1.1161, + "margin_dpo/margin_mean": 425.004638671875, + "margin_dpo/margin_std": 614.8718872070312, + "step": 658 + }, + { + "KL/chosen_KL_mean": -648.5974731445312, + "KL/mean": -853.8447265625, + "KL/rejected_KL_mean": -1059.092041015625, + "KL/std": 544.5018310546875, + "epoch": 0.9676945668135095, + "fcm_dpo/beta": 0.0009003398008644581, + "fcm_dpo/delta": 0.03149181231856346, + "fcm_dpo/margin": 410.4945068359375, + "fcm_dpo/q_t": 0.41639554500579834, + "grad_norm": 42.586883544921875, + "learning_rate": 1.7404359041573723e-09, + "logits/chosen": -0.9929611086845398, + "logits/rejected": -0.9660124778747559, + "logps/chosen": -725.3404541015625, + "logps/ref_chosen": -76.74298095703125, + "logps/ref_rejected": -87.4709701538086, + "logps/rejected": -1146.56298828125, + "loss": 1.1178, + "margin_dpo/margin_mean": 410.4945068359375, + "margin_dpo/margin_std": 598.6842041015625, + "step": 659 + }, + { + "KL/chosen_KL_mean": -703.4244384765625, + "KL/mean": -971.2889404296875, + "KL/rejected_KL_mean": -1239.1533203125, + "KL/std": 633.0969848632812, + "epoch": 0.9691629955947136, + "fcm_dpo/beta": 0.0008946568705141544, + "fcm_dpo/delta": -0.0832086056470871, + "fcm_dpo/margin": 535.7288818359375, + "fcm_dpo/q_t": 0.3920001983642578, + "grad_norm": 36.593955993652344, + "learning_rate": 1.592541096695571e-09, + "logits/chosen": -1.0897400379180908, + "logits/rejected": -1.1135540008544922, + "logps/chosen": -762.4723510742188, + "logps/ref_chosen": -59.04788589477539, + "logps/ref_rejected": -75.96005249023438, + "logps/rejected": -1315.11328125, + "loss": 1.0481, + "margin_dpo/margin_mean": 535.7288818359375, + "margin_dpo/margin_std": 685.7335205078125, + "step": 660 + }, + { + "KL/chosen_KL_mean": -620.7945556640625, + "KL/mean": -853.5462646484375, + "KL/rejected_KL_mean": -1086.2978515625, + "KL/std": 685.778564453125, + "epoch": 0.9706314243759178, + "fcm_dpo/beta": 0.0008907719748094678, + "fcm_dpo/delta": -0.015506003051996231, + "fcm_dpo/margin": 465.50323486328125, + "fcm_dpo/q_t": 0.4084845185279846, + "grad_norm": 50.08948516845703, + "learning_rate": 1.4511920567963908e-09, + "logits/chosen": -1.1007812023162842, + "logits/rejected": -1.116791009902954, + "logps/chosen": -671.4685668945312, + "logps/ref_chosen": -50.673973083496094, + "logps/ref_rejected": -86.00569152832031, + "logps/rejected": -1172.303466796875, + "loss": 1.089, + "margin_dpo/margin_mean": 465.5032958984375, + "margin_dpo/margin_std": 672.30322265625, + "step": 661 + }, + { + "KL/chosen_KL_mean": -724.219970703125, + "KL/mean": -912.091552734375, + "KL/rejected_KL_mean": -1099.963134765625, + "KL/std": 585.681884765625, + "epoch": 0.9720998531571219, + "fcm_dpo/beta": 0.0008955647936090827, + "fcm_dpo/delta": 0.06570842862129211, + "fcm_dpo/margin": 375.7431640625, + "fcm_dpo/q_t": 0.4247671663761139, + "grad_norm": 29.727699279785156, + "learning_rate": 1.3163925091384532e-09, + "logits/chosen": -1.0112884044647217, + "logits/rejected": -1.012375831604004, + "logps/chosen": -793.4810791015625, + "logps/ref_chosen": -69.26106262207031, + "logps/ref_rejected": -89.05593872070312, + "logps/rejected": -1189.01904296875, + "loss": 1.1725, + "margin_dpo/margin_mean": 375.74310302734375, + "margin_dpo/margin_std": 715.9241333007812, + "step": 662 + }, + { + "KL/chosen_KL_mean": -677.1693115234375, + "KL/mean": -901.0325927734375, + "KL/rejected_KL_mean": -1124.8958740234375, + "KL/std": 655.4639892578125, + "epoch": 0.973568281938326, + "fcm_dpo/beta": 0.0008970214985311031, + "fcm_dpo/delta": -0.0017335359007120132, + "fcm_dpo/margin": 447.7265930175781, + "fcm_dpo/q_t": 0.4119381904602051, + "grad_norm": 25.254993438720703, + "learning_rate": 1.1881460058152382e-09, + "logits/chosen": -1.0759081840515137, + "logits/rejected": -1.1006672382354736, + "logps/chosen": -742.0482177734375, + "logps/ref_chosen": -64.87890625, + "logps/ref_rejected": -113.92536926269531, + "logps/rejected": -1238.8212890625, + "loss": 1.125, + "margin_dpo/margin_mean": 447.7265930175781, + "margin_dpo/margin_std": 758.81591796875, + "step": 663 + }, + { + "KL/chosen_KL_mean": -732.6714477539062, + "KL/mean": -988.3836669921875, + "KL/rejected_KL_mean": -1244.095947265625, + "KL/std": 662.098876953125, + "epoch": 0.9750367107195301, + "fcm_dpo/beta": 0.0008872643811628222, + "fcm_dpo/delta": -0.05684386566281319, + "fcm_dpo/margin": 511.424560546875, + "fcm_dpo/q_t": 0.39906027913093567, + "grad_norm": 27.473468780517578, + "learning_rate": 1.066455926241383e-09, + "logits/chosen": -1.0546207427978516, + "logits/rejected": -1.0900723934173584, + "logps/chosen": -793.5599365234375, + "logps/ref_chosen": -60.88847351074219, + "logps/ref_rejected": -105.521728515625, + "logps/rejected": -1349.61767578125, + "loss": 1.0717, + "margin_dpo/margin_mean": 511.424560546875, + "margin_dpo/margin_std": 705.2711181640625, + "step": 664 + }, + { + "KL/chosen_KL_mean": -635.2431640625, + "KL/mean": -848.298583984375, + "KL/rejected_KL_mean": -1061.35400390625, + "KL/std": 531.826416015625, + "epoch": 0.9765051395007343, + "fcm_dpo/beta": 0.0008908901363611221, + "fcm_dpo/delta": 0.021186400204896927, + "fcm_dpo/margin": 426.11090087890625, + "fcm_dpo/q_t": 0.4121313691139221, + "grad_norm": 48.51000213623047, + "learning_rate": 9.513254770636137e-10, + "logits/chosen": -1.1572396755218506, + "logits/rejected": -1.1837971210479736, + "logps/chosen": -695.8072509765625, + "logps/ref_chosen": -60.56413269042969, + "logps/ref_rejected": -84.80882263183594, + "logps/rejected": -1146.162841796875, + "loss": 1.0953, + "margin_dpo/margin_mean": 426.11090087890625, + "margin_dpo/margin_std": 529.1512451171875, + "step": 665 + }, + { + "KL/chosen_KL_mean": -686.9071044921875, + "KL/mean": -899.4036865234375, + "KL/rejected_KL_mean": -1111.900146484375, + "KL/std": 544.5043334960938, + "epoch": 0.9779735682819384, + "fcm_dpo/beta": 0.0008939065737649798, + "fcm_dpo/delta": 0.020892852917313576, + "fcm_dpo/margin": 424.9931335449219, + "fcm_dpo/q_t": 0.4139803946018219, + "grad_norm": 34.465694427490234, + "learning_rate": 8.427576920763956e-10, + "logits/chosen": -0.9737902283668518, + "logits/rejected": -0.9849244356155396, + "logps/chosen": -751.3270263671875, + "logps/ref_chosen": -64.41996002197266, + "logps/ref_rejected": -95.8916244506836, + "logps/rejected": -1207.791748046875, + "loss": 1.1128, + "margin_dpo/margin_mean": 424.9931640625, + "margin_dpo/margin_std": 604.1651611328125, + "step": 666 + }, + { + "KL/chosen_KL_mean": -779.2694091796875, + "KL/mean": -1032.952392578125, + "KL/rejected_KL_mean": -1286.635498046875, + "KL/std": 609.7968139648438, + "epoch": 0.9794419970631424, + "fcm_dpo/beta": 0.0008860268862918019, + "fcm_dpo/delta": -0.05220697447657585, + "fcm_dpo/margin": 507.36602783203125, + "fcm_dpo/q_t": 0.3982255458831787, + "grad_norm": 44.31229782104492, + "learning_rate": 7.407554321417764e-10, + "logits/chosen": -1.008927822113037, + "logits/rejected": -1.0107920169830322, + "logps/chosen": -848.5464477539062, + "logps/ref_chosen": -69.27702331542969, + "logps/ref_rejected": -87.83549499511719, + "logps/rejected": -1374.470947265625, + "loss": 1.067, + "margin_dpo/margin_mean": 507.36602783203125, + "margin_dpo/margin_std": 670.2469482421875, + "step": 667 + }, + { + "KL/chosen_KL_mean": -829.6917724609375, + "KL/mean": -1003.8605346679688, + "KL/rejected_KL_mean": -1178.029296875, + "KL/std": 655.914794921875, + "epoch": 0.9809104258443465, + "fcm_dpo/beta": 0.0009021821897476912, + "fcm_dpo/delta": 0.08765879273414612, + "fcm_dpo/margin": 348.3375244140625, + "fcm_dpo/q_t": 0.43241050839424133, + "grad_norm": 51.814239501953125, + "learning_rate": 6.453213851142225e-10, + "logits/chosen": -1.082472324371338, + "logits/rejected": -1.0882298946380615, + "logps/chosen": -902.2957763671875, + "logps/ref_chosen": -72.60400390625, + "logps/ref_rejected": -103.73905944824219, + "logps/rejected": -1281.768310546875, + "loss": 1.2118, + "margin_dpo/margin_mean": 348.3375244140625, + "margin_dpo/margin_std": 782.4864501953125, + "step": 668 + }, + { + "KL/chosen_KL_mean": -632.581787109375, + "KL/mean": -886.140625, + "KL/rejected_KL_mean": -1139.6993408203125, + "KL/std": 600.3204345703125, + "epoch": 0.9823788546255506, + "fcm_dpo/beta": 0.0008957190439105034, + "fcm_dpo/delta": -0.05677647516131401, + "fcm_dpo/margin": 507.1175537109375, + "fcm_dpo/q_t": 0.3966999053955078, + "grad_norm": 27.12503433227539, + "learning_rate": 5.564580657695939e-10, + "logits/chosen": -1.0696676969528198, + "logits/rejected": -1.0817254781723022, + "logps/chosen": -678.6981811523438, + "logps/ref_chosen": -46.116416931152344, + "logps/ref_rejected": -77.92434692382812, + "logps/rejected": -1217.623779296875, + "loss": 1.0658, + "margin_dpo/margin_mean": 507.1175537109375, + "margin_dpo/margin_std": 671.839599609375, + "step": 669 + }, + { + "KL/chosen_KL_mean": -601.8228759765625, + "KL/mean": -861.4261474609375, + "KL/rejected_KL_mean": -1121.0294189453125, + "KL/std": 569.886962890625, + "epoch": 0.9838472834067548, + "fcm_dpo/beta": 0.0008871153695508838, + "fcm_dpo/delta": -0.06361524760723114, + "fcm_dpo/margin": 519.2064819335938, + "fcm_dpo/q_t": 0.3957204818725586, + "grad_norm": 23.987220764160156, + "learning_rate": 4.741678157389739e-10, + "logits/chosen": -0.9869112968444824, + "logits/rejected": -1.0059700012207031, + "logps/chosen": -664.168701171875, + "logps/ref_chosen": -62.34575271606445, + "logps/ref_rejected": -96.9405517578125, + "logps/rejected": -1217.969970703125, + "loss": 1.066, + "margin_dpo/margin_mean": 519.2064819335938, + "margin_dpo/margin_std": 689.0188598632812, + "step": 670 + }, + { + "KL/chosen_KL_mean": -770.5162353515625, + "KL/mean": -969.230224609375, + "KL/rejected_KL_mean": -1167.944091796875, + "KL/std": 568.30517578125, + "epoch": 0.9853157121879589, + "fcm_dpo/beta": 0.0008903343696147203, + "fcm_dpo/delta": 0.047325365245342255, + "fcm_dpo/margin": 397.4279479980469, + "fcm_dpo/q_t": 0.41793012619018555, + "grad_norm": 37.049495697021484, + "learning_rate": 3.9845280344705245e-10, + "logits/chosen": -1.0835880041122437, + "logits/rejected": -1.1127347946166992, + "logps/chosen": -818.516357421875, + "logps/ref_chosen": -48.00010681152344, + "logps/ref_rejected": -83.81932067871094, + "logps/rejected": -1251.763427734375, + "loss": 1.1536, + "margin_dpo/margin_mean": 397.427978515625, + "margin_dpo/margin_std": 692.941650390625, + "step": 671 + }, + { + "KL/chosen_KL_mean": -854.4781494140625, + "KL/mean": -1061.944091796875, + "KL/rejected_KL_mean": -1269.41015625, + "KL/std": 686.4140625, + "epoch": 0.986784140969163, + "fcm_dpo/beta": 0.0008913551573641598, + "fcm_dpo/delta": 0.03129229322075844, + "fcm_dpo/margin": 414.9320983886719, + "fcm_dpo/q_t": 0.41862136125564575, + "grad_norm": 60.62648010253906, + "learning_rate": 3.293150240547549e-10, + "logits/chosen": -1.1662323474884033, + "logits/rejected": -1.1740036010742188, + "logps/chosen": -913.0614013671875, + "logps/ref_chosen": -58.58328628540039, + "logps/ref_rejected": -93.14015197753906, + "logps/rejected": -1362.55029296875, + "loss": 1.1557, + "margin_dpo/margin_mean": 414.9320983886719, + "margin_dpo/margin_std": 755.1729125976562, + "step": 672 + }, + { + "KL/chosen_KL_mean": -753.8118896484375, + "KL/mean": -946.7315063476562, + "KL/rejected_KL_mean": -1139.651123046875, + "KL/std": 579.491455078125, + "epoch": 0.9882525697503671, + "fcm_dpo/beta": 0.0009015346877276897, + "fcm_dpo/delta": 0.054041508585214615, + "fcm_dpo/margin": 385.8390808105469, + "fcm_dpo/q_t": 0.42215287685394287, + "grad_norm": 42.15021514892578, + "learning_rate": 2.6675629940689504e-10, + "logits/chosen": -1.1026825904846191, + "logits/rejected": -1.1077499389648438, + "logps/chosen": -800.53515625, + "logps/ref_chosen": -46.72320556640625, + "logps/ref_rejected": -85.29623413085938, + "logps/rejected": -1224.947265625, + "loss": 1.1464, + "margin_dpo/margin_mean": 385.8390808105469, + "margin_dpo/margin_std": 660.7847900390625, + "step": 673 + }, + { + "KL/chosen_KL_mean": -630.5238037109375, + "KL/mean": -885.9769287109375, + "KL/rejected_KL_mean": -1141.429931640625, + "KL/std": 571.075439453125, + "epoch": 0.9897209985315712, + "fcm_dpo/beta": 0.0008984719170257449, + "fcm_dpo/delta": -0.061865366995334625, + "fcm_dpo/margin": 510.90618896484375, + "fcm_dpo/q_t": 0.3981897830963135, + "grad_norm": 36.43729782104492, + "learning_rate": 2.1077827798404725e-10, + "logits/chosen": -0.9947335720062256, + "logits/rejected": -1.015453577041626, + "logps/chosen": -675.9693603515625, + "logps/ref_chosen": -45.445526123046875, + "logps/ref_rejected": -70.04593658447266, + "logps/rejected": -1211.475830078125, + "loss": 1.0617, + "margin_dpo/margin_mean": 510.90618896484375, + "margin_dpo/margin_std": 678.5731811523438, + "step": 674 + }, + { + "KL/chosen_KL_mean": -708.9229736328125, + "KL/mean": -968.3958740234375, + "KL/rejected_KL_mean": -1227.868896484375, + "KL/std": 633.1185302734375, + "epoch": 0.9911894273127754, + "fcm_dpo/beta": 0.0008785349782556295, + "fcm_dpo/delta": -0.060169536620378494, + "fcm_dpo/margin": 518.9458618164062, + "fcm_dpo/q_t": 0.39929330348968506, + "grad_norm": 25.439342498779297, + "learning_rate": 1.6138243485910863e-10, + "logits/chosen": -1.0798540115356445, + "logits/rejected": -1.094804286956787, + "logps/chosen": -753.0992431640625, + "logps/ref_chosen": -44.17628479003906, + "logps/ref_rejected": -74.09197998046875, + "logps/rejected": -1301.9608154296875, + "loss": 1.0676, + "margin_dpo/margin_mean": 518.9458618164062, + "margin_dpo/margin_std": 681.2320556640625, + "step": 675 + }, + { + "KL/chosen_KL_mean": -749.7974853515625, + "KL/mean": -996.75341796875, + "KL/rejected_KL_mean": -1243.709228515625, + "KL/std": 598.0169067382812, + "epoch": 0.9926578560939795, + "fcm_dpo/beta": 0.0008790518622845411, + "fcm_dpo/delta": -0.03571845218539238, + "fcm_dpo/margin": 493.9117736816406, + "fcm_dpo/q_t": 0.4009990990161896, + "grad_norm": 26.6938419342041, + "learning_rate": 1.1857007165852472e-10, + "logits/chosen": -1.0037989616394043, + "logits/rejected": -1.0201971530914307, + "logps/chosen": -821.196044921875, + "logps/ref_chosen": -71.39852905273438, + "logps/ref_rejected": -88.3587646484375, + "logps/rejected": -1332.0679931640625, + "loss": 1.065, + "margin_dpo/margin_mean": 493.9117736816406, + "margin_dpo/margin_std": 605.2867431640625, + "step": 676 + }, + { + "KL/chosen_KL_mean": -764.7623291015625, + "KL/mean": -998.9996337890625, + "KL/rejected_KL_mean": -1233.237060546875, + "KL/std": 625.9779052734375, + "epoch": 0.9941262848751835, + "fcm_dpo/beta": 0.0008738588076084852, + "fcm_dpo/delta": -0.009816518053412437, + "fcm_dpo/margin": 468.474609375, + "fcm_dpo/q_t": 0.4123176634311676, + "grad_norm": 31.511207580566406, + "learning_rate": 8.23423165278725e-11, + "logits/chosen": -1.093052864074707, + "logits/rejected": -1.0892189741134644, + "logps/chosen": -821.289794921875, + "logps/ref_chosen": -56.527435302734375, + "logps/ref_rejected": -78.22654724121094, + "logps/rejected": -1311.4635009765625, + "loss": 1.1107, + "margin_dpo/margin_mean": 468.4746398925781, + "margin_dpo/margin_std": 753.8814697265625, + "step": 677 + }, + { + "KL/chosen_KL_mean": -644.7113037109375, + "KL/mean": -927.87255859375, + "KL/rejected_KL_mean": -1211.03369140625, + "KL/std": 692.720458984375, + "epoch": 0.9955947136563876, + "fcm_dpo/beta": 0.0008661206811666489, + "fcm_dpo/delta": -0.09516976773738861, + "fcm_dpo/margin": 566.3224487304688, + "fcm_dpo/q_t": 0.39175188541412354, + "grad_norm": 28.7852840423584, + "learning_rate": 5.270012410216185e-11, + "logits/chosen": -1.0311825275421143, + "logits/rejected": -1.070950984954834, + "logps/chosen": -690.8457641601562, + "logps/ref_chosen": -46.13447570800781, + "logps/ref_rejected": -80.60462951660156, + "logps/rejected": -1291.638427734375, + "loss": 1.0583, + "margin_dpo/margin_mean": 566.3224487304688, + "margin_dpo/margin_std": 791.4454345703125, + "step": 678 + }, + { + "KL/chosen_KL_mean": -734.8692626953125, + "KL/mean": -927.3165283203125, + "KL/rejected_KL_mean": -1119.763916015625, + "KL/std": 549.86669921875, + "epoch": 0.9970631424375918, + "fcm_dpo/beta": 0.0008657841826789081, + "fcm_dpo/delta": 0.06911883503198624, + "fcm_dpo/margin": 384.8945617675781, + "fcm_dpo/q_t": 0.4249575436115265, + "grad_norm": 39.39456558227539, + "learning_rate": 2.9644275480772416e-11, + "logits/chosen": -1.0478802919387817, + "logits/rejected": -1.0363208055496216, + "logps/chosen": -785.1641845703125, + "logps/ref_chosen": -50.294921875, + "logps/ref_rejected": -76.59813690185547, + "logps/rejected": -1196.3619384765625, + "loss": 1.1507, + "margin_dpo/margin_mean": 384.8945617675781, + "margin_dpo/margin_std": 635.107666015625, + "step": 679 + }, + { + "KL/chosen_KL_mean": -748.73779296875, + "KL/mean": -1005.2867431640625, + "KL/rejected_KL_mean": -1261.835693359375, + "KL/std": 711.034912109375, + "epoch": 0.9985315712187959, + "fcm_dpo/beta": 0.0008600302971899509, + "fcm_dpo/delta": -0.04386995732784271, + "fcm_dpo/margin": 513.0979614257812, + "fcm_dpo/q_t": 0.39925122261047363, + "grad_norm": 38.68387985229492, + "learning_rate": 1.31753782067201e-11, + "logits/chosen": -1.037444829940796, + "logits/rejected": -1.0661684274673462, + "logps/chosen": -825.6535034179688, + "logps/ref_chosen": -76.91569519042969, + "logps/ref_rejected": -112.384765625, + "logps/rejected": -1374.220458984375, + "loss": 1.0929, + "margin_dpo/margin_mean": 513.0979614257812, + "margin_dpo/margin_std": 767.623046875, + "step": 680 + }, + { + "KL/chosen_KL_mean": -737.5836791992188, + "KL/mean": -939.5941772460938, + "KL/rejected_KL_mean": -1141.6046142578125, + "KL/std": 583.9456176757812, + "epoch": 1.0, + "fcm_dpo/beta": 0.0008584200404584408, + "fcm_dpo/delta": -0.04448072612285614, + "fcm_dpo/margin": 404.02093505859375, + "fcm_dpo/q_t": 0.4208639860153198, + "grad_norm": 35.66378402709961, + "learning_rate": 3.2938662507808745e-12, + "logits/chosen": -1.1038322448730469, + "logits/rejected": -1.1210821866989136, + "logps/chosen": -798.5409545898438, + "logps/ref_chosen": -60.957279205322266, + "logps/ref_rejected": -88.55797576904297, + "logps/rejected": -1230.16259765625, + "loss": 1.1462, + "margin_dpo/margin_mean": 404.0209655761719, + "margin_dpo/margin_std": 639.079833984375, + "step": 681 + }, + { + "epoch": 1.0, + "step": 681, + "total_flos": 0.0, + "train_loss": 1.093637848565582, + "train_runtime": 1736.9515, + "train_samples_per_second": 25.1, + "train_steps_per_second": 0.392 + } + ], + "logging_steps": 1, + "max_steps": 681, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}